zoukankan      html  css  js  c++  java
  • 宝马5系图片分类下载自动创建文件夹并保存

    import os
    import requests
    from lxml import etree
    from urllib import request
    
    
    # Request headers sent with the page fetch so it looks like a normal
    # desktop Chrome browser visit.
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en',
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/79.0.3945.88 Safari/537.36'
        ),
    }
    
    def parse(url, timeout=10):
        """Fetch a gallery page and return its image URLs grouped by category.

        Args:
            url: Address of the page to download and parse.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot hang the script forever.

        Returns:
            A list of dicts, one per category box found on the page, each of
            the form ``{'category': <title text>, 'img_urls': [<absolute
            image URL>, ...]}``.
        """
        # Local import keeps this function self-contained; the rest of the
        # file only imports ``urllib.request``.
        from urllib.parse import urljoin

        page_source = requests.get(url, headers=headers, timeout=timeout).text
        html = etree.HTML(page_source)

        items = []
        # The first "uibox" div is deliberately skipped (presumably it is not
        # an image category -- confirm against the live page).
        for uibox in html.xpath("//div[@class='uibox']")[1:]:
            titles = uibox.xpath("./div[@class='uibox-title']/a/text()")
            if not titles:
                # A box without a title link cannot be mapped to a folder.
                continue

            # Resolve every src against the page URL: protocol-relative
            # ("//host/x.jpg"), relative and already-absolute values all end
            # up as valid absolute URLs.
            img_urls = [
                urljoin(url, src) for src in uibox.xpath(".//li//img/@src")
            ]
            items.append({'category': titles[0], 'img_urls': img_urls})
        return items
    
    def pipeline(url):
        """Download every image found by ``parse(url)`` into per-category folders.

        Images are stored under ``images/<category>/`` next to this script;
        missing folders are created on demand.

        Args:
            url: Address of the gallery page handed to ``parse``.
        """
        abspath = os.path.dirname(__file__)  # directory containing this script
        imgpath = os.path.join(abspath, "images")
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(imgpath, exist_ok=True)

        for item in parse(url):
            category_path = os.path.join(imgpath, item['category'])
            os.makedirs(category_path, exist_ok=True)

            for img_url in item['img_urls']:
                # Take the last path segment first so the file name can never
                # contain "/" (which happened when the last "_" was in a
                # directory part, or when the URL had no "_" at all), then
                # keep only the part after the final underscore.
                img_name = img_url.rsplit('/', 1)[-1].split('_')[-1]
                savepath = os.path.join(category_path, img_name)
                request.urlretrieve(img_url, savepath)
                print(img_name, "下载完成")
    
    
    if __name__ == "__main__":
        # Script entry point: download the image gallery of the hard-coded
        # series page.
        pipeline("https://car.autohome.com.cn/pic/series/65.html#pvareaid=3454507")
    
    
    
    
    
    
    
  • 相关阅读:
    week02 线性表
    week01绪论
    第一周作业
    C语言第二次实验作业
    C语言实验报告
    博客作业06--图
    博客作业05--查找
    博客作业04--树
    博客作业03--栈和队列
    博客作业2---线性表
  • 原文地址:https://www.cnblogs.com/zxfei/p/12148817.html
Copyright © 2011-2022 走看看