zoukankan      html  css  js  c++  java
  • 宝马5系图片分类下载自动创建文件夹并保存

    import os
    import requests
    from lxml import etree
    from urllib import request
    
    
    # Request headers sent with the page fetch in parse(); the User-Agent
    # string is that of a desktop Chrome browser.
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36",
    }
    
    def parse(url):
        """Fetch a gallery page and return its image URLs grouped by category.

        Args:
            url: Address of the page to download and parse.

        Returns:
            A list of dicts, one per ``div.uibox`` section after the first,
            each shaped like ``{'category': <title text>, 'img_urls': [<url>, ...]}``.
            Sections without a title link are left out.

        Raises:
            requests.RequestException: If the page cannot be fetched, including
                when the server does not answer within the timeout.
        """
        # Function-scope import so this block does not depend on a new
        # top-of-file import.
        from urllib.parse import urljoin

        # Without an explicit timeout, requests waits indefinitely on a
        # stalled connection.
        page_source = requests.get(url, headers=headers, timeout=10).text
        html = etree.HTML(page_source)

        items = []
        # The first uibox on the page is deliberately skipped.
        for uibox in html.xpath("//div[@class='uibox']")[1:]:
            titles = uibox.xpath("./div[@class='uibox-title']/a/text()")
            if not titles:
                # No title link means no category name to file the images under.
                continue

            img_urls = []
            for src in uibox.xpath(".//li//img/@src"):
                if src.startswith("//"):
                    # Protocol-relative address: pin it to https.
                    img_urls.append("https:" + src)
                else:
                    # Already-absolute or page-relative address: resolve it
                    # against the page URL instead of blindly prefixing "https:".
                    img_urls.append(urljoin(url, src))

            items.append({'category': titles[0], 'img_urls': img_urls})
        return items
    
    def pipeline(url):
        """Download every image found by ``parse(url)`` into per-category folders.

        Files are written to ``images/<category>/<name>`` inside the directory
        that contains this script; missing folders are created on demand. A
        progress line is printed after each download.

        Args:
            url: Address of the gallery page, passed straight to ``parse``.
        """
        # Resolve the script's own directory to an absolute path first.
        script_dir = os.path.dirname(os.path.abspath(__file__))
        imgpath = os.path.join(script_dir, "images")
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(imgpath, exist_ok=True)

        for item in parse(url):
            category_path = os.path.join(imgpath, item['category'])
            os.makedirs(category_path, exist_ok=True)

            for img_url in item['img_urls']:
                # The file name is the text after the last underscore; the extra
                # rsplit drops any remaining path part so the name can never
                # contain a '/' (e.g. when the URL has no underscore at all).
                img_name = img_url.split('_')[-1].rsplit('/', 1)[-1]
                if not img_name:
                    # Nothing usable as a file name; saving would target the
                    # category directory itself.
                    continue
                savepath = os.path.join(category_path, img_name)
                request.urlretrieve(img_url, savepath)
                print(img_name, "下载完成")
    
    
    if __name__ == "__main__":
        # Script entry point: run the download pipeline on the gallery page.
        start_url = "https://car.autohome.com.cn/pic/series/65.html#pvareaid=3454507"
        pipeline(start_url)
    
    
    
    
    
    
    
  • 相关阅读:
    一条select语句的执行流程
    理解数据库的事务,ACID,CAP
    java并发volatile和synchronized的底层机制
    避免死锁的几种方式
    如何减少线程上下文切换
    RestTemplate设置超时时间
    spring事务隔离级别和传播级别
    mysql数据库与其他数据库的区别
    springcloud组件注解汇总
    python二级选择题易错知识点总结
  • 原文地址:https://www.cnblogs.com/zxfei/p/12148817.html
Copyright © 2011-2022 走看看