zoukankan      html  css  js  c++  java
  • Scrapy爬取妹子图保存到不同目录下

    在 settings.py 中进行设置

    # Enable the custom image pipeline.
    ITEM_PIPELINES = {
        'mztu.pipelines.ImagesPipelinse': 300,
    }
    # Base directory for downloaded images. This setting must be defined for
    # image downloads to work. A raw string keeps the Windows backslashes
    # literal instead of relying on them not forming escape sequences.
    IMAGES_STORE = r"E:\study\Python\scrapy\mztu\imges"
    # Expiration period for already-downloaded images (Scrapy documents the
    # unit of this setting as days).
    IMAGES_EXPIRES = 90
    # Thumbnail generation (disabled).
    # IMAGES_THUMBS = {
    #     'small': (50, 50),
    #     'big': (270, 270),
    # }

    spiders 目录下的爬虫文件

    # -*- coding: utf-8 -*-
    import scrapy
    from mztu.items import MztuItem
    
    class ZimdgSpider(scrapy.Spider):
        """Crawl the mzitu.com "xinggan" listing and emit one item per image page.

        The crawl runs in three steps: ``parse`` follows every album link on a
        listing page, ``parse_item`` requests each page of an album, and
        ``parse_ponse`` extracts the image URL and title from a single page.
        """

        name = 'zimdg'
        allowed_domains = ['mzitu.com']
        # Listing URLs. Starts at 1 because the previous range(118) also
        # requested ".../page/0/" (assumes site pagination is 1-based — TODO
        # confirm); the upper bound of page 117 is unchanged.
        start_urls = ['http://www.mzitu.com/xinggan/page/{}/'.format(x) for x in range(1, 118)]

        def parse(self, response):
            """Yield a request to ``parse_item`` for each album link on a listing page."""
            for entry in response.xpath("//div[@class='postlist']/ul/li"):
                album_url = entry.xpath('./a/@href').extract_first()
                if not album_url:
                    # A list entry without a link would previously raise
                    # IndexError and abort the rest of the page.
                    continue
                yield scrapy.Request(album_url, callback=self.parse_item)

        def parse_item(self, response):
            """Yield one request per page of an album.

            Each request carries its own ``MztuItem`` (under the ``meta_1`` meta
            key) whose ``Referer`` field is the URL of that page.
            """
            labels = response.xpath('//div[@class="pagenavi"]/a/span/text()').extract()
            # Take the largest numeric pagination label as the page count
            # instead of a fixed position, which fails when an album has fewer
            # pagination links. Assumes the largest label is the last page —
            # TODO confirm against the site markup.
            page_numbers = [int(text) for text in (label.strip() for label in labels) if text.isdigit()]
            last_page = max(page_numbers) if page_numbers else 1

            # Avoid a double slash when the album URL already ends with "/".
            base_url = response.url.rstrip('/')
            for page in range(1, last_page + 1):
                page_url = '{}/{}'.format(base_url, page)
                # A fresh item per request: sharing a single mutable item across
                # all requests lets later iterations and callbacks overwrite
                # each other's fields before the earlier ones are processed.
                item = MztuItem()
                item['Referer'] = page_url
                yield scrapy.Request(page_url, meta={'meta_1': item}, callback=self.parse_ponse)

        def parse_ponse(self, response):
            """Fill in ``title`` and ``imge_url`` on the item passed via meta and yield it."""
            item = response.meta['meta_1']
            image_url = response.xpath('//div[@class="main-image"]/p/a/img/@src').extract_first()
            title = response.xpath('//div[@class="main-image"]/p/a/img/@alt').extract_first()
            if image_url is None or title is None:
                # Skip pages without the expected image node rather than
                # raising IndexError.
                self.logger.warning("No main image found on %s", response.url)
                return
            item["title"] = title
            item["imge_url"] = image_url
            yield item

    items.py

    import scrapy
    
    
    class MztuItem(scrapy.Item):
        # Data carried for a single image from the spider to the image pipeline.

        # Image title; the pipeline uses it as the name of the target directory.
        title = scrapy.Field()
        # URL of the image to download.
        imge_url = scrapy.Field()
        # URL of the page the image was found on; sent as the Referer request header.
        Referer = scrapy.Field()
    
        # Final path of the image on disk, filled in by the pipeline after the move.
        image_Path = scrapy.Field()
        # Image file name (currently unused).
       # nickname = scrapy.Field()

    pipelines.py 管道

    # -*- coding: utf-8 -*-
    
    # Define your item pipelines here
    #
    # Don't forget to add your pipeline to the ITEM_PIPELINES setting
    # See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
    # 导入这个包为了移动文件
    import shutil
    #此包不解释
    import scrapy
    # 导入项目设置
    from scrapy.utils.project import get_project_settings
    # 导入scrapy框架的图片下载类
    from scrapy.pipelines.images import ImagesPipeline
    #此包不解释
    import os
    
    class ImagesPipelinse(ImagesPipeline):
        """Download each item's image and move it into a directory named after its title."""

        # Base image directory, read from the project settings.
        IMAGES_STORE = get_project_settings().get("IMAGES_STORE")

        def get_media_requests(self, item, info):
            """Yield the download request for the item's image.

            The page URL is sent as the Referer header, which the original
            author notes is needed to get past the site's anti-scraping check.
            """
            yield scrapy.Request(item["imge_url"], headers={'Referer': item['Referer']})

        def item_completed(self, result, item, info):
            """Move the downloaded image into ``<IMAGES_STORE>/<title>/``.

            Args:
                result: iterable of ``(ok, value)`` pairs; for successful
                    downloads ``value["path"]`` is the file path relative to
                    ``IMAGES_STORE``.
                item: the item being processed; its ``image_Path`` field is set
                    to the final location of the file.
                info: unused here.

            Returns:
                The item, with ``image_Path`` filled in.

            Raises:
                DropItem: if no download for this item succeeded.
            """
            # Imported locally so this block does not depend on the file's
            # import section being changed.
            from scrapy.exceptions import DropItem

            downloaded = [value["path"] for ok, value in result if ok]
            if not downloaded:
                # Previously this case crashed with IndexError on downloaded[0].
                raise DropItem("Image download failed for %s" % item["imge_url"])
            relative_path = downloaded[0]

            # Build paths with os.path so separators are correct on every
            # platform, instead of hard-coded backslashes.
            target_dir = os.path.join(self.IMAGES_STORE, item['title'])
            if not os.path.isdir(target_dir):
                # makedirs also creates a missing IMAGES_STORE parent.
                os.makedirs(target_dir)

            # basename replaces the old `find("full\\") + 6` slice, which only
            # produced the file name because find() returned -1 on the
            # forward-slash path.
            target_path = os.path.join(target_dir, os.path.basename(relative_path))
            shutil.move(os.path.join(self.IMAGES_STORE, relative_path), target_path)
            item['image_Path'] = target_path
            return item

    这里通过重写 item_completed() 实现把图片保存到不同的目录下：图片先被下载到 IMAGES_STORE 下的默认路径，再用 shutil.move() 移动到以标题命名的目录中。

  • 相关阅读:
    vue.js 系列教程
    vue.js 生命周期
    MVVM代码例子
    vue.js windows下开发环境搭建
    Vue.js 之修饰符详解
    elementUi——适合于Vue的UI框架
    Vue.js——60分钟快速入门
    Keil sct分散加载文件
    Keil ARM-CM3 printf输出调试信息到Debug (printf) Viewer
    Cortex-M3(NXP LPC 1788) 启动代码
  • 原文地址:https://www.cnblogs.com/contiune/p/9384973.html
Copyright © 2011-2022 走看看