zoukankan html css js c++ java

使用 Scrapy 框架简单爬取 4K 高清壁纸

import scrapy


class TpSpider(scrapy.Spider):
    """Crawl wallpaper listing pages on pic.netbian.com and save the images.

    The crawl has three stages, one callback each:

    1. ``parse``      - listing page  -> one request per picture detail page.
    2. ``imgs_parse`` - detail page   -> one request per full-size image URL.
    3. ``img_parse``  - image response -> bytes written to ``./imgs/<name>.jpg``.
    """

    name = 'tp'
    # allowed_domains = ['baidu.com']
    # Listing pages to crawl: the first page plus pages 2-10 (10 pages total).
    # The list concatenation is wrapped in brackets so it can span lines
    # without a trailing backslash.
    start_urls = ['http://pic.netbian.com/4kmeinv/index.html'] + [
        'http://pic.netbian.com/4kmeinv/index_%s.html' % page
        for page in range(2, 11)
    ]

    def parse(self, response):
        """Yield a request for every picture detail page linked from a listing.

        Args:
            response: Scrapy response for one listing page.

        Yields:
            scrapy.Request: request for a detail page, handled by
            ``imgs_parse``.
        """
        for li in response.xpath('//ul[@class="clearfix"]/li'):
            href = li.xpath('./a/@href').extract_first()
            if not href:
                # <li> without a link: nothing to follow.
                continue
            # urljoin resolves the href against the page URL, so both
            # site-absolute ("/tupian/1.html") and relative links work.
            yield scrapy.Request(url=response.urljoin(href),
                                 callback=self.imgs_parse)

    def imgs_parse(self, response):
        """Yield a request for the image shown on a detail page.

        Args:
            response: Scrapy response for one picture detail page.

        Yields:
            scrapy.Request: request for the image file, handled by
            ``img_parse``; the image's ``alt`` text is passed along as
            ``meta['name']``.
        """
        for anchor in response.xpath('//div[@class="photo-pic"]/a'):
            src = anchor.xpath('./img/@src').extract_first()
            if not src:
                # No image source in this anchor: skip it rather than crash.
                continue
            title = anchor.xpath('./img/@alt').extract_first()
            yield scrapy.Request(url=response.urljoin(src),
                                 callback=self.img_parse,
                                 meta={'name': title})

    def img_parse(self, response):
        """Write the downloaded image body to ``./imgs/<name>.jpg``.

        Args:
            response: Scrapy response whose body is the image bytes and whose
                ``meta['name']`` is the title captured in ``imgs_parse``.
        """
        import os

        name = response.meta['name']
        if not name:
            # No alt text was found: fall back to the file name in the URL so
            # that images do not all overwrite the same "None.jpg".
            name = os.path.splitext(os.path.basename(response.url))[0]
        # Path separators in a title would point outside ./imgs or at a
        # non-existent sub-directory.
        file_name = name.replace('/', '_').replace('\\', '_')

        # The output directory is not guaranteed to exist on first run.
        os.makedirs('./imgs', exist_ok=True)
        with open('./imgs/%s.jpg' % file_name, 'wb') as f:
            f.write(response.body)
        print('正在下载图片：%s' % name)

查看全文

相关阅读:
Linux Redis 安装和连接
ppt转化pdf
跨服务器同步数据
 字典表相关
 代码重构,空间换时间,dictionary 不要用object ,需明确指定类型
 stringbuilder for test performance 性能 update 性能
 nvarchar 和varchar 在len下一致,datalength下nvarchar翻倍
 android GradLayout实现计算器
 屏幕录制GIF动画工具
 android SharedPreferences 简单的数据存储

原文地址：https://www.cnblogs.com/hyao/p/13303637.html

scrapy 框架简单 爬取 4K高清 壁纸

scrapy 框架简单爬取 4K高清壁纸