import os

import scrapy
class TpSpider(scrapy.Spider):
    """Download wallpapers from the pic.netbian.com "4kmeinv" listing.

    Crawl flow:
      1. ``parse``      - listing page -> one request per detail page.
      2. ``imgs_parse`` - detail page  -> one request per image file.
      3. ``img_parse``  - image bytes  -> ``<images_dir>/<name>.jpg`` on disk.
    """

    name = 'tp'
    # allowed_domains = ['baidu.com']

    # Wallpaper listing pages: crawl the first 10 pages. The first page has
    # no numeric suffix; pages 2..10 are named index_<n>.html.
    start_urls = ['http://pic.netbian.com/4kmeinv/index.html'] + [
        'http://pic.netbian.com/4kmeinv/index_%s.html' % page
        for page in range(2, 11)
    ]

    # Directory that downloaded images are written to.
    images_dir = './imgs'

    # Characters that are path separators or invalid in file names on common
    # platforms; they are replaced with '_' before the name is used on disk.
    _unsafe_chars = str.maketrans({char: '_' for char in '\\/:*?"<>|'})

    def parse(self, response):
        """Yield a request for the detail page of every listing entry.

        :param response: response for one of the listing pages.
        """
        # Collect the link to each picture's detail page.
        for li in response.xpath('//ul[@class="clearfix"]/li'):
            href = li.xpath('./a/@href').extract_first()
            if not href:
                # Entry without a link: nothing to follow.
                continue
            yield scrapy.Request(
                url=response.urljoin(href),
                callback=self.imgs_parse,
            )

    def imgs_parse(self, response):
        """Yield a request for each image shown on a detail page.

        The image's ``alt`` text is forwarded in ``meta['name']`` so that
        ``img_parse`` can use it as the file name.

        :param response: response for a picture detail page.
        """
        # Locate the high-resolution (4K) picture on the detail page.
        for link in response.xpath('//div[@class="photo-pic"]/a'):
            src = link.xpath('./img/@src').extract_first()
            if not src:
                # No image source: there is nothing to download.
                continue
            name = link.xpath('./img/@alt').extract_first()
            yield scrapy.Request(
                url=response.urljoin(src),
                callback=self.img_parse,
                meta={'name': name},
            )

    def img_parse(self, response):
        """Write the downloaded image body to ``<images_dir>/<name>.jpg``.

        :param response: response whose body is the raw image data and whose
            ``meta['name']`` carries the title taken from the detail page.
        """
        # Fall back to the file name in the URL when the page gave no title,
        # so untitled images do not all overwrite a single 'None.jpg'.
        name = response.meta.get('name') or os.path.splitext(
            os.path.basename(response.url))[0]
        # The title comes from remote page content; neutralise separators so
        # it cannot point outside images_dir or produce an invalid name.
        name = name.translate(self._unsafe_chars)

        # Save the image locally, creating the target directory on first use.
        os.makedirs(self.images_dir, exist_ok=True)
        path = os.path.join(self.images_dir, '%s.jpg' % name)
        with open(path, 'wb') as f:
            f.write(response.body)
        print('正在下载图片:%s' % name)