#-*- coding:utf-8 -*-
from multiprocessing import Pool
from bs4 import BeautifulSoup
import requests
from lxml import etree
import re
import os
import time

url = 'https://bing.ioliu.cn/'
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
}

# One requests Session reused for all requests (connection pooling);
# each pool worker process gets its own copy at fork time.
requestsSession = requests.Session()

# Listing pages to scrape: 1..145 (site pagination).
imgPage = (x for x in range(1, 146))


def getImg(imgPage):
    """Download every wallpaper image from one listing page of bing.ioliu.cn.

    Fetches listing page number `imgPage`, extracts the image `src` URLs via
    XPath, and saves each image under ./Img/<page>/<name>.jpg. Sleeps 3s at
    the end to throttle requests and avoid being banned.

    :param imgPage: listing page number (int).
    :return: None (side effects: files written under ./Img/, prints progress).
    """
    page = imgPage
    params = {
        "p": page
    }
    # Fetch the listing page HTML.
    responseHtml = requestsSession.get(url=url, headers=headers, params=params, timeout=3).text
    # Parse with lxml and pull the image src attributes via XPath.
    xpathObj = etree.HTML(responseHtml)
    imgSrc = xpathObj.xpath('//div[@class="container"]//div[@class="card progressive"]/img/@src')
    page = str(page)
    # One directory per page. makedirs (not mkdir) also creates ./Img when it
    # is missing, and exist_ok=True makes this race-free across pool workers.
    os.makedirs('./Img/' + page, exist_ok=True)
    # Download and store every image on this page.
    for i in imgSrc:
        # Basename without query string, e.g. 'Foo_1920x1080.jpg'.
        imgName = i.split('/')[-1].split('?')[0]
        # timeout so a single hung image download cannot stall the worker.
        imgData = requestsSession.get(url=i, headers=headers, timeout=10).content
        # The basename usually already carries '.jpg' — only append when it
        # does not, avoiding 'name.jpg.jpg' files.
        if not imgName.lower().endswith('.jpg'):
            imgName = imgName + '.jpg'
        imgPath = './Img/' + page + '/' + imgName
        with open(imgPath, 'wb') as dp:
            dp.write(imgData)
        print(imgName, '下载成功!')
    # 3s pause between pages to avoid getting banned.
    time.sleep(3)


if __name__ == "__main__":
    # Process pool: map() feeds one page number per call of getImg,
    # distributing the 145 pages over 10 worker processes.
    with Pool(10) as p:
        p.map(getImg, imgPage)