"""
Scrape Bing wallpaper images.

Fetches the gallery index page at ``url``, reads every thumbnail URL
(``data-progressive`` attribute of ``<img>``) and every ``<h3>`` title, then
downloads the 1920x1080 variant of each image into the ``picture`` directory
as ``<title>.jpg``.
"""

import requests
from lxml import etree
import os

url = "https://bing.ioliu.cn/"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.164 Safari/537.36'
}

# Seconds to wait for a server before giving up; without a timeout a stalled
# connection would hang the script forever.
REQUEST_TIMEOUT = 15

# Characters that are path separators or are rejected in file names on
# Windows; each one is replaced by '_' before the title is used as a name.
_FILENAME_TRANSLATION = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

# Directory the downloaded images are written to.
dir_path = "picture"

resp = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
# Fail loudly on an HTTP error instead of silently parsing an error page.
resp.raise_for_status()

# Parse the page text (str) into an element tree.
resp_html = etree.HTML(resp.text)

# Both xpath calls return plain lists of strings.
img_urls_list = resp_html.xpath('//img/@data-progressive')
img_names_list = resp_html.xpath('//h3/text()')

# Create the output directory once, up front; exist_ok avoids the
# check-then-create race of os.path.exists + os.mkdir.
os.makedirs(dir_path, exist_ok=True)

# NOTE(review): zip pairs URLs and titles purely by position and stops at the
# shorter list -- this assumes the page has exactly one <h3> per image.
for img_url, img_name in zip(img_urls_list, img_names_list):
    # The title text carries a "(© author)" suffix; keep only the part before it.
    title = img_name.split('(©')[0]
    print(img_url)
    print(title)

    # Thumbnail URLs embed the size "640x480"; swap it for the full-HD variant.
    full_size_url = img_url.split('640x480')[0] + '1920x1080.jpg'

    try:
        img_resp = requests.get(full_size_url, headers=headers,
                                timeout=REQUEST_TIMEOUT)
        img_resp.raise_for_status()
    except requests.RequestException as exc:
        # One broken image should not abort the whole run, and an error page
        # must never be saved to disk as a .jpg.
        print('skipped {}: {}'.format(full_size_url, exc))
        continue
    result = img_resp.content

    # Strip surrounding whitespace and neutralise characters that would break
    # the path; fall back to a fixed name if nothing usable is left.
    file_name = title.strip().translate(_FILENAME_TRANSLATION) or 'untitled'

    # The context manager closes the file even if the write fails.
    with open(os.path.join(dir_path, file_name + '.jpg'), 'wb') as f:
        f.write(result)