# Python: download Pixiv (p站) images by author id (multi-process)
import requests
import os
import time
import re
from multiprocessing import Process
from concurrent.futures import ProcessPoolExecutor
# Seconds to wait for a server response before a request is abandoned;
# without a timeout a stalled connection would block a pool worker forever.
_REQUEST_TIMEOUT = 30


def _find_original_url(page_html):
    """Return the first "original" image URL found in *page_html*, or None.

    Backslashes are removed from the matched text (presumably JSON-escaped
    slashes in the embedded page data -- confirm against a live page).
    """
    match = re.search(r'"original":"(.*?)"', page_html)
    if match is None:
        return None
    return match.group(1).replace('\\', '')


def test(id_p):
    """Download every illustration listed in a Pixiv author's profile.

    The author's profile JSON is fetched to obtain the illustration ids,
    each illustration page is fetched and searched for its "original" image
    URL, and the image is saved into a per-author folder. Progress is
    rewritten on a single console line, followed by a completion message.

    Args:
        id_p: Author id as a string; it is concatenated into the request
            URL and the folder name.

    Raises:
        requests.RequestException: If a request fails or times out.
    """
    # NOTE(review): a session cookie is hard-coded below; consider loading it
    # from the environment or a config file instead of committing it.
    head = {
        'Referer': 'https://www.pixiv.net/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
        'cookie': 'PHPSESSID=43437028_7c06ec1fd0e152e26fa0dab9c9fa919e'
    }
    # Image downloads are sent with Referer and User-Agent only (no cookie).
    headss = {
        'Referer': 'https://www.pixiv.net',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'
    }
    profile_url = 'https://www.pixiv.net/ajax/user/' + id_p + '/profile/all'

    # Create the output folder.
    # NOTE(review): path separators look like they were lost from this
    # literal (it is kept unchanged here) -- confirm the intended location.
    save_dir = f'H:图片P站作者id:{id_p}'
    os.makedirs(save_dir, exist_ok=True)

    profile = requests.get(
        profile_url, headers=head, timeout=_REQUEST_TIMEOUT
    ).json()
    body = profile.get('body')
    # Tolerate a missing/empty "illusts" entry instead of crashing on .keys().
    illusts = body.get('illusts') if isinstance(body, dict) else None

    # Build the illustration page URLs.
    page_urls = [
        'https://www.pixiv.net/member_illust.php?mode=medium&illust_id=' + illust_id
        for illust_id in (illusts or ())
    ]

    saved = 0
    for page_url in page_urls:
        page = requests.get(page_url, headers=head, timeout=_REQUEST_TIMEOUT)
        image_url = _find_original_url(page.text)
        if image_url is not None:
            image = requests.get(
                image_url, headers=headss, timeout=_REQUEST_TIMEOUT
            )
            # Skip error responses so an error body is not saved as an image.
            if image.ok:
                filename = image_url.split('/')[-1]
                with open(os.path.join(save_dir, filename), 'wb') as fw:
                    fw.write(image.content)
                saved += 1
                # '\r' returns to the line start so the counter updates in place.
                print(f'\r--------{id_p}--------------{filename}------{saved}----------------', end='')
        time.sleep(0.2)

    if saved:
        # Terminate the in-place progress line.
        print()
    time.sleep(0.5)
    print(f'-----------{id_p}作品获取完成----------')
def _report_failure(future):
    """Print the exception raised by a finished download task, if any."""
    if future.cancelled():
        return
    error = future.exception()
    if error is not None:
        # Without this, an exception in a worker is stored on the future and
        # never shown, because the future is otherwise discarded.
        print(f'download task failed: {error!r}')


def main():
    """Read author ids from stdin and download each one in a worker process.

    Blank input is ignored. End-of-file on stdin stops the prompt loop; the
    ``with`` block then waits for submitted downloads before exiting.
    """
    with ProcessPoolExecutor(3) as pool:
        while True:
            try:
                id_p = input('输入作者id生成网址').strip()
            except EOFError:
                break
            if not id_p:
                continue
            pool.submit(test, id_p).add_done_callback(_report_failure)


if __name__ == '__main__':
    main()