zoukankan      html  css  js  c++  java
  • Python之多进程根据p站画师id爬取

    Python之p站根据id爬取图片(多进程)

    import requests
    import os
    import time
    import re
    from multiprocessing import Process
    from concurrent.futures import ProcessPoolExecutor
    def test(id_p):
        """Download every illustration listed for the pixiv user ``id_p``.

        The function asks pixiv's ``/ajax/user/<id>/profile/all`` endpoint for
        the user's illustration ids, opens each illustration page, extracts the
        first ``"original"`` image URL found in the page text and saves that
        image into a per-user folder.  Progress is printed on one console line.

        Args:
            id_p: The pixiv user id, as a string (it is concatenated into URLs
                and into the folder name).

        Raises:
            requests.RequestException: If the profile request fails or returns
                an HTTP error status.  Failures on individual illustrations
                are reported and skipped instead of aborting the whole run.
        """
        # NOTE(review): a session cookie is hard-coded below.  It is kept
        # byte-for-byte so behaviour is unchanged, but it is a credential and
        # should be loaded from configuration rather than committed.
        head = {
            'Referer': 'https://www.pixiv.net/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
            'cookie': 'PHPSESSID=43437028_7c06ec1fd0e152e26fa0dab9c9fa919e'
        }

        # Image downloads are sent without the cookie, only Referer + UA.
        headss = {
            'Referer': 'https://www.pixiv.net',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'
        }
        zp = 'https://www.pixiv.net/ajax/user/' + id_p + '/profile/all'

        # NOTE(review): this folder name looks like a Windows path whose
        # backslashes were lost when the listing was published -- confirm the
        # intended location.  The string itself is left unchanged.
        save_dir = f'H:图片P站作者id:{id_p}'
        # makedirs(exist_ok=True) is safe when several pool workers start at
        # once, unlike a separate exists() check followed by mkdir().
        os.makedirs(save_dir, exist_ok=True)

        res = requests.get(zp, headers=head, timeout=30)
        res.raise_for_status()
        date = res.json()

        # Tolerate a missing "body" or an "illusts" value that is not a
        # mapping (e.g. empty) instead of failing on .keys().
        illusts = (date.get('body') or {}).get('illusts')
        if not isinstance(illusts, dict):
            illusts = {}

        # One illustration page URL per illustration id.
        url_jpg = [
            'https://www.pixiv.net/member_illust.php?mode=medium&illust_id=' + k
            for k in illusts
        ]

        original_re = re.compile(r'"original":"(.*?)"')
        for su, page_url in enumerate(url_jpg, 1):
            try:
                res_id = requests.get(page_url, headers=head, timeout=30)
                # Take only the first match: joining all matches would glue
                # several URLs together into one unusable string.
                found = original_re.search(res_id.text)
                if found is None:
                    print(f'\n{page_url}: original image URL not found, skipped')
                    continue
                # The URL is embedded in JSON, where "/" is written as "\/".
                url = found.group(1).replace('\\', '')
                rese = requests.get(url, headers=headss, timeout=60)
                rese.raise_for_status()
            except requests.RequestException as err:
                print(f'\n{page_url}: download failed, skipped ({err})')
                continue

            file_name = url.split("/")[-1]
            # Join folder and file name so the image lands inside save_dir.
            with open(os.path.join(save_dir, file_name), 'wb') as fw:
                fw.write(rese.content)

            # "\r" rewinds to the start of the line so each update overwrites
            # the previous one; flush because no newline is emitted.
            print(f'\r--------{id_p}--------------{file_name}------{su}----------------',
                  end='', flush=True)
            time.sleep(0.2)

        if url_jpg:
            # Terminate the progress line once the last item is handled.
            print()
        time.sleep(0.5)
        print(f'-----------{id_p}作品获取完成----------')
    
    
    
    if __name__ == '__main__':
        def _report(future):
            """Print the exception of a finished download task, if any.

            Without this, an error raised inside a worker process stays
            stored on the discarded Future and is never shown.
            """
            if future.cancelled():
                return
            err = future.exception()
            if err is not None:
                print(f'\n下载任务失败: {err!r}')

        # The with-block shuts the pool down on exit and waits for the
        # downloads that were already submitted.
        with ProcessPoolExecutor(3) as pool:
            while True:
                try:
                    id_p = input('输入作者id生成网址').strip()
                except EOFError:
                    # stdin closed: stop prompting instead of crashing.
                    break
                if not id_p:
                    # A blank line ends the session rather than queueing a
                    # request with an empty user id.
                    break
                pool.submit(test, id_p).add_done_callback(_report)
    
    
    
    
  • 相关阅读:
    RN8209校正软件开发心得(1)
    Chrome 31版本导出Excel问题
    ComBox选择
    网页设计的一般步骤
    .NET一套开发工具
    关于用sql语句实现一串数字位数不足在左侧补0的技巧
    python jieba模块详解
    python内置函数详细描述与实例演示
    Markdown的基本语法记录
    python configparser模块详解
  • 原文地址:https://www.cnblogs.com/ledgua/p/11574060.html
Copyright © 2011-2022 走看看