zoukankan      html  css  js  c++  java
  • python_爬虫_multiprocessing.dummy以及multiprocessing

    ''' # Code template
    Task-adding function, task-executing function; process/thread switching function; process/thread launching function.
    '''
    from multiprocessing import Pool as processPoll
    from multiprocessing.dummy import Pool as ThreadPool
    
    def get_page():
        """Template placeholder for the task-execution step; does nothing yet."""
    
    def url_list():
        """Template placeholder for the task-adding step; does nothing yet."""
    
    def get_pool():
        """Template placeholder for choosing a process or thread pool; does nothing yet."""
    
    def open_pool():
        """Template placeholder for the start-up step; does nothing yet."""
    
    # Entry point: call open_pool() only when this file is run as a script,
    # not when it is imported as a module.
    if __name__ == '__main__':
        open_pool()

    使用16个线程爬取腾讯招聘的100页分页信息,用时约6秒(3M网速)

    '''
    Task-adding function, task-executing function; process/thread switching function; process/thread launching function.
    '''
    import requests
    from urllib import request
    import ssl
    # NOTE(review): this replaces ssl's default HTTPS context factory with the
    # unverified one, so urllib HTTPS requests made after this line skip
    # certificate verification for the whole process. It relies on private
    # (underscore-prefixed) ssl names; confirm this is intended before reuse.
    ssl._create_default_https_context = ssl._create_unverified_context
    from datetime import datetime
    from multiprocessing import Pool as ProcessPoll
    from multiprocessing.dummy import Pool as ThreadPool
    
    def get_page(task_q):
        """Fetch one page and print the URL the response came from.

        This is the task-execution function; open_pool() maps it over the URL list.

        Args:
            task_q: URL of the page to fetch (a single URL, despite the name).

        Returns:
            None. The response URL is printed to stdout.

        Raises:
            urllib.error.URLError: propagated from urlopen when the request fails.
        """
        # Browser-like User-Agent header sent with the request.
        headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'}

        req = request.Request(task_q, headers=headers)
        # The context manager closes the response as soon as we are done with it;
        # previously it was never closed explicitly, which leaks one open
        # response per fetched URL until garbage collection.
        with request.urlopen(req) as response:
            print(response.url)
    
    def url_list(pages=100, page_size=10):
        """Build the list of paginated URLs to crawl (the task-adding function).

        Args:
            pages: Number of page URLs to generate. Defaults to 100, the value
                that was previously hard-coded.
            page_size: Increment of the ``start`` query parameter between
                consecutive pages. Defaults to 10, as before.

        Returns:
            A list of URL strings whose ``start`` values are
            0, page_size, ..., (pages - 1) * page_size. Empty when pages is 0.
        """
        base_url = 'http://hr.tencent.com/position.php?start={}'
        return [base_url.format(page * page_size) for page in range(pages)]
    
    def get_pool(way=True,count=4):
        """Create a worker pool of the requested kind.

        Args:
            way: Truthy selects a process pool (ProcessPoll); falsy selects a
                thread pool (ThreadPool).
            count: Worker count passed to the pool constructor.

        Returns:
            The newly created pool object.
        """
        pool_factory = ProcessPoll if way else ThreadPool
        return pool_factory(count)
    
    def open_pool():
        """Run the whole crawl and print how long it took.

        Creates a 16-worker thread pool via get_pool(way=False, count=16), maps
        get_page over every URL from url_list(), waits for the workers to
        finish, then prints the elapsed time.

        Returns:
            None.

        Raises:
            Any exception raised by get_page for a URL is re-raised by
            pool.map; in that case the elapsed-time line is not printed.
        """
        start = datetime.now()
        pool = get_pool(way=False, count=16)
        try:
            pool.map(get_page, url_list())
        finally:
            # Always shut the pool down, even when a task raised; previously a
            # failing request skipped close()/join() and left the workers alive.
            pool.close()
            pool.join()
        end = datetime.now()
        # Message text means "program finished, elapsed time".
        print('程序结束,用时',end-start)
    # Entry point: start the crawl only when this file is run as a script,
    # not when it is imported as a module.
    if __name__ == '__main__':
        open_pool()
  • 相关阅读:
    解决Mac笔记本电脑自带录屏软件没有声音问题
    pip安装包后Import的时候提示找不到的解决方案
    Photoshop怎么给图片添加简介信息或者版权信息
    [2021/08/06]Ubuntu20 安装指定小版本Mysql(本文示例mysql8.0.18)
    Springboot2.3.5 实现JWT授权验证并针对不同用户实现多个拦截器
    搭建集群步骤注意事项
    docker服务程序网络排查
    网络分层协议集合分析
    括号串
    抢救实验数据
  • 原文地址:https://www.cnblogs.com/hejianlong/p/9345450.html
Copyright © 2011-2022 走看看