zoukankan      html  css  js  c++  java
  • 爬虫 ==》 同步调用

    爬虫 ==》  同步调用

    import requests
    def parse_page(res):
        print('PARSE %s' %(len(res)))
    
    def get_page(url):
        print('GET %s' %url)
        response=requests.get(url)
        if response.status_code == 200:
            return response.text
    
    
    if __name__ == '__main__':
        urls=[
            'https://www.baidu.com',
            'https://www.taobao.com',
            'https://www.openstack.org',
        ]
        for url in urls:
            res=get_page(url)
            parse_page(res)
    同步调用
    import requests
    from threading import Thread,current_thread
    
    def parse_page(res):
        print('%s PARSE %s' %(current_thread().getName(),len(res)))
    
    def get_page(url,callback=parse_page):
        print('%s GET %s' %(current_thread().getName(),url))
        response=requests.get(url)
        if response.status_code == 200:
            callback(response.text)
    
    
    if __name__ == '__main__':
        urls=[
            'https://www.baidu.com',
            'https://www.taobao.com',
            'https://www.openstack.org',
        ]
        for url in urls:
            t=Thread(target=get_page,args=(url,))
            t.start()
    多线程与多进程
    import requests
    from threading import current_thread
    from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
    
    def parse_page(res):
        res=res.result()
        print('%s PARSE %s' %(current_thread().getName(),len(res)))
    
    def get_page(url):
        print('%s GET %s' %(current_thread().getName(),url))
        response=requests.get(url)
        if response.status_code == 200:
            return response.text
    
    if __name__ == '__main__':
        urls=[
            'https://www.baidu.com',
            'https://www.taobao.com',
            'https://www.openstack.org',
        ]
        pool=ThreadPoolExecutor(50)
    
        for url in urls:
            pool.submit(get_page,url).add_done_callback(parse_page)
    
        pool.shutdown(wait=True)
    线程池与进程池
    from gevent import joinall,spawn,monkey;monkey.patch_all()
    import requests
    from threading import current_thread
    
    def parse_page(res):
        print('%s PARSE %s' %(current_thread().getName(),len(res)))
    
    def get_page(url,callback=parse_page):
        print('%s GET %s' %(current_thread().getName(),url))
        response=requests.get(url)
        if response.status_code == 200:
            callback(response.text)
    
    if __name__ == '__main__':
        urls=[
            'https://www.baidu.com',
            'https://www.taobao.com',
            'https://www.openstack.org',
        ]
    
        tasks=[]
        for url in urls:
            tasks.append(spawn(get_page,url))
    
        joinall(tasks)
    gevent模块
  • 相关阅读:
    【排序】SelectSort
    Linux下程序的Profile工具
    Ubuntu adb devices :???????????? no permissions 解决方法
    利用宏控制打印
    关于错误 Resource temporarily unavailable
    如何不使用pthread_cancel而杀死线程
    【排序】BubbleSort
    使用 autotools 生成包含多文件的 Makefile
    source命令使用
    2010 成都预选赛 Binary Number
  • 原文地址:https://www.cnblogs.com/zhongbokun/p/8330651.html
Copyright © 2011-2022 走看看