zoukankan      html  css  js  c++  java
  • 爬虫 ==》 同步调用

    爬虫 ==》  同步调用

    import requests
    def parse_page(res):
        print('PARSE %s' %(len(res)))
    
    def get_page(url):
        print('GET %s' %url)
        response=requests.get(url)
        if response.status_code == 200:
            return response.text
    
    
    if __name__ == '__main__':
        urls=[
            'https://www.baidu.com',
            'https://www.taobao.com',
            'https://www.openstack.org',
        ]
        for url in urls:
            res=get_page(url)
            parse_page(res)
    同步调用
    import requests
    from threading import Thread,current_thread
    
    def parse_page(res):
        print('%s PARSE %s' %(current_thread().getName(),len(res)))
    
    def get_page(url,callback=parse_page):
        print('%s GET %s' %(current_thread().getName(),url))
        response=requests.get(url)
        if response.status_code == 200:
            callback(response.text)
    
    
    if __name__ == '__main__':
        urls=[
            'https://www.baidu.com',
            'https://www.taobao.com',
            'https://www.openstack.org',
        ]
        for url in urls:
            t=Thread(target=get_page,args=(url,))
            t.start()
    多线程与多进程
    import requests
    from threading import current_thread
    from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
    
    def parse_page(res):
        res=res.result()
        print('%s PARSE %s' %(current_thread().getName(),len(res)))
    
    def get_page(url):
        print('%s GET %s' %(current_thread().getName(),url))
        response=requests.get(url)
        if response.status_code == 200:
            return response.text
    
    if __name__ == '__main__':
        urls=[
            'https://www.baidu.com',
            'https://www.taobao.com',
            'https://www.openstack.org',
        ]
        pool=ThreadPoolExecutor(50)
    
        for url in urls:
            pool.submit(get_page,url).add_done_callback(parse_page)
    
        pool.shutdown(wait=True)
    线程池与进程池
    from gevent import joinall,spawn,monkey;monkey.patch_all()
    import requests
    from threading import current_thread
    
    def parse_page(res):
        print('%s PARSE %s' %(current_thread().getName(),len(res)))
    
    def get_page(url,callback=parse_page):
        print('%s GET %s' %(current_thread().getName(),url))
        response=requests.get(url)
        if response.status_code == 200:
            callback(response.text)
    
    if __name__ == '__main__':
        urls=[
            'https://www.baidu.com',
            'https://www.taobao.com',
            'https://www.openstack.org',
        ]
    
        tasks=[]
        for url in urls:
            tasks.append(spawn(get_page,url))
    
        joinall(tasks)
    gevent模块
  • 相关阅读:
    Java技术之ThreadLocal的使用
    find and find_by
    vim-config
    把字符串当做js代码执行的方法
    lodop打印设计
    前端使用lodop如何获取打印状态
    lodop第三方插件的使用
    nodejs中http服务器,如何使用GET,POST请求发送数据、npm、以及一些插件的介绍
    nodejs 用http模块搭建的服务器的路由,以及路由代码的重构过程
    nodejs基础 用http模块 搭建一个简单的web服务器 响应JSON、html
  • 原文地址:https://www.cnblogs.com/zhongbokun/p/8330651.html
Copyright © 2011-2022 走看看