  • Crawler ==> synchronous calls


    import requests

    def parse_page(res):
        # Process the downloaded page; here we only report its length.
        print('PARSE %s' % len(res))

    def get_page(url):
        # Blocking call: execution waits here until the response arrives.
        print('GET %s' % url)
        response = requests.get(url)
        if response.status_code == 200:
            return response.text
        return ''   # keep parse_page from crashing on a non-200 response


    if __name__ == '__main__':
        urls = [
            'https://www.baidu.com',
            'https://www.taobao.com',
            'https://www.openstack.org',
        ]
        for url in urls:
            res = get_page(url)   # each download blocks the next one
            parse_page(res)
    Synchronous calls
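    With synchronous calls the three downloads run back to back, so the total runtime is roughly the sum of the individual request times. The timing sketch below makes that visible; the measurement code is an illustration added here, not part of the original example:

    import time
    import requests

    def timed_serial_fetch(urls):
        # Fetch each URL in turn and report how long the serial loop takes.
        start = time.time()
        for url in urls:
            response = requests.get(url)
            print('GET %s -> %s bytes' % (url, len(response.text)))
        print('serial total: %.2fs' % (time.time() - start))

    if __name__ == '__main__':
        timed_serial_fetch([
            'https://www.baidu.com',
            'https://www.taobao.com',
            'https://www.openstack.org',
        ])

    The versions below remove that serial bottleneck with threads, a thread pool, and gevent greenlets.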
    import requests
    from threading import Thread, current_thread

    def parse_page(res):
        print('%s PARSE %s' % (current_thread().name, len(res)))

    def get_page(url, callback=parse_page):
        # Runs inside a worker thread; hands the page text to the callback.
        print('%s GET %s' % (current_thread().name, url))
        response = requests.get(url)
        if response.status_code == 200:
            callback(response.text)


    if __name__ == '__main__':
        urls = [
            'https://www.baidu.com',
            'https://www.taobao.com',
            'https://www.openstack.org',
        ]
        for url in urls:
            # One thread per URL: the downloads now overlap instead of queueing.
            t = Thread(target=get_page, args=(url,))
            t.start()
    Multithreading and multiprocessing
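    The block above covers only the multithreaded half of that label. A process-per-URL variant follows the same pattern with multiprocessing.Process; this is a minimal sketch added for illustration, not code from the original:

    import requests
    from multiprocessing import Process, current_process

    def get_page(url):
        # Each download runs in its own OS process.
        print('%s GET %s' % (current_process().name, url))
        response = requests.get(url)
        if response.status_code == 200:
            print('%s PARSE %s' % (current_process().name, len(response.text)))

    if __name__ == '__main__':
        urls = [
            'https://www.baidu.com',
            'https://www.taobao.com',
            'https://www.openstack.org',
        ]
        procs = [Process(target=get_page, args=(url,)) for url in urls]
        for p in procs:
            p.start()
        for p in procs:
            p.join()   # wait for every child process to finish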
    import requests
    from threading import current_thread
    from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor

    def parse_page(res):
        # Done-callback: it receives the Future, not the return value itself.
        res = res.result()
        print('%s PARSE %s' % (current_thread().name, len(res)))

    def get_page(url):
        print('%s GET %s' % (current_thread().name, url))
        response = requests.get(url)
        if response.status_code == 200:
            return response.text
        return ''

    if __name__ == '__main__':
        urls = [
            'https://www.baidu.com',
            'https://www.taobao.com',
            'https://www.openstack.org',
        ]
        pool = ThreadPoolExecutor(50)   # at most 50 worker threads

        for url in urls:
            pool.submit(get_page, url).add_done_callback(parse_page)

        pool.shutdown(wait=True)        # block until all submitted tasks finish
    Thread pools and process pools
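    Only the thread pool is exercised above, even though ProcessPoolExecutor is imported. Swapping it in gives the process-pool version with the same submit/add_done_callback pattern; a minimal sketch (note the done-callback fires back in the parent process):

    import requests
    from os import getpid
    from concurrent.futures import ProcessPoolExecutor

    def get_page(url):
        print('%s GET %s' % (getpid(), url))
        response = requests.get(url)
        return response.text if response.status_code == 200 else ''

    def parse_page(future):
        # Done-callback in the parent process; the worker's result travels back pickled.
        print('%s PARSE %s' % (getpid(), len(future.result())))

    if __name__ == '__main__':
        urls = [
            'https://www.baidu.com',
            'https://www.taobao.com',
            'https://www.openstack.org',
        ]
        with ProcessPoolExecutor(max_workers=3) as pool:
            for url in urls:
                pool.submit(get_page, url).add_done_callback(parse_page)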
    from gevent import joinall, spawn, monkey; monkey.patch_all()  # patch the stdlib before importing requests
    import requests
    from threading import current_thread

    def parse_page(res):
        print('%s PARSE %s' % (current_thread().name, len(res)))

    def get_page(url, callback=parse_page):
        # Inside a greenlet: the monkey-patched socket yields while waiting on I/O.
        print('%s GET %s' % (current_thread().name, url))
        response = requests.get(url)
        if response.status_code == 200:
            callback(response.text)

    if __name__ == '__main__':
        urls = [
            'https://www.baidu.com',
            'https://www.taobao.com',
            'https://www.openstack.org',
        ]

        tasks = []
        for url in urls:
            tasks.append(spawn(get_page, url))

        joinall(tasks)
    The gevent module
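    spawn creates one greenlet per URL with no upper bound. For a long URL list, gevent.pool.Pool caps how many greenlets run at once; a minimal sketch, assuming the same kind of get_page as above:

    from gevent import monkey; monkey.patch_all()  # patch the stdlib before importing requests
    from gevent.pool import Pool
    import requests

    def get_page(url):
        response = requests.get(url)
        print('GET %s -> %s bytes' % (url, len(response.text)))

    if __name__ == '__main__':
        urls = [
            'https://www.baidu.com',
            'https://www.taobao.com',
            'https://www.openstack.org',
        ]
        pool = Pool(2)          # at most two greenlets run concurrently
        for url in urls:
            pool.spawn(get_page, url)
        pool.join()             # wait for every spawned greenlet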
  • Original article: https://www.cnblogs.com/zhongbokun/p/8330651.html