zoukankan      html  css  js  c++  java
  • 爬虫-- 初级

    普通同步代码的耗时

    import requests
    from functools import wraps
    import time
    
    def time_count(func):
        """Decorate ``func`` so every call reports its wall-clock duration.

        The wrapper forwards all positional and keyword arguments, prints one
        line containing the function name and the elapsed seconds (two
        decimals), and returns whatever ``func`` returned.
        """
        report = 'func {} cost {:.2f} s'

        @wraps(func)
        def timed(*call_args, **call_kwargs):
            began = time.time()
            outcome = func(*call_args, **call_kwargs)
            finished = time.time()
            print(report.format(func.__name__, finished - began))
            return outcome

        return timed
    
    
    @time_count
    def normal():
        """Fetch ``URL`` twice, one request after the other, printing each response URL."""
        for _ in range(2):
            response = requests.get(URL)
            print(response.url)
            
    if __name__ == '__main__':
        # URL is read as a module-level global by normal(), so it must be bound
        # before the call below.
        URL = 'https://morvanzhou.github.io'
        normal()
    

    异步 IO(asyncio、aiohttp 等):对于 IO 密集型任务,使用异步 IO 来处理;对于计算密集型(即依赖 CPU 的)任务,采用多进程。

    import asyncio
    import aiohttp
    from functools import wraps
    import time
    
    def time_count(func):
        """Decorate ``func`` so each call prints how long it took.

        Works for both plain functions and ``async def`` coroutine functions.
        For a coroutine function the wrapper is itself a coroutine function
        that awaits ``func``, so the reported time covers the awaited work
        rather than only the creation of the coroutine object.

        The printed line has the form
        ``func <name>-<first positional argument> cost <seconds> s``;
        ``None`` is shown when the call has no positional arguments.

        Args:
            func: The function or coroutine function to time.

        Returns:
            A wrapper that takes the same arguments as ``func`` and yields the
            same result.
        """
        # Imported locally so the decorator does not rely on a file-level import.
        import inspect

        def report(args, start):
            # Calls without positional arguments must not fail on args[0].
            t = args[0] if args else None
            print('func {}-{} cost {:.2f} s'.format(func.__name__, t, time.time() - start))

        if inspect.iscoroutinefunction(func):
            @wraps(func)
            async def inner_func(*args, **kw):
                start = time.time()
                # Awaiting here is what makes the measurement include the
                # coroutine's actual run time.
                result = await func(*args, **kw)
                report(args, start)
                return result
        else:
            @wraps(func)
            def inner_func(*args, **kw):
                start = time.time()
                result = func(*args, **kw)
                report(args, start)
                return result
        return inner_func
    
    @time_count
    async def job(session):
        """Request ``URL`` through ``session`` and return the response's URL.

        Args:
            session: The client session used to issue the GET request.

        Returns:
            ``response.url`` converted to ``str``.
        """
        # Use the response as an async context manager so its connection is
        # released once the URL has been read, instead of being left open.
        async with session.get(URL) as response:
            return str(response.url)
    
    async def main(loop):
        """Run two ``job`` requests concurrently on one session and print their results.

        Args:
            loop: The event loop used to schedule the tasks.
        """
        async with aiohttp.ClientSession() as session:
            tasks = [loop.create_task(job(session)) for _ in range(2)]
            # gather() returns the results in task order; asyncio.wait() hands
            # back an unordered set, which made the printed order arbitrary.
            all_results = await asyncio.gather(*tasks)
            print(all_results)
    
    if __name__ == '__main__':
        # URL is read as a module-level global by job().
        URL = 'https://morvanzhou.github.io'
        # Create the loop explicitly: relying on asyncio.get_event_loop() to
        # create one implicitly is deprecated in recent Python versions.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            loop.run_until_complete(main(loop))
        finally:
            # Always release the loop's resources, even if main() fails.
            loop.close()
    
    

    另一个异步示例

    import asyncio
    from functools import wraps
    import time
    
    def time_count(func):
        """Wrap ``func`` so that each call prints its elapsed wall-clock time.

        Coroutine functions get an ``async`` wrapper that awaits ``func``
        before reporting; a plain wrapper would only time the creation of the
        coroutine object and print roughly zero before any work ran. Plain
        functions keep a synchronous wrapper.

        The report reads
        ``func <name>-<first positional argument> cost <seconds> s``, with
        ``None`` in place of the argument when none was passed positionally.

        Args:
            func: The callable to time; may be sync or ``async def``.

        Returns:
            A wrapper that takes the same arguments as ``func`` and yields the
            same result.
        """
        # Local import keeps the decorator independent of file-level imports.
        import inspect

        template = 'func {}-{} cost {:.2f} s'

        def emit(args, start):
            # Avoid IndexError on args[0] for calls without positional arguments.
            first = args[0] if args else None
            print(template.format(func.__name__, first, time.time() - start))

        if inspect.iscoroutinefunction(func):
            @wraps(func)
            async def inner_func(*args, **kw):
                start = time.time()
                # The await keeps the timer running until the coroutine finishes.
                result = await func(*args, **kw)
                emit(args, start)
                return result
        else:
            @wraps(func)
            def inner_func(*args, **kw):
                start = time.time()
                result = func(*args, **kw)
                emit(args, start)
                return result
        return inner_func
    
    @time_count
    async def job(t):
        """Suspend this coroutine for ``t`` seconds via ``asyncio.sleep``."""
        await asyncio.sleep(t)
    
    @time_count
    async def main(loop):
        """Schedule ``job(0)``, ``job(1)`` and ``job(2)`` on ``loop`` and wait for all of them."""
        scheduled = []
        for delay in range(3):
            scheduled.append(loop.create_task(job(delay)))
        await asyncio.wait(scheduled)
        
    if __name__ == '__main__':
        # Create the loop explicitly: relying on asyncio.get_event_loop() to
        # create one implicitly is deprecated in recent Python versions.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            loop.run_until_complete(main(loop))
        finally:
            # Always release the loop's resources, even if main() fails.
            loop.close()
    

    同步举例

    from functools import wraps
    import time
    
    def time_count(func):
        """Return a wrapper around ``func`` that prints how long each call took.

        The wrapper passes its arguments straight through, prints
        ``func <name> cost <seconds> s`` after the call returns, and hands back
        the call's result unchanged.
        """
        @wraps(func)
        def inner_func(*args, **kw):
            t0 = time.time()
            value = func(*args, **kw)
            t1 = time.time()
            message = 'func {} cost {:.2f} s'.format(func.__name__, t1 - t0)
            print(message)
            return value
        return inner_func
    
    @time_count
    def job(t):
        """Block for ``t`` seconds via ``time.sleep``."""
        time.sleep(t)
    
    @time_count
    def main():
        """Call ``job`` with 0, 1 and 2, one call after another."""
        # A plain loop: the calls are made only for their side effect, so there
        # is no reason to build a throwaway list of their return values.
        for i in range(3):
            job(i)
        
    
    if __name__ == '__main__':
        # Run the timed sequential demo only when executed as a script.
        main()
        
        
    
  • 相关阅读:
    弹出层
    jQuerySelectors(选择器)的使用(三、简单篇)
    jQuerySelectors(选择器)的使用(二、层次篇)
    jQuerySelectors(选择器)的使用(四五、内容篇&可见性篇)
    对frameset、frame、iframe的js操作
    文件上传
    C# 字符串操作
    图片防盗链之HttpHandler方法实现
    MSSQL 存储过程
    dataset操作
  • 原文地址:https://www.cnblogs.com/Frank99/p/10397334.html
Copyright © 2011-2022 走看看