zoukankan      html  css  js  c++  java
  • requests_html使用asyncio

    import asyncio
    import functools
    from concurrent.futures.thread import ThreadPoolExecutor
    from requests_html import HTMLSession
    import sys
    session = HTMLSession()
    
    
    async def get_response(executor, *, url, loop: asyncio.AbstractEventLoop = None, ):
        if not loop:
            loop = asyncio.get_running_loop()
        request = functools.partial(session.get, url)
        return loop.run_in_executor(executor, request)
    
    
    async def bulk_requests(executor, *,
                            urls,
                            loop: asyncio.AbstractEventLoop = None, ):
        for url in urls:
            yield await get_response(executor, url=url, loop=loop)
    
    
    def filter_unsuccesful_requests(responses_and_exceptions):
        return filter(
            lambda url_and_response: not isinstance(url_and_response[1], Exception),
            responses_and_exceptions.items()
        )
    
    
    async def main():
        executor = ThreadPoolExecutor(10)
        urls = [
            "https://baidu.com",
            "https://cnblogs.com",
            "https://163.com",
        ]
        requests = [request async for request in bulk_requests(executor, urls=urls, )]
        responses_and_exceptions = dict(zip(urls, await asyncio.gather(*requests, return_exceptions=True)))
        responses = {url: resp.html for (url, resp) in filter_unsuccesful_requests(responses_and_exceptions)}
    
        for res in responses.items():
            print(res[1].xpath("//head//title//text()")[0])
    
        for url in urls:
            if url not in responses:
                print(f"No successful request could be made to {url}. Reason: {responses_and_exceptions[url]}",
                      file=sys.stderr)
    
    
    asyncio.run(main())
    
    
  • 相关阅读:
    登录后返回到登录页问题
    vue组件插槽
    js定义类
    arguments.callee用法
    深拷贝的原生js实现
    Hybrid APP架构设计思路
    使用vlfeat 包中遇到的问题
    tensorflow faster rcnn 代码分析一 demo.py
    labelImg 工具
    faster rcnn 做识别
  • 原文地址:https://www.cnblogs.com/c-x-a/p/11028356.html
Copyright © 2011-2022 走看看