zoukankan      html  css  js  c++  java
  • 修改requests_html.AsyncHTMLSession使其支持urls参数

    一、修改源代码

    #重写AsyncHTMLSession中的run()方法
    
        def run(self, *coros, urls=None):
            """Run coroutine functions on the session loop and return their results.

            Every entry of ``coros`` is a coroutine *function*; it is called
            here, wrapped in a task and run to completion. How each function
            is called depends on ``urls``:

            * falsy (``None``, ``""``, ``[]``): ``coro()`` once per function;
            * a ``list``: ``coro(url)`` for every function/url pair, with the
              functions in the outer loop and the urls in the inner loop;
            * any other value (e.g. a single ``str``): ``coro(urls)`` once per
              function.

            Returns:
                A list of results in the order the tasks were created, i.e.
                the order of ``coros`` (and, within one function, of
                ``urls``). An empty list when there is nothing to run.

            Raises:
                Any exception raised inside a coroutine is re-raised when its
                result is collected.
            """
            if not urls:
                call_args = [()]
            elif isinstance(urls, list):
                call_args = [(url,) for url in urls]
            else:
                call_args = [(urls,)]

            # Bind every task to the session's own loop so it is the loop that
            # actually drives them in run_until_complete() below.
            tasks = [
                asyncio.ensure_future(coro(*args), loop=self.loop)
                for coro in coros
                for args in call_args
            ]
            if not tasks:
                # asyncio.wait() raises ValueError on an empty collection.
                return []

            self.loop.run_until_complete(asyncio.wait(tasks))
            # asyncio.wait() hands back *sets*, which lose submission order;
            # read the results from the ordered task list instead.
            return [task.result() for task in tasks]

    二、测试

    from requests_html import AsyncHTMLSession
    
    # Module-level session shared by the coroutine defined below and by the
    # run() calls that follow.
    asession = AsyncHTMLSession()
    
    async def get_link(link):
        """Fetch ``link`` through the shared session and return its absolute links."""
        response = await asession.get(link)
        page = response.html
        return page.absolute_links
    
    # Exercise both shapes accepted by ``urls``: a single string, then a list.
    for url in (
        "https://www.cnblogs.com/",
        ["https://www.cnblogs.com/", "https://www.jd.com"],
    ):
        results = asession.run(get_link, urls=url)
        print(results)

    三、或者新建一个继承自AsyncHTMLSession的NewAsyncHTMLSession类

    from requests_html import AsyncHTMLSession
    import asyncio
    
    class NewAsyncHTMLSession(AsyncHTMLSession):
        """AsyncHTMLSession whose ``run()`` can pass URL arguments to coroutines."""

        def run(self, *coros, urls=None):
            """Run coroutine functions on the session loop and return their results.

            Every entry of ``coros`` is a coroutine *function*; it is called
            here, wrapped in a task and run to completion. How each function
            is called depends on ``urls``:

            * falsy (``None``, ``""``, ``[]``): ``coro()`` once per function;
            * a ``list``: ``coro(url)`` for every function/url pair, with the
              functions in the outer loop and the urls in the inner loop;
            * any other value (e.g. a single ``str``): ``coro(urls)`` once per
              function.

            Returns:
                A list of results in the order the tasks were created, i.e.
                the order of ``coros`` (and, within one function, of
                ``urls``). An empty list when there is nothing to run.

            Raises:
                Any exception raised inside a coroutine is re-raised when its
                result is collected.
            """
            if not urls:
                call_args = [()]
            elif isinstance(urls, list):
                call_args = [(url,) for url in urls]
            else:
                call_args = [(urls,)]

            # Bind every task to the session's own loop so it is the loop that
            # actually drives them in run_until_complete() below.
            tasks = [
                asyncio.ensure_future(coro(*args), loop=self.loop)
                for coro in coros
                for args in call_args
            ]
            if not tasks:
                # asyncio.wait() raises ValueError on an empty collection.
                return []

            self.loop.run_until_complete(asyncio.wait(tasks))
            # asyncio.wait() hands back *sets*, which lose submission order;
            # read the results from the ordered task list instead.
            return [task.result() for task in tasks]
    
    # Instantiate the subclass so that run() accepts the ``urls`` keyword.
    asession = NewAsyncHTMLSession()
    
    async def get_link(link):
        """Fetch ``link`` through the shared session and return its absolute links."""
        response = await asession.get(link)
        page = response.html
        return page.absolute_links
    
    # Exercise both shapes accepted by ``urls``: a single string, then a list.
    for url in (
        "https://www.cnblogs.com/",
        ["https://www.cnblogs.com/", "https://www.jd.com"],
    ):
        results = asession.run(get_link, urls=url)
        print(results)
  • 相关阅读:
    gin使用validator库参数校验若干实用技巧
    在gin框架中使用JWT
    使用zap接收gin框架默认的日志并配置日志归档
    gin框架路由拆分与注册
    Gin框架介绍及使用
    GO学习-(39) 优雅地关机或重启
    GO学习-(38) Go语言结构体转map[string]interface{}的若干方法
    WPF中不规则窗体与WindowsFormsHost控件的兼容问题完美解决方案
    [ 夜间模式 ] NightVersion
    HDU1518 Square(DFS)
  • 原文地址:https://www.cnblogs.com/angelyan/p/13913926.html
Copyright © 2011-2022 走看看