zoukankan      html  css  js  c++  java
  • 修改requests_html.AsyncHTMLSession使其run()方法支持urls参数

    一、修改源代码

    #重写AsyncHTMLSession中的run()方法
    
        def run(self, *coros, urls=None):
            """Run coroutine functions on the session loop and collect results.

            Every callable in ``coros`` is called to create a coroutine, each
            coroutine is wrapped in a task bound to ``self.loop``, and the loop
            is run until all tasks have finished.

            Args:
                *coros: Coroutine functions (callables returning awaitables),
                    not already-created coroutine objects.
                urls: Optional argument forwarded to the callables.
                    * a ``list``: every callable is called once per element;
                      an empty list schedules nothing.
                    * any other truthy value: every callable is called once
                      with that value as its single argument.
                    * ``None`` or another falsy non-list value: every callable
                      is called with no arguments.

            Returns:
                A list of task results in submission order: grouped by
                callable in the order given, and within each callable in the
                order of ``urls``. Empty when nothing was scheduled.

            Raises:
                Exception: whatever a task raised, re-raised when its result
                    is read.
            """
            if isinstance(urls, list):
                # One call per (callable, url) pair, callable-major order.
                calls = [(coro, (url,)) for coro in coros for url in urls]
            elif urls:
                calls = [(coro, (urls,)) for coro in coros]
            else:
                calls = [(coro, ()) for coro in coros]

            if not calls:
                # asyncio.wait() raises ValueError for an empty collection.
                return []

            # Bind tasks to the session's loop explicitly so they never land
            # on a different ambient loop than the one that is run below.
            tasks = [
                asyncio.ensure_future(coro(*args), loop=self.loop)
                for coro, args in calls
            ]
            self.loop.run_until_complete(asyncio.wait(tasks))

            # Read from ``tasks`` rather than the ``done`` set returned by
            # asyncio.wait(): a set has no defined order, the list does.
            return [task.result() for task in tasks]

    二、测试

    from requests_html import AsyncHTMLSession
    
    asession = AsyncHTMLSession()

    async def get_link(link):
        """Fetch ``link`` through the shared session and return its absolute links."""
        response = await asession.get(link)
        return response.html.absolute_links

    # Exercise both accepted forms of ``urls``: first a single URL string,
    # then a list of URLs.
    for url in (
        "https://www.cnblogs.com/",
        ["https://www.cnblogs.com/", "https://www.jd.com"],
    ):
        results = asession.run(get_link, urls=url)
        print(results)

    三、或者新建一个继承AsyncHTMLSession的NewAsyncHTMLSession类

    from requests_html import AsyncHTMLSession
    import asyncio
    
    class NewAsyncHTMLSession(AsyncHTMLSession):
        """AsyncHTMLSession whose ``run()`` can pass URL arguments to coroutines."""

        def run(self, *coros, urls=None):
            """Run coroutine functions on the session loop and collect results.

            Every callable in ``coros`` is called to create a coroutine, each
            coroutine is wrapped in a task bound to ``self.loop``, and the loop
            is run until all tasks have finished.

            Args:
                *coros: Coroutine functions (callables returning awaitables),
                    not already-created coroutine objects.
                urls: Optional argument forwarded to the callables.
                    * a ``list``: every callable is called once per element;
                      an empty list schedules nothing.
                    * any other truthy value: every callable is called once
                      with that value as its single argument.
                    * ``None`` or another falsy non-list value: every callable
                      is called with no arguments.

            Returns:
                A list of task results in submission order: grouped by
                callable in the order given, and within each callable in the
                order of ``urls``. Empty when nothing was scheduled.

            Raises:
                Exception: whatever a task raised, re-raised when its result
                    is read.
            """
            if isinstance(urls, list):
                # One call per (callable, url) pair, callable-major order.
                calls = [(coro, (url,)) for coro in coros for url in urls]
            elif urls:
                calls = [(coro, (urls,)) for coro in coros]
            else:
                calls = [(coro, ()) for coro in coros]

            if not calls:
                # asyncio.wait() raises ValueError for an empty collection.
                return []

            # Bind tasks to the session's loop explicitly so they never land
            # on a different ambient loop than the one that is run below.
            tasks = [
                asyncio.ensure_future(coro(*args), loop=self.loop)
                for coro, args in calls
            ]
            self.loop.run_until_complete(asyncio.wait(tasks))

            # Read from ``tasks`` rather than the ``done`` set returned by
            # asyncio.wait(): a set has no defined order, the list does.
            return [task.result() for task in tasks]
    
    asession = NewAsyncHTMLSession()

    async def get_link(link):
        """Fetch ``link`` through the shared session and return its absolute links."""
        page = await asession.get(link)
        return page.html.absolute_links

    # Run once with a single URL string, then once with a list of URLs.
    for url in (
        "https://www.cnblogs.com/",
        ["https://www.cnblogs.com/", "https://www.jd.com"],
    ):
        results = asession.run(get_link, urls=url)
        print(results)
  • 相关阅读:
    linux 磁盘管理学习笔记
    Apache的Order Allow Deny心得
    NodeJs 笔记
    JavaScript 笔记
    MySQL 学习笔记
    HTML 转义符
    UTF-8 BOM(EF BB BF)
    [ Python
    [ Python
    [ Python
  • 原文地址:https://www.cnblogs.com/angelyan/p/13913926.html
Copyright © 2011-2022 走看看