zoukankan      html  css  js  c++  java
  • 修改requests_html.AsyncHTMLSession使其支持urls参数

    一、修改源代码

    #重写AsyncHTMLSession中的run()方法
    
        def run(self, *coros, urls=None):
            """Run coroutine functions on ``self.loop`` and return their results.

            Every callable in ``coros`` is invoked, wrapped in a task bound to
            ``self.loop``, and the loop is driven until all tasks have finished.

            Args:
                *coros: Coroutine functions (callables returning awaitables).
                urls: Optional argument(s) forwarded to the coroutine functions.
                    ``None`` calls each function with no arguments. A ``list``
                    or ``tuple`` fans out: each function is called once per
                    item. Any other value (e.g. a single URL string) is passed
                    to each function as its only argument.

            Returns:
                A list of results in submission order: for each coroutine
                function in the order given, one result per URL in the order
                given. Empty if there is nothing to run.

            Raises:
                Exception: whatever the first failed task (in submission order)
                    raised; it is re-raised only after all tasks have finished.
            """
            if urls is None:
                awaitables = [coro() for coro in coros]
            elif isinstance(urls, (list, tuple)):
                awaitables = [coro(url) for coro in coros for url in urls]
            else:
                awaitables = [coro(urls) for coro in coros]

            # asyncio.wait() rejects an empty collection, so short-circuit.
            if not awaitables:
                return []

            # Bind the tasks to the loop that will actually run them instead of
            # relying on whatever loop happens to be current.
            tasks = [
                asyncio.ensure_future(awaitable, loop=self.loop)
                for awaitable in awaitables
            ]
            self.loop.run_until_complete(asyncio.wait(tasks))
            # Read results from the ordered task list: the "done" set returned
            # by asyncio.wait() is unordered and would scramble the results.
            return [task.result() for task in tasks]

    二、测试

    from requests_html import AsyncHTMLSession
    
    asession = AsyncHTMLSession()


    async def get_link(link):
        """Fetch ``link`` with the shared session and return its absolute links."""
        return (await asession.get(link)).html.absolute_links


    # Exercise both accepted forms of ``urls``: a single URL string first,
    # then a list of URLs.
    for url in (
        "https://www.cnblogs.com/",
        ["https://www.cnblogs.com/", "https://www.jd.com"],
    ):
        results = asession.run(get_link, urls=url)
        print(results)

    三、或者新建一个继承自AsyncHTMLSession的NewAsyncHTMLSession类

    from requests_html import AsyncHTMLSession
    import asyncio
    
    class NewAsyncHTMLSession(AsyncHTMLSession):
        """AsyncHTMLSession whose ``run`` can forward URL arguments to coroutines."""

        def run(self, *coros, urls=None):
            """Run coroutine functions on ``self.loop`` and return their results.

            Every callable in ``coros`` is invoked, wrapped in a task bound to
            ``self.loop``, and the loop is driven until all tasks have finished.

            Args:
                *coros: Coroutine functions (callables returning awaitables).
                urls: Optional argument(s) forwarded to the coroutine functions.
                    ``None`` calls each function with no arguments. A ``list``
                    or ``tuple`` fans out: each function is called once per
                    item. Any other value (e.g. a single URL string) is passed
                    to each function as its only argument.

            Returns:
                A list of results in submission order: for each coroutine
                function in the order given, one result per URL in the order
                given. Empty if there is nothing to run.

            Raises:
                Exception: whatever the first failed task (in submission order)
                    raised; it is re-raised only after all tasks have finished.
            """
            if urls is None:
                awaitables = [coro() for coro in coros]
            elif isinstance(urls, (list, tuple)):
                awaitables = [coro(url) for coro in coros for url in urls]
            else:
                awaitables = [coro(urls) for coro in coros]

            # asyncio.wait() rejects an empty collection, so short-circuit.
            if not awaitables:
                return []

            # Bind the tasks to the loop that will actually run them instead of
            # relying on whatever loop happens to be current.
            tasks = [
                asyncio.ensure_future(awaitable, loop=self.loop)
                for awaitable in awaitables
            ]
            self.loop.run_until_complete(asyncio.wait(tasks))
            # Read results from the ordered task list: the "done" set returned
            # by asyncio.wait() is unordered and would scramble the results.
            return [task.result() for task in tasks]
    
    asession = NewAsyncHTMLSession()


    async def get_link(link):
        """Fetch ``link`` with the shared session and return its absolute links."""
        return (await asession.get(link)).html.absolute_links


    # Exercise both accepted forms of ``urls``: a single URL string first,
    # then a list of URLs.
    for url in (
        "https://www.cnblogs.com/",
        ["https://www.cnblogs.com/", "https://www.jd.com"],
    ):
        results = asession.run(get_link, urls=url)
        print(results)
  • 相关阅读:
    学习鸟哥的Linux私房菜笔记(16)——Ubuntu中建立ftp服务
    gdal库对ENVI文件的一点支持不好
    学习鸟哥的Linux私房菜笔记(15)——文件系统
    学习鸟哥的Linux私房菜笔记(14)——硬件配置与管理
    使用jQuery加载script脚本
    学习鸟哥的Linux私房菜笔记(13)——用户管理
    学习鸟哥的Linux私房菜笔记(12)——系统监视2
    学习鸟哥的Linux私房菜笔记(11)——系统监视1
    学习鸟哥的Linux私房菜笔记(10)——bash2
    学习鸟哥的Linux私房菜笔记(9)——bash1
  • 原文地址:https://www.cnblogs.com/angelyan/p/13913926.html
Copyright © 2011-2022 走看看