一、修改 requests_html 源代码
# Override the run() method of AsyncHTMLSession.
def run(self, *coros, urls=None):
    """Run coroutine functions on the session loop and return their results.

    Each entry of ``coros`` is a coroutine *function*; it is called here to
    create the coroutine, which is wrapped in a task bound to ``self.loop``.

    Args:
        *coros: Coroutine functions to schedule.
        urls: Argument(s) handed to every coroutine function.
            * a ``list``: each function is called once per element
              (an empty list schedules nothing);
            * any other truthy value: passed as the single argument;
            * ``None`` / other falsy value: functions are called with no
              arguments.

    Returns:
        A list of results in scheduling order: all results of the first
        coroutine function (one per URL, in ``urls`` order), then those of
        the second, and so on. Empty when nothing was scheduled.

    Raises:
        Whatever exception a scheduled coroutine raised; it is re-raised
        after all tasks have finished.
    """
    if isinstance(urls, list):
        pending = [fn(url) for fn in coros for url in urls]
    elif urls:
        pending = [fn(urls) for fn in coros]
    else:
        pending = [fn() for fn in coros]

    # asyncio.wait() rejects an empty collection, so short-circuit here.
    if not pending:
        return []

    # Bind the tasks to the session's own loop so they always run on the
    # loop that run_until_complete() drives below.
    tasks = [asyncio.ensure_future(item, loop=self.loop) for item in pending]
    self.loop.run_until_complete(asyncio.wait(tasks))

    # asyncio.wait() returns an unordered set; read the results from the
    # ordered task list so the output order matches the input order.
    return [task.result() for task in tasks]
二、测试
from requests_html import AsyncHTMLSession
# Module-level session shared by get_link() and the run() calls below.
# The `urls=` keyword used further down relies on the modified run() from section 1.
asession = AsyncHTMLSession()
async def get_link(link):
    """Fetch ``link`` through the shared session and return its absolute links."""
    response = await asession.get(link)
    page = response.html
    return page.absolute_links
# Exercise run() twice: first with a single URL string, then with a list of
# URLs, printing the collected results after each call.
for url in (
    "https://www.cnblogs.com/",
    ["https://www.cnblogs.com/", "https://www.jd.com"],
):
    results = asession.run(get_link, urls=url)
    print(results)
三、或者新建一个继承自AsyncHTMLSession的NewAsyncHTMLSession类
from requests_html import AsyncHTMLSession
import asyncio
class NewAsyncHTMLSession(AsyncHTMLSession):
    """AsyncHTMLSession whose run() can pass arguments to the coroutine functions."""

    def run(self, *coros, urls=None):
        """Run coroutine functions on the session loop and return their results.

        Each entry of ``coros`` is a coroutine *function*; it is called here
        to create the coroutine, which is wrapped in a task bound to
        ``self.loop``.

        Args:
            *coros: Coroutine functions to schedule.
            urls: Argument(s) handed to every coroutine function.
                * a ``list``: each function is called once per element
                  (an empty list schedules nothing);
                * any other truthy value: passed as the single argument;
                * ``None`` / other falsy value: functions are called with
                  no arguments.

        Returns:
            A list of results in scheduling order: all results of the first
            coroutine function (one per URL, in ``urls`` order), then those
            of the second, and so on. Empty when nothing was scheduled.

        Raises:
            Whatever exception a scheduled coroutine raised; it is re-raised
            after all tasks have finished.
        """
        if isinstance(urls, list):
            pending = [fn(url) for fn in coros for url in urls]
        elif urls:
            pending = [fn(urls) for fn in coros]
        else:
            pending = [fn() for fn in coros]

        # asyncio.wait() rejects an empty collection, so short-circuit here.
        if not pending:
            return []

        # Bind the tasks to the session's own loop so they always run on the
        # loop that run_until_complete() drives below.
        tasks = [asyncio.ensure_future(item, loop=self.loop) for item in pending]
        self.loop.run_until_complete(asyncio.wait(tasks))

        # asyncio.wait() returns an unordered set; read the results from the
        # ordered task list so the output order matches the input order.
        return [task.result() for task in tasks]
# Module-level session (the subclass defined above) shared by get_link() and the run() calls below.
asession = NewAsyncHTMLSession()
async def get_link(link):
    """Request ``link`` with the shared session; return the page's absolute links."""
    reply = await asession.get(link)
    html = reply.html
    return html.absolute_links
# Call run() with a single URL string and then with a list of URLs,
# printing the collected results after each call.
for url in (
    "https://www.cnblogs.com/",
    ["https://www.cnblogs.com/", "https://www.jd.com"],
):
    results = asession.run(get_link, urls=url)
    print(results)