要点:
1.requests模块不支持异步,在需要异步的地方使用aiohttp模块进行替换
2.定义一个协程函数,创建协程任务,将 协程 打包为一个 Task 排入日程准备执行。返回 Task 对象
获取当前事件循环,开启循环
async def func(arge):
task = asyncio.ensure_future(func(arge))
loop = asyncio.get_event_loop()
loop.run_until_complete(asyncio.wait(task_list))
import asyncio
import requests,re
import aiohttp
from lxml import etree
from random import randint
# Listing page whose recommendation panel links to the video detail pages.
target_url = 'https://www.pearvideo.com'
# Browser-like User-Agent sent with every request; the download coroutine
# further down in this file reuses the same dict.
headers = {
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36',
}
# requests never times out by default, so a stalled server would hang the
# script forever; cap every blocking request instead.
request_timeout = 10  # seconds

response = requests.get(url=target_url, headers=headers, timeout=request_timeout)
ht = response.text
tree = etree.HTML(ht)
# href of every 'actcont-detail actplay' anchor below the first element whose
# id is 'actRecommendCont'.
link_list = tree.xpath("//*[@id = 'actRecommendCont'][1]//a[@class = 'actcont-detail actplay']/@href")

# The pattern expects the detail page to contain `var contId ... srcUrl="<address>"`
# and captures the address; re.S lets `.` run across line breaks.
reg = r'var contId.*?srcUrl="(.*?)"'
src_url_re = re.compile(reg, re.S)

# Direct media addresses collected from the detail pages.
videoUrl = []
for link in link_list:
    detail = 'https://www.pearvideo.com/' + link
    response1 = requests.get(url=detail, headers=headers, timeout=request_timeout)
    found = src_url_re.search(response1.text)
    if found is None:
        # A page without the expected snippet used to raise IndexError and
        # abort the whole run; report it and carry on with the other pages.
        print('未在详情页中找到视频地址,已跳过:%s' % detail)
        continue
    videoUrl.append(found.group(1))
# File numbers already handed out during this run.
_used_video_numbers = set()


def _draw_video_number():
    """Return a random number in 1..999 that this run has not used yet."""
    if len(_used_video_numbers) >= 999:
        raise RuntimeError('all 999 video file numbers are already in use')
    while True:
        number = randint(1, 999)
        if number not in _used_video_numbers:
            _used_video_numbers.add(number)
            return number


async def getVideoDate(url):
    """Download the resource at *url* and save it as ``./<number>.mp4``.

    The number is drawn at random from 1..999 and is unique within one run,
    so two concurrent downloads never write to the same file.

    Args:
        url: Address of the video file to fetch.

    Raises:
        RuntimeError: If all 999 file numbers were already used in this run.
    """
    # No await happens between drawing and recording the number, so tasks
    # running on the same event loop cannot receive the same one.
    fn = _draw_video_number()
    print('开始下载视频%s' % fn)
    async with aiohttp.ClientSession() as s:
        # session.get() is itself an async context manager; no extra await.
        async with s.get(url=url, headers=headers) as response:
            # read() returns the whole response body as bytes (see aiohttp docs).
            data = await response.read()
    # 'wb' rather than 'ab': appending to a file left over from an earlier run
    # would glue two videos together into one unplayable file.
    with open('./%s.mp4' % fn, 'wb') as f:
        f.write(data)
    print('视频%s下载完毕' % fn)
# Create the event loop explicitly: relying on asyncio.get_event_loop() /
# asyncio.ensure_future() to create one implicitly when no loop is running is
# deprecated and fails on recent Python versions.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

# One Task per video URL so that all downloads run concurrently.
task_list = [loop.create_task(getVideoDate(url)) for url in videoUrl]

# asyncio.wait() raises ValueError for an empty collection, so only run the
# loop when there is at least one download scheduled.
if task_list:
    loop.run_until_complete(asyncio.wait(task_list))