"""Download Pear Video (pearvideo.com) clips from a category page with a thread pool.

Reference write-up: https://www.cnblogs.com/zivli/p/11614103.html
"""

import re
from multiprocessing.dummy import Pool

import requests
from lxml import etree

BASE_URL = "https://www.pearvideo.com/"
CATEGORY_URL = "https://www.pearvideo.com/category_5"

# Seconds to wait for a connection / for the next bytes before giving up;
# without a timeout a stalled server would block a worker thread forever.
REQUEST_TIMEOUT = 30
# Number of bytes written to disk per chunk while streaming a video.
CHUNK_SIZE = 64 * 1024
# Number of worker threads downloading videos concurrently.
WORKER_COUNT = 4

# The detail page embeds the real video address as: srcUrl="...",vdoUrl
SRC_URL_PATTERN = re.compile(r'srcUrl="(.*?)",vdoUrl')
# Characters that are not allowed in file names on common file systems.
UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')


def _safe_filename(title):
    """Return *title* with path-hostile characters replaced by underscores."""
    cleaned = UNSAFE_FILENAME_CHARS.sub("_", title).strip()
    return cleaned or "video"


def _collect_videos(category_url):
    """Scrape *category_url* and return a list of ``{'name', 'url'}`` dicts.

    ``name`` is the local ``.mp4`` file name derived from the video title and
    ``url`` is the real video address extracted from the video's detail page.
    List items or detail pages that lack the expected markup are skipped
    instead of aborting the whole run.

    :raises requests.RequestException: if the category page cannot be fetched.
    """
    response = requests.get(url=category_url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()

    tree = etree.HTML(response.text)
    if tree is None:
        return []

    videos = []
    # 1. Locate every video entry on the category page.
    for item in tree.xpath('//ul[@id="listvideoListUl"]/li'):
        hrefs = item.xpath('./div/a/@href')
        titles = item.xpath('./div/a/div[2]/text()')
        if not hrefs or not titles:
            continue

        # 2. Build the detail-page address of this video.
        detail_url = BASE_URL + hrefs[0]

        # 3. Request the detail page.
        try:
            detail = requests.get(url=detail_url, timeout=REQUEST_TIMEOUT)
            detail.raise_for_status()
        except requests.RequestException as exc:
            print(detail_url, "详情页请求失败:", exc)
            continue

        # 4. Extract the real video address from the detail page.
        match = SRC_URL_PATTERN.search(detail.text)
        if match is None:
            continue

        videos.append({
            'name': _safe_filename(titles[0]) + '.mp4',
            'url': match.group(1),
        })
    return videos


def get_video_data(d):
    """Download one video and write it to a local file in binary mode.

    The response is streamed to disk in chunks so the whole video is never
    held in memory at once.

    :param d: dict with ``'url'`` (video address) and ``'name'`` (target
        file name).
    :return: None
    :raises requests.RequestException: on connection errors, timeouts or a
        non-success HTTP status.
    """
    # Announce before the transfer starts, not after it has completed.
    print(d['name'], "正在下载。。。")
    with requests.get(url=d['url'], stream=True, timeout=REQUEST_TIMEOUT) as response:
        # Fail on HTTP errors instead of saving an error page as a video.
        response.raise_for_status()
        with open(d['name'], 'wb') as f:
            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                f.write(chunk)
    print(d['name'], "下载成功。。。")


def _download_or_report(d):
    """Run ``get_video_data`` and report a failure instead of raising.

    Used as the pool worker so one failed download does not discard the
    results of the others.
    """
    try:
        get_video_data(d)
    except (requests.RequestException, OSError) as exc:
        print(d['name'], "下载失败:", exc)


def main():
    """Scrape the category page and download every video found on it."""
    url_list = _collect_videos(CATEGORY_URL)
    if not url_list:
        return

    # multiprocessing.dummy.Pool is a *thread* pool with the Pool API.
    pool = Pool(WORKER_COUNT)
    try:
        pool.map(_download_or_report, url_list)
    finally:
        pool.close()
        pool.join()


if __name__ == "__main__":
    main()