import json
import os
import re

import parsel
import requests
# HTTP headers sent with every request (a desktop Chrome user agent string).
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36',
}

# The requests below pass ``proxies=proxies``; the name was never defined, which
# raised NameError on the first request. ``None`` is the default of requests
# (no explicit proxy mapping). Set to e.g. {"https": "http://host:port"} to
# route traffic through a proxy.
proxies = None

# Directory the audio files are written to.
path = "./video/"

# Seconds to wait for each HTTP request before giving up, so a stalled
# connection cannot hang the script forever.
REQUEST_TIMEOUT = 30

# Characters that are not allowed in file names (Windows reserved characters
# plus line breaks). Compiled once instead of once per track.
INVALID_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n]+')


def sanitize_title(title):
    """Return *title* with every run of invalid file-name characters replaced by ``_``.

    A title without invalid characters is returned unchanged, so callers do
    not need to test for a match first.
    """
    return INVALID_FILENAME_CHARS.sub('_', title)


def download_page(page):
    """Download every track listed on album page number *page* into ``path``.

    Fetches the album page, reads the title and link of each list item under
    ``#anchor_sound_list``, asks the play endpoint for the audio URL of each
    track and writes the audio bytes to ``<path><sanitized title>.mp3``.

    Raises:
        requests.HTTPError: if any request returns an error status.
    """
    url = 'https://www.ximalaya.com/youshengshu/4256765/p%d/' % page
    response = requests.get(url, headers=headers, proxies=proxies, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    selector = parsel.Selector(response.text)

    for li in selector.xpath('//*[@id="anchor_sound_list"]/div[2]/ul/li'):
        title = li.xpath('.//a/@title').get()
        href = li.xpath('.//a/@href').get()
        if not title or not href:
            # List item without a usable link; nothing to download.
            continue

        # The track id is the last path segment of the link.
        m4a_id = href.rstrip('/').split('/')[-1]
        video_url = f'https://www.ximalaya.com/revision/play/v1/audio?id={m4a_id}&ptype=1'
        print("开始下载音频数据:%s" % title)

        m4a_response = requests.get(url=video_url, headers=headers, proxies=proxies, timeout=REQUEST_TIMEOUT)
        m4a_response.raise_for_status()
        m4a_dict = json.loads(m4a_response.text)
        m4a_url = (m4a_dict.get('data') or {}).get('src')
        if not m4a_url:
            # The endpoint returned no audio source for this track; skip it
            # instead of crashing on a missing key or requesting ``None``.
            print("%s没有可用的音频地址,已跳过" % title)
            continue

        audio_response = requests.get(m4a_url, headers=headers, proxies=proxies, timeout=REQUEST_TIMEOUT)
        # Do not save an HTTP error page as if it were audio data.
        audio_response.raise_for_status()

        with open(os.path.join(path, sanitize_title(title) + '.mp3'), "wb") as w:
            w.write(audio_response.content)
        print("%s音频数据保存完毕" % title)


def main():
    """Create the output directory and download the configured album pages."""
    os.makedirs(path, exist_ok=True)
    # NOTE(review): range(1) requests only page "p0"; if the site numbers its
    # pages from 1 this should probably be range(1, last_page + 1) - confirm.
    for i in range(1):
        download_page(i)


if __name__ == "__main__":
    main()