"""Download the audio tracks listed on a Ximalaya album page.

For each listing page, the script extracts every track's title and link,
asks the site's play endpoint for the track's media URL, downloads the
audio bytes, and writes them to ``<path>/<sanitized title>.mp3``.
"""
import json
import os
import re

import parsel
import requests

headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36',
}
path = "./video/"

# Every request below passes ``proxies``; it was previously never defined,
# which raised NameError on the first request.  ``None`` tells requests to
# use no explicit proxy mapping.  Replace with e.g.
# {'http': 'http://host:port', 'https': 'http://host:port'} to route traffic
# through a proxy.
proxies = None

# Seconds to wait on each HTTP request so a stalled connection cannot hang
# the script indefinitely.
REQUEST_TIMEOUT = 30

# Runs of these characters are collapsed to a single underscore when
# building file names.  Compiled once instead of on every track.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\/:*?"<>| ]+')


def _safe_filename(title):
    """Return *title* with each run of unsafe characters replaced by ``_``.

    Titles that contain no unsafe characters are returned unchanged.
    """
    return _UNSAFE_FILENAME_CHARS.sub('_', title)


def _fetch(url):
    """GET *url* with the shared headers/proxies and return the response.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status, so
            an error page is never parsed as JSON or saved as audio.
        requests.RequestException: on connection failures or timeouts.
    """
    response = requests.get(
        url, headers=headers, proxies=proxies, timeout=REQUEST_TIMEOUT
    )
    response.raise_for_status()
    return response


# open(..., "wb") does not create missing parent directories.
os.makedirs(path, exist_ok=True)

# NOTE(review): range(1) requests only page "p0" -- confirm whether the
# site's pagination starts at 0 or 1 before widening the range.
for i in range(1):
    url = 'https://www.ximalaya.com/youshengshu/4256765/p%d/' % i
    html_data = _fetch(url).text
    selector = parsel.Selector(html_data)
    lis = selector.xpath('//*[@id="anchor_sound_list"]/div[2]/ul/li')
    for li in lis:
        title = li.xpath('.//a/@title').get()
        href = li.xpath('.//a/@href').get()
        # .get() returns None when the <li> has no matching <a>; such an
        # entry has neither a name nor an id to download, so skip it.
        if not title or not href:
            continue

        # The track id is the last path segment of the link.
        m4a_id = href.split('/')[-1]
        video_url = f'https://www.ximalaya.com/revision/play/v1/audio?id={m4a_id}&ptype=1'
        print("开始下载音频数据:%s" % title)

        # The play endpoint answers with JSON; the media URL is read from
        # data.src.
        m4a_dict = json.loads(_fetch(video_url).text)
        m4a_url = m4a_dict['data']['src']
        m4a_data = _fetch(m4a_url).content

        file_name = os.path.join(path, _safe_filename(title) + '.mp3')
        with open(file_name, "wb") as w:
            w.write(m4a_data)
        print("%s音频数据保存完毕" % title)