闲来无事,爬爬音乐:
酷我
import json
import os.path
import pprint
import random
import time

import requests
from selenium import webdriver
from selenium.webdriver.common.by import By

# Output directory for the downloaded files. os.path.join(..., '') appends the
# platform's own separator, so the value is unchanged on Windows and is still
# a usable directory prefix on other systems.
filePath = os.path.join('20211210酷我音乐', '')
os.makedirs(filePath, exist_ok=True)

# NOTE(review): the cookie (including kw_token) was captured from a browser
# session and has presumably expired -- refresh it before running.
headers = {
    "Cookie": "_afaff42f0ce19b169a8071123a4797=1639125104,1639130623,1639130639,1639143584; _gat=1; Hm_lpvt_cdb524f42f0ce19b169a8071123a4797=1639144335; kw_token=8EQN23CVOTJ",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36",
}

# Seconds to wait for any single HTTP request before giving up on it.
REQUEST_TIMEOUT = 30
# Number of chart pages the script walks through.
PAGE_COUNT = 10
# CSS selector of the "next page" arrow under the chart.
NEXT_PAGE_SELECTOR = (
    '#__layout > div > div.container > div > div.main_con > div.con_r > '
    'div:nth-child(1) > div.page-wrap > i.li-page.iconfont.icon-icon_pagedown'
)
# Characters that are not allowed in Windows file names, mapped to '_'.
_UNSAFE_FILENAME_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

# The browser is started at import time because get_song_details() reads the
# module-level ``driver``.
driver = webdriver.Chrome()
url = 'http://www.kuwo.cn/rankList'
driver.get(url=url)
driver.maximize_window()
driver.implicitly_wait(10)


def _safe_filename(name):
    """Return *name* with characters that are illegal in file names replaced by '_'."""
    return name.translate(_UNSAFE_FILENAME_CHARS).strip()


def get_song_details():
    """Download every song listed on the chart page currently open in ``driver``.

    For each row of the song list the song id is taken from the last path
    segment of the song link, the playUrl API is asked for a download address,
    and the audio is saved as ``<title>.mp3`` under ``filePath``. Songs for
    which no download address is returned are reported and skipped; download
    or disk errors are reported per song and do not stop the loop.
    """
    rows = driver.find_elements(By.CSS_SELECTOR, 'div.list_out > div:nth-child(1) > ul > li')  # song list
    for item in rows:
        # Look the link element up once; both attributes come from it.
        link = item.find_element(By.CSS_SELECTOR, '.song_name.flex_c .name')
        songUrl = link.get_attribute('href')
        if not songUrl:
            # Without a link there is no song id to query.
            continue
        songId = songUrl.split('/')[-1]
        songTitle = link.get_attribute('title') or songId

        # Ask the API for the real download address.
        songPlayUrl = f'http://www.kuwo.cn/api/v1/www/music/playUrl?mid={songId}'
        try:
            response = requests.get(url=songPlayUrl, headers=headers, timeout=REQUEST_TIMEOUT)
            downLoadUrl = response.json()['data']['url']
        except (requests.RequestException, KeyError, TypeError, ValueError):
            downLoadUrl = None
        if not downLoadUrl:
            print(songTitle + '可能为付费歌曲,暂时不支持下载!')
            continue
        print(downLoadUrl)

        try:
            # Fetch first, open the file second, so that a failed download
            # does not leave an empty .mp3 behind.
            audio = requests.get(downLoadUrl, headers=headers, timeout=REQUEST_TIMEOUT)
            audio.raise_for_status()
            with open(filePath + _safe_filename(songTitle) + '.mp3', mode='wb') as f:
                f.write(audio.content)
        except (requests.RequestException, OSError) as exc:
            print(f'{songTitle} 下载失败: {exc}')


if __name__ == "__main__":
    try:
        for page in range(1, PAGE_COUNT + 1):
            print(f'----------------------------------正在爬取第{page}页内容------------------------------------')
            time.sleep(random.uniform(2, 5))
            get_song_details()  # download the songs on this page
            if page == PAGE_COUNT:
                break
            # find_elements returns an empty list when the arrow is missing,
            # whereas find_element would raise NoSuchElementException.
            nextButtons = driver.find_elements(By.CSS_SELECTOR, NEXT_PAGE_SELECTOR)
            if not nextButtons:
                break
            nextButtons[0].click()  # go to the next page
        print('已经全部爬取完毕,关闭浏览器!')
    finally:
        # Always end the browser session, even if scraping failed part-way.
        driver.quit()
爬取完后截图:
咪咕音乐(使用别人的接口)
import os.path
import random
import threading
import time

import requests

# Output directory for the downloaded files. os.path.join(..., '') appends the
# platform's own separator, so the value is unchanged on Windows and is still
# a usable directory prefix on other systems.
filePath = os.path.join('20211211咪咕音乐', '')
os.makedirs(filePath, exist_ok=True)

# Seconds to wait for any single HTTP request before giving up on it.
REQUEST_TIMEOUT = 30
# (item key, label used in messages and file names, file extension) for each
# audio variant to download. Earlier revisions of this script also had
# (disabled) code for the 'url', 'url_128' and 'url_m4a' keys; add rows here
# to fetch those as well.
DOWNLOAD_FORMATS = (
    ('url_320', '320', '.mp3'),
    ('url_flac', 'flac', '.flac'),
)
# Characters that are not allowed in Windows file names, mapped to '_'.
_UNSAFE_FILENAME_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})


def _safe_filename(name):
    """Return *name* with characters that are illegal in file names replaced by '_'."""
    return name.translate(_UNSAFE_FILENAME_CHARS).strip()


def _download_item(item, headers):
    """Save every configured audio variant of one search-result *item*.

    *item* is one entry of the API's ``data.list``; it is read through the
    keys ``name``, ``artist`` and the keys named in DOWNLOAD_FORMATS. A variant
    whose key is absent is reported; one whose value is empty is skipped.
    """
    title = item['name']  # song name
    artist = item['artist']  # performer
    for key, label, extension in DOWNLOAD_FORMATS:
        if key not in item:
            print(title + f'没有{label}url。')
            continue
        link = item[key]
        if not link:
            continue
        target = filePath + _safe_filename(title + '-' + artist + label) + extension
        try:
            # Fetch first, open the file second, so that a failed download
            # does not leave an empty file behind.
            audio = requests.get(url=link, headers=headers, timeout=REQUEST_TIMEOUT)
            audio.raise_for_status()
            with open(target, mode='wb') as f:
                f.write(audio.content)
        except (requests.RequestException, OSError) as exc:
            print(f'{title}-{artist} {label} 下载失败: {exc}')


def downLoadMusic():
    """Download the songs returned by the search API for every request key.

    One POST is sent per entry of ``keys``, with a random 2-5 second pause
    before each. Every item of the response's ``data.list`` is passed to
    _download_item(). A page whose request fails or whose response lacks that
    list is reported and skipped.
    """
    headers = {
        'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36',
    }
    url = 'http://59.110.45.28/m/api/search'
    # Opaque values sent as the 'data' form field, one request per value.
    # NOTE(review): presumably captured from the site's own requests, one per
    # result page -- how they are generated is not shown here.
    keys = [
        'f0bcSS_AZEFDV43HhIP5qjf_l3_7Ic9NxBjclgRQuuos51NKVKteFOg5oNblKYnB8taPkt5JuRgwPCK3',
        'd37bEP-7FW1LHnXKXqRhTWApsr2G8Xwt0E35r7uzgk_a0VseiD2-dtkeT-lsOISJ8Zs4HptyxOxFqxlS',
        '89107AMWHx8zY84EqpFGRNSxNiqodXBoLo68OyWJzFxN54pQB0j2Q_un5WqAOEfJ7QD5qd8wqCNRjZ38',
        'a576e28Jxgx8R-rRh3NdsU5Xw7folgexZ-nqVDQ4eRDi5JGXa491Ck0cMGEewPlX1u6w8ndG8bCYa1fB',
        'ab6ev8wrmb_3Dspld8kWQAF3HuJa3hHcq6A8P-fxFUE_gaWb0wdCBV58DgS1yaPR0ft0gBSa1iFsaB8q',
        '2ddaZ7trZhrE7JGc4vJe8Y6IPdG8f4twT5RqyAOjcPkKNkMxL9UurFePTDgLrPirQO3DlreV2op5pZ26',
        '268bBfgmpPVt41Qk-HK_P25wjTveUGPk9bsDRxEPAvzUsDufQS7BTL-0Fa_yVbGdCsa7-880aFaSim-H',
        '8ddfLDMJwz4YM-z6jt0MNDzy9pqif9EpInu-8JURAIWNnZZgucPtG6DaYnAKrNI-3jtctwXArf7ZTLqm',
        '328dzRep6LWTFCMTK0NSd6QwQRRRv3f9p7x3upJfg4ADdmAe35cvMAR8WatqMoDjmph0_xoADtJSJUL7',
        '43e1IZmOyQTBin9KoxYu5Bt6_199exPwlhch05g3xjeiSEtf2uFSIUGThqFBlW7OoUtj5GZPmO9kMixzOQ',
        'ba19EIefZJz9--G3SiSvvRwyK5K714jnOjRo1_Wd4uk-6WKOE1lmp_7bQMqM0YdHDpznsD1JRgUI5Z_3Wg',
        'ade4vYyerZYSb_DJ6w1rOPzcCynnlzNFrKerG4gqpRX1NiusWz9ttV5pKFmHHqlOpY8kdQT-me9dwguTvQ',
        'ce6516AEgn_GL6875y3woPgY1H8vqAH4WPR1kKQ-Dl9y8RXuth0GBNOn0WAfYDG6o5SoHSCOq7Q0RYEMSQ',
        'a267WcED8XPnh0dimtjNrDrD0ULG3vVAYi90MY4jYaNEMauatWyPWBPgaJMfQq_Wqlh7P-ZIwg4rZjkRkw',
    ]

    for page, key in enumerate(keys, start=1):
        print(f'---------------------------------------------------正在爬取{page}页数据-------------------------------------------------------------')
        time.sleep(random.uniform(2,5))
        data = {
            'data': key,
            'v':2,
        }
        try:
            response = requests.post(url=url, headers=headers, data=data, timeout=REQUEST_TIMEOUT)
            results = response.json()['data']['list']  # song list
        except (requests.RequestException, KeyError, TypeError, ValueError) as exc:
            print(f'第{page}页数据获取失败: {exc!r}')
            continue
        for item in results or []:
            _download_item(item, headers)


if __name__ == "__main__":
    downLoadMusic()