zoukankan      html  css  js  c++  java
  • 爬取酷狗音乐

    1 requests  

    2 gevent  

    3 sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='gb18030')  # 修改标准输出编码:utf-8 不好使,改用 gb18030

    批量爬取多首歌曲(输入多个歌名时用英文逗号 "," 分隔)

    import requests

    import urllib.request

    from requests.exceptions import ReadTimeout,HTTPError,RequestException
    from bs4 import BeautifulSoup
    import urllib
    import gevent
    from gevent import Greenlet
    import socket
    import random
    import sys
    import os
    import json
    import io
    import re
    import math


    def main(url):
        """Run one Kugou song search and download every track it returns.

        ``url`` is a song-search endpoint URL that answers with JSONP
        (``callback({...})``).  For each entry of ``data.lists`` in that
        payload the function asks the ``play/getdata`` endpoint for the
        track's ``play_url`` and writes the audio to ``<cwd>/MP5/<name>.mp3``.

        Returns ``None``.  Request failures are reported on stdout
        ('timeout' / 'httperror' / 'reqerror') instead of being raised.
        """
        print(url)

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36'
        }
        # Without a timeout requests waits indefinitely, so the ReadTimeout
        # handler below could never fire.
        timeout = 30

        try:
            # headers must be a keyword argument: the second positional
            # parameter of requests.get is ``params``, not ``headers``.
            resp = requests.get(url, headers=headers, timeout=timeout)

            if resp.status_code != 200:
                # The status of a finished response never changes, so looping
                # on it (as the earlier code did) would spin forever.
                print("url %s" % url)
                return

            r = resp.text
            # Strip the JSONP wrapper: keep what lies between the first '('
            # and the last ')'.
            datas = json.loads(r[r.index('(') + 1:r.rindex(')')])
            lsits = (datas.get('data') or {}).get('lists') or []
            print("总数->%d" % len(lsits))

            # Make sure the target directory exists before opening files in it.
            save_dir = os.path.join(os.getcwd(), "MP5")
            os.makedirs(save_dir, exist_ok=True)

            for i, item in enumerate(lsits):
                # Search hits wrap the matched keyword in <em>…</em>.
                file_name = item['FileName'].replace("<em>", "").replace("</em>", " ")
                print("下载编号 %s" % str(i) + "---" + file_name)
                print("下载编号 %s" % str(i) + "---" + item['FileHash'].replace("<em>", "").replace("</em>", " "))
                gevent.sleep(2)

                # Take the hash from the parsed entry itself; indexing a regex
                # over the raw text can drift out of step with ``lsits``.
                fihash = item['FileHash']
                ps_url = "https://www.kugou.com/yy/html/index.php?r=play/getdata&hash=" + fihash
                print(ps_url)
                rrr = requests.get(ps_url, timeout=timeout).text

                print("ps_url ---------------------------------")
                found = re.findall('"play_url":"(.*?)"', rrr)
                # The URL arrives JSON-escaped ("https:\/\/…"); drop backslashes.
                play_url = found[0].replace("\\", "") if found else ""
                if not play_url:
                    # Nothing to download for this entry; move on to the next.
                    print("编号{} >>> ps_url{} >>> play_url{}".format(i, ps_url, play_url))
                    continue
                print(play_url)
                print("编号{} >>> ps_url{} >>> play_url{}".format(i, ps_url, play_url))

                # str.replace returns a new string, so the result must be kept.
                path_name = file_name.replace(" ", "")
                # Characters that are illegal in file names would break open().
                path_name = re.sub(r'[\\/:*?"<>|]', "_", path_name)
                audio = requests.get(play_url, timeout=timeout).content
                with open(os.path.join(save_dir, path_name + ".mp3"), "wb") as f:
                    f.write(audio)

            print(len(lsits))
        except ReadTimeout:
            print('timeout')
        except HTTPError:
            print('httperror')
        except RequestException:
            print('reqerror')




    if __name__ == '__main__':
        # Script entry point: read comma-separated song names, then start one
        # search/download greenlet per name and wait for all of them.
        from urllib.parse import quote

        song_name = input("请输入歌曲名称:").split(",")
        #lock = threading.Lock()
        # NOTE(review): ``requests`` blocks the gevent hub unless
        # gevent.monkey.patch_all() runs before it is imported — confirm
        # whether real concurrency is wanted here.
        urls = []  # holds the spawned greenlets (name kept from the original)
        for name in song_name:
            name = name.strip()
            if not name:
                # A stray or trailing comma yields an empty keyword; skip it.
                continue
            # Percent-encode the keyword so '&', '#', '+', spaces and
            # non-ASCII text cannot corrupt the query string.
            url = "https://songsearch.kugou.com/song_search_v2?callback=jQuery112406438825614322465_1545881092316&keyword=" + quote(name) + "&page=1&pagesize=30&userid=-1&clientver=&platform=WebFilter&tag=em&filter=2&iscorrection=1&privilege_filter=0&_=1545881092322"

            urls.append(gevent.spawn(main, url))

        gevent.joinall(urls)




  • 相关阅读:
    c#结构体、打他table、excel、csv互转
    WPF 自定义图表(柱状图,曲线图)
    NemaStudio船舶模拟软件下载及破解
    点双连通分量
    HDU4612 Warm up
    边双连通分量
    [Jsoi2010]连通数
    Intern Day73
    Intern Day72
    Intern Day70
  • 原文地址:https://www.cnblogs.com/wxc1/p/10215662.html
Copyright © 2011-2022 走看看