zoukankan      html  css  js  c++  java
  • 爬取酷狗音乐

    1 requests  

    2 gevent  

    3 sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='gb18030') # 修改标准输出的编码;这里用 utf-8 不好使,所以改用 gb18030

    批量爬取多首歌曲(输入多个歌名时用英文逗号 "," 分隔)

    import requests

    import urllib.request

    from requests.exceptions import ReadTimeout,HTTPError,RequestException
    from bs4 import BeautifulSoup
    import urllib
    import gevent
    from gevent import Greenlet
    import socket
    import random
    import sys
    import os
    import json
    import io
    import re
    import math


    def main(url):
        """Search Kugou with ``url`` and download every returned song as MP3.

        ``url`` is expected to be a ``song_search_v2`` JSONP search URL, i.e.
        the response body is a JSON object wrapped in ``callback(...)``.  For
        each entry of ``data.lists`` the play-data endpoint is queried with the
        entry's ``FileHash``, the ``play_url`` is pulled out of that response
        and the audio bytes are written to ``<cwd>/MP5/<FileName>.mp3``.

        Nothing is returned; progress is printed to stdout.  Timeouts, HTTP
        error statuses and other ``requests`` failures are reported with a
        short message and end the run for this URL.
        """
        print(url)

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36'
        }
        # Without an explicit timeout requests waits forever and the
        # ReadTimeout handler below could never fire.
        timeout = 10
        max_attempts = 3

        try:
            # ``headers`` must be passed by keyword: the second positional
            # argument of requests.get() is ``params``, not ``headers``.
            get = requests.get(url, headers=headers, timeout=timeout)
            attempt = 1
            # Bounded retry that actually re-issues the request (the status of
            # an already-received response never changes).
            while get.status_code != 200 and attempt < max_attempts:
                print("url %s" % url)
                gevent.sleep(1)
                get = requests.get(url, headers=headers, timeout=timeout)
                attempt += 1
            if get.status_code != 200:
                print("url %s" % url)
                return

            r = get.text
            # JSONP payload: keep what lies between the first '(' and the last
            # ')' so trailing ';' or newlines do not matter.
            datas = json.loads(r[r.index('(') + 1:r.rindex(')')])
            lsits = datas['data']['lists']
            lens = len(lsits)
            print("总数->%d" % lens)

            # Make sure the download directory exists before opening files in it.
            p = os.path.join(os.getcwd(), "MP5")
            os.makedirs(p, exist_ok=True)

            for i, song in enumerate(lsits):
                # tag=em in the search URL makes the API wrap matches in <em>.
                title = song['FileName'].replace("<em>", "").replace("</em>", " ")
                # Use the hash of this very entry instead of re-matching the
                # raw text by position, which can drift out of step.
                fihash = song['FileHash']
                print("下载编号 %s" % str(i) + "---" + title)
                print("下载编号 %s" % str(i) + "---" + fihash.replace("<em>", "").replace("</em>", " "))
                gevent.sleep(2)

                ps_url = "https://www.kugou.com/yy/html/index.php?r=play/getdata&hash=" + fihash
                print(ps_url)
                play_resp = requests.get(ps_url, timeout=timeout)
                play_resp.raise_for_status()
                rrr = play_resp.text

                print("ps_url ---------------------------------")
                found = re.findall('"play_url":"(.*?)"', rrr)
                # The URL arrives JSON-escaped ("https:\/\/..."); drop the
                # backslashes.  No match / empty value means nothing to fetch.
                play_url = found[0].replace("\\", "") if found else ""
                print(play_url)
                print("编号{} >>> ps_url{} >>> play_url{}".format(i, ps_url, play_url))
                if not play_url:
                    continue

                # str.replace returns a new string, so the result must be kept;
                # also neutralise characters that are not valid in file names.
                path_name = re.sub(r'[\\/:*?"<>|]', "_", title.replace(" ", ""))
                audio = requests.get(play_url, timeout=timeout)
                # Do not save an HTTP error page as an .mp3 file.
                audio.raise_for_status()
                with open(os.path.join(p, path_name + ".mp3"), "wb") as f:
                    f.write(audio.content)

            print(len(lsits))
        except ReadTimeout:
            print('timeout')
        except HTTPError:
            print('httperror')
        except RequestException:
            print('reqerror')




    if __name__ == '__main__':
        # Script entry point: read one or more song names from stdin, build a
        # Kugou search URL for each and run main() on every URL in a greenlet.
        from urllib.parse import quote

        # Several names may be entered at once, separated by ASCII commas.
        # Surrounding blanks are dropped so "a, b" searches for "b", not " b".
        song_name = [name.strip() for name in input("请输入歌曲名称:").split(",")]

        # NOTE(review): no gevent monkey-patching is visible in this file, so
        # the blocking requests calls inside main() presumably do not overlap;
        # greenlets would only switch at gevent.sleep() - confirm.
        urls = []
        for keyword in song_name:
            if not keyword:
                # Nothing to search for (e.g. trailing comma or empty input).
                continue
            # Percent-encode the keyword so characters such as '&', '#' or
            # spaces cannot corrupt the query string.
            url = ("https://songsearch.kugou.com/song_search_v2?callback=jQuery112406438825614322465_1545881092316&keyword="
                   + quote(keyword, safe="")
                   + "&page=1&pagesize=30&userid=-1&clientver=&platform=WebFilter&tag=em&filter=2&iscorrection=1&privilege_filter=0&_=1545881092322")

            urls.append(gevent.spawn(main, url))

        # Wait for every spawned search/download greenlet to finish.
        gevent.joinall(urls)




  • 相关阅读:
    SynEdit(Delphi XE7)的安装和基本使用
    uniConnection断线重联(tag属性颇有深意,这样就可以在某些情况下,不用继承实现新控件就可以达到自己的目的)
    CheckSynchronize实现的不必要的复杂
    Delphi中Indy 10的安装和老版本的卸载
    JavaScript2
    C#中使用REDIS
    jQuery多文件
    Node+Express+MongoDB + Socket.io搭建实时聊天应用
    jQuery选取和操纵元素的特点
    Mvc 6 中创建 Web Api
  • 原文地址:https://www.cnblogs.com/wxc1/p/10215662.html
Copyright © 2011-2022 走看看