zoukankan      html  css  js  c++  java
  • 爬取酷狗音乐

    1 requests  

    2 gevent  

    3 sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='gb18030') # 修改标准输出的编码：设为 utf-8 不起作用，因此改用 gb18030

    支持批量爬取：一次输入多个歌曲名称（用英文逗号 "," 分隔），程序会逐个搜索并下载。

    import requests

    import urllib.request

    from requests.exceptions import ReadTimeout,HTTPError,RequestException
    from bs4 import BeautifulSoup
    import urllib
    import gevent
    from gevent import Greenlet
    import socket
    import random
    import sys
    import os
    import json
    import io
    import re
    import math


    def main(url):
        """Run one Kugou search and download every hit as an MP3 file.

        ``url`` is a ``song_search_v2`` JSONP search URL. Each entry of
        ``data.lists`` in the response is resolved to a playable URL through
        the ``play/getdata`` endpoint and written to
        ``<cwd>/MP5/<FileName>.mp3``.

        Nothing is returned; progress and failures are printed. Request
        failures are reported as ``timeout`` / ``httperror`` / ``reqerror``
        and end the run for this search URL.
        """
        print(url)

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36'
        }
        # Seconds. Without a timeout a stalled connection would hang forever.
        timeout = 15

        try:
            # Headers must be passed by keyword: the second positional
            # argument of requests.get() is ``params``, not ``headers``.
            response = requests.get(url, headers=headers, timeout=timeout)

            if response.status_code != 200:
                # Report once and give up; re-testing the same response in a
                # loop can never succeed.
                print("url %s" % url)
                return

            r = response.text
            try:
                # The endpoint answers with JSONP: callback({...}). Keep only
                # the text between the first "(" and the last ")".
                datas = json.loads(r[r.index('(') + 1:r.rindex(')')])
                lsits = datas['data']['lists'] or []
            except (ValueError, KeyError, TypeError):
                print('parseerror')
                return

            print("总数->%d" % len(lsits))

            save_dir = os.path.join(os.getcwd(), "MP5")
            os.makedirs(save_dir, exist_ok=True)

            for i, song in enumerate(lsits):
                # The search highlights matches with <em> tags; strip them.
                file_name = song['FileName'].replace("<em>", "").replace("</em>", " ")
                # Take the hash from the parsed entry itself so it always
                # belongs to this song (a regex over the raw text can match
                # other "FileHash" fields and drift out of step).
                fihash = song['FileHash'].replace("<em>", "").replace("</em>", " ")
                print("下载编号 %s" % str(i) + "---" + file_name)
                print("下载编号 %s" % str(i) + "---" + fihash)

                # Throttle requests; also yields to the other greenlets.
                gevent.sleep(2)

                ps_url = "https://www.kugou.com/yy/html/index.php?r=play/getdata&hash=" + fihash
                print(ps_url)
                rrr = requests.get(ps_url, headers=headers, timeout=timeout).text

                print("ps_url ---------------------------------")
                found = re.search('"play_url":"(.*?)"', rrr)
                # The URL arrives JSON-escaped ("\/"); drop the backslashes.
                play_url = found.group(1).replace("\\", "") if found else ""
                if not play_url:
                    # No playable URL for this entry: skip it instead of
                    # aborting the whole batch.
                    print("编号{} >>> no play_url, skipped".format(i))
                    continue
                print(play_url)
                print("编号{} >>> ps_url{} >>> play_url{}".format(i, ps_url, play_url))

                # Remove spaces and replace characters that are not allowed
                # in file names.
                path_name = re.sub(r'[\\/:*?"<>|]', "_", file_name.replace(" ", ""))

                # Download first, then write, so a failed request does not
                # leave an empty .mp3 behind.
                audio = requests.get(play_url, headers=headers, timeout=timeout)
                if audio.status_code != 200:
                    print("编号{} >>> download failed, status {}".format(i, audio.status_code))
                    continue
                with open(os.path.join(save_dir, path_name + ".mp3"), "wb") as f:
                    f.write(audio.content)

            print(len(lsits))
        except ReadTimeout:
            print('timeout')
        except HTTPError:
            print('httperror')
        except RequestException:
            print('reqerror')




    if __name__ == '__main__':
        # Script entry point: read comma-separated song names from stdin and
        # run one search/download greenlet per name.
        # Local import: only this entry point needs URL quoting.
        from urllib.parse import quote

        song_name = input("请输入歌曲名称:").split(",")
        #lock = threading.Lock()
        # NOTE(review): no gevent monkey-patching is visible in this file, so
        # the blocking requests calls presumably only interleave at
        # gevent.sleep() -- confirm whether monkey.patch_all() is intended.
        jobs = []
        for name in song_name:
            name = name.strip()
            if not name:
                # Ignore blank entries such as those from a trailing comma.
                continue
            # Percent-encode the keyword so characters like "&", "#", "+" or
            # spaces cannot corrupt the query string.
            url = "https://songsearch.kugou.com/song_search_v2?callback=jQuery112406438825614322465_1545881092316&keyword=" + quote(name, safe="") + "&page=1&pagesize=30&userid=-1&clientver=&platform=WebFilter&tag=em&filter=2&iscorrection=1&privilege_filter=0&_=1545881092322"

            jobs.append(gevent.spawn(main, url))

        gevent.joinall(jobs)




  • 相关阅读:
    转载 Oracle常用函数
    转载:ASP.NET Request对象使用实例浅析
    JQuery's Ajax request a datatable
    转载 WCF VS ASPNET WEB SERVICE
    转载 浅谈C#中构造函数与析构函数
    javascript正则表达式 实现Trim()
    转载: NET中使用log4net记录日志
    Consuming WCF / ASMX / REST service using JQuery
    asp.net下使用Request.Form获取非服务器控件的值的方法
    如何把Access转成SQL Server的方法介绍
  • 原文地址:https://www.cnblogs.com/wxc1/p/10215662.html
Copyright © 2011-2022 走看看