zoukankan      html  css  js  c++  java
  • 爬取酷狗音乐

    1 requests  

    2 gevent  

    3 sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='gb18030') # 改系统编码 utf-8 不好使

    爬取多个歌曲  批量爬取  

    import requests

    import urllib.request

    from requests.exceptions import ReadTimeout,HTTPError,RequestException
    from bs4 import BeautifulSoup
    import urllib
    import gevent
    from gevent import Greenlet
    import socket
    import random
    import sys
    import os
    import json
    import io
    import re
    import math


    def main(url):
    print(url)

    headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36'
    }
    #
    try:
    get=requests.get(url,headers)


    while get.status_code != 200:
    print("url %s"%url)

    else:
    r = get.text
    datas = json.loads(r[r.index('(') + 1:-2])
    lsits = datas['data']['lists']
    lens= len(lsits)
    print("总数->%d"%lens)
    for i in range(lens):
    print("下载编号 %s" %str(i)+"---"+lsits[i]['FileName'].replace("<em>","").replace("</em>"," "))
    print("下载编号 %s" %str(i)+"---"+lsits[i]['FileHash'].replace("<em>","").replace("</em>"," "))
    gevent.sleep(2)
    fihash = re.findall('"FileHash":"(.*?)"', r)[i]
    ps_url = "https://www.kugou.com/yy/html/index.php?r=play/getdata&hash="+fihash
    print(ps_url)
    rrr = requests.get(ps_url).text

    print("ps_url ---------------------------------")
    play_url =re.findall('"play_url":"(.*?)"',rrr)[0].replace("\","")
    print(play_url)
    print("编号{} >>> ps_url{} >>> play_url{}".format(i,ps_url,play_url))
    path_name = lsits[i]['FileName'].replace("<em>", "").replace("</em>", " ")
    path_name.replace(" ","")
    p = os.getcwd() + "/MP5/"
    with open(p+path_name+".mp3","wb") as f:
    f.write(requests.get(play_url).content)



    print(len(lsits))
    except ReadTimeout:
    print('timeout')
    except HTTPError:
    print('httperror')
    except RequestException:
    print('reqerror')




    if __name__ == '__main__':

    song_name = input("请输入歌曲名称:").split(",")
    songs = len(song_name)
    #lock = threading.Lock()
    urls=[]
    for i in range(songs):
    url = "https://songsearch.kugou.com/song_search_v2?callback=jQuery112406438825614322465_1545881092316&keyword=" + str(song_name[i]) + "&page=1&pagesize=30&userid=-1&clientver=&platform=WebFilter&tag=em&filter=2&iscorrection=1&privilege_filter=0&_=1545881092322"

    urls.append(gevent.spawn(main,url))

    gevent.joinall(urls)




  • 相关阅读:
    servlet-servletConfig
    servlet-servletContext网站计数器
    servlet-cookie
    Android 无cp命令 mv引起cross-device link
    android使用mount挂载/system/app为读写权限,删除或替换系统应用
    android使用百度地图、定位SDK实现地图和定位功能!(最新、可用+吐槽)
    解决android sdk manager无法下载SDK 的问题
    Android APK反编译详解(附图)
    Android如何防止apk程序被反编译
    不用外部JAR包,自己实现JSP文件上传!
  • 原文地址:https://www.cnblogs.com/wxc1/p/10215662.html
Copyright © 2011-2022 走看看