zoukankan      html  css  js  c++  java
  • 爬取酷狗音乐

    1 requests  

    2 gevent  

    3 sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='gb18030') # 改系统编码 utf-8 不好使

    批量爬取多首歌曲(输入多个歌名时用逗号分隔)

    import requests

    import urllib.request

    from requests.exceptions import ReadTimeout,HTTPError,RequestException
    from bs4 import BeautifulSoup
    import urllib
    import gevent
    from gevent import Greenlet
    import socket
    import random
    import sys
    import os
    import json
    import io
    import re
    import math


    def _strip_em(text):
        """Drop the ``<em>``/``</em>`` highlight tags found in search results."""
        return text.replace("<em>", "").replace("</em>", " ")


    def _parse_jsonp(text):
        """Decode the JSON payload wrapped in a JSONP ``callback(...)`` body."""
        # Slice between the first "(" and the last ")" instead of assuming a
        # fixed two-character trailer after the payload.
        return json.loads(text[text.index("(") + 1:text.rindex(")")])


    def _safe_file_name(name):
        """Remove spaces and replace characters that are illegal in file names."""
        return re.sub(r'[\\/:*?"<>|]', "_", name.replace(" ", ""))


    def main(url, timeout=10):
        """Download every song returned by one Kugou search request.

        The search response at ``url`` is JSONP; its ``data.lists`` entries
        each provide a ``FileName`` and a ``FileHash``. For every entry the
        play-data endpoint is queried with the hash, the ``play_url`` is
        extracted from that response, and the audio is saved as
        ``<cwd>/MP5/<FileName>.mp3``.

        Args:
            url: Full search URL (including the JSONP ``callback`` parameter).
            timeout: Seconds to wait on each HTTP request before giving up.

        Returns:
            None. Progress is printed; request failures are reported on
            stdout ('timeout', 'httperror', 'reqerror') rather than raised.
        """
        print(url)

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36'
        }

        try:
            # headers must be passed by keyword: the second positional argument
            # of requests.get is ``params``, not ``headers``.
            response = requests.get(url, headers=headers, timeout=timeout)
            # Raise HTTPError on a bad status instead of spinning forever on a
            # response whose status code can never change.
            response.raise_for_status()

            songs = _parse_jsonp(response.text)['data']['lists']
            print("总数->%d" % len(songs))

            save_dir = os.path.join(os.getcwd(), "MP5")
            os.makedirs(save_dir, exist_ok=True)

            for i, song in enumerate(songs):
                title = _strip_em(song['FileName'])
                # Take the hash from the same list entry as the title so the
                # two can never drift apart.
                file_hash = song['FileHash']
                print("下载编号 %s---%s" % (i, title))
                print("下载编号 %s---%s" % (i, file_hash))
                # Pause between songs; gevent.sleep also lets the other
                # spawned greenlets run in the meantime.
                gevent.sleep(2)

                ps_url = "https://www.kugou.com/yy/html/index.php?r=play/getdata&hash=" + file_hash
                print(ps_url)
                detail = requests.get(ps_url, headers=headers, timeout=timeout)
                detail.raise_for_status()

                print("ps_url ---------------------------------")
                match = re.search('"play_url":"(.*?)"', detail.text)
                # The URL arrives JSON-escaped ("\/"), so strip the backslashes.
                play_url = match.group(1).replace("\\", "") if match else ""
                if not play_url:
                    # No playable URL in the response: skip this song rather
                    # than abort the whole batch.
                    print("编号{} >>> ps_url{} >>> no play_url, skipped".format(i, ps_url))
                    continue
                print(play_url)
                print("编号{} >>> ps_url{} >>> play_url{}".format(i, ps_url, play_url))

                audio = requests.get(play_url, headers=headers, timeout=timeout)
                audio.raise_for_status()
                target = os.path.join(save_dir, _safe_file_name(title) + ".mp3")
                with open(target, "wb") as f:
                    f.write(audio.content)

            print(len(songs))
        except ReadTimeout:
            print('timeout')
        except HTTPError:
            print('httperror')
        except RequestException:
            print('reqerror')




    if __name__ == '__main__':
        # Script entry: read song names from stdin, build one search URL per
        # name, and run main() for each URL in its own greenlet.
        from urllib.parse import quote

        # Accept both the ASCII comma and the full-width comma as separators,
        # and ignore blank entries (e.g. from a trailing comma).
        raw_names = re.split("[,,]", input("请输入歌曲名称:"))
        song_names = [name.strip() for name in raw_names if name.strip()]

        jobs = []
        for name in song_names:
            # Percent-encode the keyword so characters such as "&", "#" or
            # spaces in a song name cannot corrupt the query string.
            url = "https://songsearch.kugou.com/song_search_v2?callback=jQuery112406438825614322465_1545881092316&keyword=" + quote(name) + "&page=1&pagesize=30&userid=-1&clientver=&platform=WebFilter&tag=em&filter=2&iscorrection=1&privilege_filter=0&_=1545881092322"
            jobs.append(gevent.spawn(main, url))

        # Wait for every download greenlet to finish before exiting.
        gevent.joinall(jobs)




  • 相关阅读:
    委托
    SQL Server 2012 学习笔记5
    SQL Server 2012 学习笔记4
    SQL Server 2012 学习笔记3 增查改删
    SQL Server 2012 学习笔记2
    SQL Server 2012 学习笔记1
    PCD文件去除曲率的脚本
    pcl曲面网格模型的三种显示方式
    pcl计算样点法向并显示
    Markdown的使用---现学现用
  • 原文地址:https://www.cnblogs.com/wxc1/p/10215662.html
Copyright © 2011-2022 走看看