zoukankan      html  css  js  c++  java
  • python爬取网易云音乐歌单音乐

    在网易云音乐中第一页歌单的url:http://music.163.com/#/discover/playlist/

    依次第二页:http://music.163.com/#/discover/playlist/?order=hot&cat=%E5%85%A8%E9%83%A8&limit=35&offset=35

    依次第三页:http://music.163.com/#/discover/playlist/?order=hot&cat=%E5%85%A8%E9%83%A8&limit=35&offset=70

    然后从歌单的查看框架的源代码:

    从图中的源代码可以得到每个歌单的url:eg:http://music.163.com/#/playlist?id=696806036

    然后相应的歌单页面中可以得到歌单中每首歌的名字和歌手:

    然后用歌名和歌手,通过百度音乐搜索api接口获得songid,api是url = "http://sug.music.baidu.com/info/suggestion"

    截图来自:贴吧

    然后用获取到的songid,通过百度音乐免费API接口:http://music.baidu.com/data/music/fmlink 获取songLink进行下载,并且将歌曲保存为本地.flac文件

    eg:http://music.baidu.com/data/music/fmlink?rate=320&songIds=242078437&type=flac


    下载结果:

    代码:

    # -*- coding: utf-8 -*-
    import re
    import urllib
    import urllib2
    import os
    import stat
    import itertools
    import re
    import sys
    import requests
    import json
    import time
    import socket
    import urlparse
    import csv
    import random
    from datetime import datetime, timedelta
    import lxml.html
    
    from zipfile import ZipFile
    from StringIO import StringIO
    from downloader import Downloader
    from bs4 import BeautifulSoup
    from HTMLParser import HTMLParser
    from itertools import product
    import sys
    # Python 2 only: sys.setdefaultencoding is not reachable on the already
    # imported sys module, so the module is reloaded before calling it.
    # NOTE(review): this changes the process-wide implicit str/unicode
    # conversion codec and is widely discouraged -- confirm it is still needed.
    reload(sys)
    sys.setdefaultencoding('utf8')
    # Site root that the relative playlist links (e.g. /playlist?id=...) are
    # appended to in the main script.
    URL = 'http://music.163.com'
    # NOTE(review): NUM is not referenced anywhere in the visible code.
    NUM = 5
    def download(url, user_agent='wswp', num_try=2):
    
        headers = {'User_agent': user_agent}
        request = urllib2.Request(url, headers=headers)
        try:
            html = urllib2.urlopen(request).read()
        except urllib2.URLError as e:
            print 'Download error', e.reason
            html = None
            if num_try > 0:
                if hasattr(e, 'code') and 500 <= e.code < 600:
                    return download(url, user_agent, num_try - 1)
        return html
    
    
    def get_song_list(url):
        """Return the song titles listed on the playlist page at ``url``.

        Titles are the link texts of ``<li><a ...>title</a></li>`` items. When
        the page contains ``<ul class="f-hide">`` blocks, only items inside
        those blocks are returned; otherwise the whole page is scanned.

        Args:
            url: Address of the playlist page.

        Returns:
            A list of title strings; empty when the page could not be
            downloaded or contains no matching items.
        """
        html = download(url)
        if not html:
            # download() returns None on failure; re.findall would raise on it.
            return []

        flags = re.S | re.M
        item_pattern = r'<li><a .*?>(.*?)</a></li>'

        # The original located the hidden track list and then ignored it,
        # matching items across the entire page. With DOTALL the lazy groups
        # can span unrelated markup, so restrict the search to the list itself.
        hidden_lists = re.findall(r'<ul class="f-hide">(.*?)</ul>', html, flags)
        if not hidden_lists:
            # No hidden list found: keep the previous whole-page behaviour.
            return re.findall(item_pattern, html, flags)

        song_list = []
        for block in hidden_lists:
            song_list.extend(re.findall(item_pattern, block, flags))
        return song_list
    
    # Collect NetEase Cloud Music playlist links, e.g. /playlist?id=706469943
    def get_play_list(html):
        """Return the ``href`` of every playlist title anchor in ``html``.

        Anchors are selected by the class string 'tit f-thide s-fc0'; an
        anchor without an ``href`` contributes ``None`` to the result.
        """
        soup = BeautifulSoup(html, "html.parser")
        anchors = soup.find_all(name='a', attrs={'class': 'tit f-thide s-fc0'})
        return [anchor.get('href') for anchor in anchors]
    
    def download_music(url, song_name):
        print "Downloading song_name:" + song_name
        path = "songs"
        if not os.path.isdir(path):
            os.mkdir(path)
        f = open(path + '/' + song_name + '.flac', 'wb')
        f.write(download(url))
        f.close()
    
    def download_song(song_name,singer):
    
        url = "http://sug.music.baidu.com/info/suggestion"
        #百度音乐搜索获得songid
        mess = song_name + singer
        payload = {'word': mess, 'version': '2.1.1', 'from': '0'}
        r = requests.get(url, params=payload)
        contents = r.text
        d = json.loads(contents, encoding="utf-8")
        #print d
        if ('data' not in d):
            print "do not have flac"
            return 0
        if ('song' not in d["data"]):
            print "do not have flac"
            return 0
        song_id = d["data"]["song"][0]["songid"]
    
        print "song_id:"+song_id
    
        url = "http://music.baidu.com/data/music/fmlink" #百度音乐免费api接口
        '''
            http://music.baidu.com/data/music/fmlink?rate=320&songIds=242078437&type=&callback=cb_download&_t=1468380564513&format=json
        '''
        payload = {'songIds': song_id, 'type': 'mp3'}
        r = requests.get(url, params=payload)
        contents = r.text
        try:
            d = json.loads(contents, encoding="utf-8")
        except:
            return 0
        if d is not None and 'data' not in d or d['data'] == '':
            return 0
        songlink = d["data"]["songList"][0]["songLink"]
        if (len(songlink) < 10):
            print "do not have flac"
            return 0
        print "Song Source: " + songlink
        download_music(songlink,mess)
    
    def get_song_singer(url):
        """Return the first artist name of each track on a playlist page.

        The track data is read as JSON from the first
        ``<textarea style="display:none;">`` element of the page.

        Args:
            url: Address of the playlist page.

        Returns:
            A list with one name per track, in page order. A track without
            artists yields an empty string so positions stay aligned with the
            track list. Empty when the page, the textarea or valid JSON is
            missing.
        """
        html = download(url)
        if not html:
            return []

        soup = BeautifulSoup(html, "html.parser")
        results = soup.find_all(name='textarea', attrs={'style': 'display:none;'})
        if not results:
            return []

        # decode_contents() gives the markup between the tags, replacing the
        # original slicing by hard-coded tag lengths, which silently broke if
        # the serialised opening tag differed by even one character.
        try:
            tracks = json.loads(results[0].decode_contents(), encoding="utf-8")
        except ValueError:
            return []

        singer_list = []
        for track in tracks:
            artists = track.get("artists") or []
            singer_list.append(artists[0]["name"] if artists else "")
        return singer_list
    
    
    
    if __name__ == '__main__':
    
        num = 0
        for flag in range(1,5):
            if flag > 1:
                page = (flag - 1) * 35
                url = 'http://music.163.com/discover/playlist/?order=hot&cat=%E5%85%A8%E9%83%A8&limit=35&offset='+str(page)
            else:
                url = 'http://music.163.com/discover/playlist'
            print url
            html = download(url)
            list = get_play_list(html)
            for i in list:
                song_list_url = URL + i
                print song_list_url
                singer_list = get_song_list(song_list_url)
                singer_name = get_song_singer(song_list_url)
                tt = len(singer_list)
                mm = len(singer_name)
                index = min(tt,mm)
                num = num + mm
                for j in range(0, index):
                    print singer_name[j]
                    print singer_list[j]
                    download_song(singer_list[j],singer_name[j])
                    print "
    "
    
        print "Download " + str(num) + " music
    "
    
    
  • 相关阅读:
    217。数据中是否有重复元素(哈希表/set简法)
    悟空、悟能、悟净的理解
    Scala中Self Types实战详解之Scala学习笔记-46
    Scala中Infix Type实战详解之Scala学习笔记-45
    Scala中复合类型实战详解之Scala学习笔记-44
    Scala中结构类型实战详解之Scala学习笔记-43
    Scala中路径依赖代码实战详解之Scala学习笔记-42
    Scala中链式调用风格的实现代码实战及其在Spark编程中的广泛运用之Scala学习笔记-41
    Scala中Variance代码实战及其在Spark中的应用源码解析之Scala学习笔记-40
    Scala类型约束代码实战及其在Spark中的应用源码解析之Scala学习笔记-39
  • 原文地址:https://www.cnblogs.com/chenyang920/p/6851486.html
Copyright © 2011-2022 走看看