zoukankan      html  css  js  c++  java
  • 爬取网易云排行榜

    import requests
    import json
    import MySQLdb
    from lxml import etree
    # HTTP request headers: identify as a desktop browser via the User-Agent string.
    header = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/84.0.4147.89 Safari/537.36 Edg/84.0.522.40'
        ),
    }

    # Chart display name -> chart id.
    paths = {
        "云音乐飙升榜": '19723756',
        "云音乐新歌榜": '3779629',
        "网易原创歌曲榜": '2884035',
        "云音乐热歌榜": '3778678',
    }
    
    
    # Scrape every chart listed in `paths` and store its songs in the `wangyy` table.
    #
    # A single connection is opened for the whole run and always closed in `finally`;
    # opening one connection per song leaked all but the last one of each chart.
    mydb = MySQLdb.connect("localhost", "root", "123456", "wangyiyun", charset='utf8', autocommit=True)
    try:
        mycursor = mydb.cursor()
        # Driver-side placeholders: values are escaped by the driver, so names that
        # contain quotes cannot break (or inject into) the statement.
        insert_sql = ('insert into wangyy(`num`,`name`,`songer`,`duration`,`rankName`) '
                      'values(%s,%s,%s,%s,%s)')

        for key in paths:
            print('==========================%s==============================' % (key))
            url = "https://music.163.com/discover/toplist?id=%s" % (paths[key])
            # A timeout keeps an unresponsive server from hanging the script forever.
            response = requests.get(url, headers=header, timeout=10)
            response.encoding = 'utf-8'
            html = etree.HTML(response.text)

            # The song list is read as a JSON array from the text of this <textarea>.
            # etree.HTML can return None for an empty document, so guard before xpath.
            nodes = html.xpath("//textarea[@id='song-list-pre-data']/text()") if html is not None else []
            if not nodes:
                # Skip this chart instead of crashing with IndexError.
                print('no song data found for %s, skipping' % (key))
                continue
            song_list = json.loads(nodes[0])

            # Ranks are 1-based, matching the value previously written to the database;
            # the printed record now shows the same number that is stored.
            for index, song in enumerate(song_list, start=1):
                name = song["name"]
                # Keep every artist (previously only the last one survived the loop);
                # an empty artist list yields an empty string instead of a stale value.
                songers = "/".join(songer["name"] for songer in song["artists"])
                duration = song["duration"]
                song_map = {"num": index, "name": name, "songer": songers,
                            "duration": duration, "rankName": key}
                print(song_map)
                # Store the record; the connection was opened with autocommit=True,
                # so no explicit commit is required.
                mycursor.execute(insert_sql, (index, name, songers, duration, key))
    finally:
        mydb.close()

  • 相关阅读:
    队列
    使用JPype实现Python调用JAVA程序
    Django和Flask对于URL尾斜杠(back slash)的处理
    数据仓库建设中的数据建模方法(转)
    python自定义logger handler
    Eclipse下.project和.classpath作用(转)
    理解python的with语句
    django常见小问题收集(转)
    windows下无法创建django工程的问题
    Excel的python读写
  • 原文地址:https://www.cnblogs.com/20190308-zlz/p/9000_zzy.html
Copyright © 2011-2022 走看看