zoukankan      html  css  js  c++  java
  • Python信息采集

    Python信息采集 - 喜马拉雅专辑歌曲

    setting.py

    # Ximalaya album-playback endpoint. The two %s placeholders are filled,
    # in order, with the album id and the page number.
    XMLY_URL = (
        "https://www.ximalaya.com/revision/play/album"
        "?albumId=%s&pageNum=%s&sort=-1&pageSize=30"
    )
    # Request headers carrying a desktop Chrome User-Agent string.
    HEADER = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/73.0.3683.86 Safari/537.36"
        ),
    }
    
    
    # Directory settings: folder names used for the saved audio files and
    # cover images.
    COVER_PATH = "Cover"
    MUSIC_PATH = "Music"


    # Database settings: a client for the MongoDB server on localhost:27017,
    # with MONGODB bound to its "db3" database.
    import pymongo
    conn = pymongo.MongoClient(host="127.0.0.1", port=27017)
    MONGODB = conn["db3"]
    

    data.py

    import time
    
    from setting import XMLY_URL, HEADER, MONGODB, MUSIC_PATH, COVER_PATH
    import requests, os
    from uuid import uuid4
    
    # Fetch page 1 of album 17514344, save each track's audio and cover image
    # to disk under a fresh UUID-based file name, and store the resulting
    # {music, cover, title} records in the MongoDB "content" collection.

    # Seconds to wait on each HTTP call; without a timeout requests can block
    # forever on a stalled connection.
    _TIMEOUT = 10


    def _download(url, dest_path):
        """Download *url* into *dest_path* and return the response object.

        Raises requests.HTTPError on a non-success status so that an error
        page is never written to disk as if it were media.
        """
        resp = requests.get(url, timeout=_TIMEOUT)
        resp.raise_for_status()
        with open(dest_path, "wb") as f:
            f.write(resp.content)
        return resp


    my_url = XMLY_URL % ("17514344", "1")

    res = requests.get(my_url, headers=HEADER, timeout=_TIMEOUT)
    res.raise_for_status()
    data = res.json()
    content_list = []

    # open() does not create parent directories, so make sure they exist.
    os.makedirs(MUSIC_PATH, exist_ok=True)
    os.makedirs(COVER_PATH, exist_ok=True)

    # Tolerate a payload whose "data" or "tracksAudioPlay" entry is absent or null.
    tracks = (data.get("data") or {}).get("tracksAudioPlay") or []

    for music_info in tracks:
        src = music_info.get("src")
        if not src:
            # No audio URL for this track (presumably not freely playable --
            # verify against the API); nothing to download, so skip it.
            continue

        # One random base name shared by the audio file and its cover.
        filename = uuid4()
        music = {
            "music": "",
            "cover": "",
            "title": music_info.get("trackName"),
        }

        _download(src, os.path.join(MUSIC_PATH, f"{filename}.mp3"))
        music["music"] = f"{filename}.mp3"

        cover_url = music_info.get("trackCoverPath")
        if cover_url:
            # Only protocol-relative paths ("//host/...") need a scheme added.
            if cover_url.startswith("//"):
                cover_url = "http:" + cover_url
            cover = _download(cover_url, os.path.join(COVER_PATH, f"{filename}.jpg"))
            print(cover, "cover")
            music["cover"] = f"{filename}.jpg"

        content_list.append(music)

        # Short pause between tracks to avoid hammering the server.
        time.sleep(0.2)

    # insert_many rejects an empty document list, so only write when there is data.
    if content_list:
        MONGODB.content.insert_many(content_list)
  • 相关阅读:
    织梦DEDEcms首页调用文档整篇内容
    dedecms专题列表页不显示标题的解决办法
    怎么让织梦文章按照权重排序
    Codeforces274B
    HDU5693
    HDU2476
    POJ3613
    「LibreOJ NOIP Round #1」旅游路线
    Educational Codeforces Round 48
    组合博弈学习笔记
  • 原文地址:https://www.cnblogs.com/konghui/p/10900587.html
Copyright © 2011-2022 走看看