zoukankan      html  css  js  c++  java
  • 爬取喜马拉雅免费有声小说

    import requests
    import re
    from bs4 import BeautifulSoup
    import json
    import math

    '''
    写文件

    '''


    def json_sanalyzes(legal):
        """Flatten pages of track records into ``{'name', 'src'}`` dicts.

        Args:
            legal: Iterable of pages; each page is an iterable of mappings
                that provide a ``'trackName'`` and a ``'src'`` key.

        Yields:
            A new dict per track holding the track title under ``'name'``
            and the value of ``'src'`` under ``'src'``.

        Raises:
            KeyError: If a track record lacks ``'trackName'`` or ``'src'``.
        """
        for page in legal:
            for track in page:
                # Build a fresh dict for every track. Re-using one dict would
                # make all previously yielded items change retroactively, so
                # collecting the results into a list would give N copies of
                # the last track.
                yield {'name': track['trackName'], 'src': track['src']}



    # return contents
    #
    #


    def dump_load(url, id):  # `id` shadows the builtin; kept for caller compatibility
        """Yield the track list of every page of a Ximalaya album.

        The album landing page is fetched first to read the total number of
        tracks from its ``<h2 class="rC5T">`` heading; the paginated
        ``revision/play/album`` endpoint is then queried once per page.

        Args:
            url: URL of the album landing page.
            id: Album id inserted into the paginated endpoint URL.

        Yields:
            The ``['data']['tracksAudioPlay']`` value of each page's JSON
            response, one page per iteration.

        Raises:
            ValueError: If the track count cannot be found in the landing page.
            requests.exceptions.HTTPError: If a request keeps returning a
                non-200 status.
        """
        header = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36'
        }
        page_size = 30  # tracks requested per page

        landing = _get_ok(url, header)
        soup = BeautifulSoup(landing.text, "html.parser")
        heading = soup.find("h2", class_="rC5T")
        # The literal parentheses and the digit class must be escaped; the
        # single capture group is the total number of tracks in the album.
        match = re.search(
            r'<h2 class="rC5T">专辑里的声音\(<!-- -->(\d+)<!-- -->\)',
            str(heading),
        )
        if match is None:
            raise ValueError("could not find the track count on %s" % url)
        total_tracks = int(match.group(1))

        page_count = math.ceil(total_tracks / page_size)
        for page_num in range(1, page_count + 1):
            page_url = (
                "https://www.ximalaya.com/revision/play/album"
                "?albumId=%s&pageNum=%d&sort=-1&pageSize=%d"
                % (id, page_num, page_size)
            )
            page = _get_ok(page_url, header)
            payload = json.loads(page.content.decode())
            yield payload['data']['tracksAudioPlay']


    def _get_ok(url, headers, retries=3, timeout=10):
        """Return the response of a GET request once it answers HTTP 200.

        The request is re-issued up to ``retries`` times; an unbounded wait on
        an already-received response would never terminate.

        Raises:
            requests.exceptions.HTTPError: If no attempt returned status 200.
        """
        last_status = None
        for _ in range(retries):
            response = requests.get(url, headers=headers, timeout=timeout)
            if response.status_code == 200:
                return response
            last_status = response.status_code
        raise requests.exceptions.HTTPError(
            "GET %s failed after %d attempts (last status: %s)"
            % (url, retries, last_status)
        )

    if __name__ == "__main__":
        import os
        import sys

        album_id = 12642314  # id of the novel (album) to download
        # Landing page of the album; used to read the total track count.
        url = "https://www.ximalaya.com/youshengshu/%d/" % album_id
        # Lazily walk every page of the album and reduce each track to its
        # title ('name') and audio address ('src').
        response = json_sanalyzes(dump_load(url, album_id))
        header = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36'
        }

        p = "D:/untitled2/venv/theand/小说/"  # directory the audio files are saved to
        # Create the target directory (and any missing parents) once, up front.
        os.makedirs(p, exist_ok=True)

        for i in response:  # download the tracks one by one
            dump_cont = requests.get(i['src'], headers=header, timeout=30)
            if dump_cont.status_code != 200:
                # Spinning on an already-received response can never succeed;
                # report the failure and move on to the next track.
                print(
                    "skipping %s: HTTP %s" % (i['name'], dump_cont.status_code),
                    file=sys.stderr,
                )
                continue

            # Drop double quotes and spaces from the title so it can be used
            # as a file name.
            # NOTE(review): the original replaced '"' twice in a row, which
            # suggests typographic quotes were intended - confirm.
            b_name = i['name'].replace('"', '').replace(' ', '')
            path_p = p + b_name + ".mp3"
            with open(path_p, "wb") as f:
                # Write the body that was already downloaded instead of
                # requesting the same file a second time.
                f.write(dump_cont.content)

  • 相关阅读:
    #2051:Bitset(进制转化)
    #2054:A == B ?(水题坑人)
    #2045:不容易系列之三LELE的RPG难题(dp递推)
    #2037:今年暑假不AC
    #2036:改革春风吹满地
    OJ中的语言选项里G++ 与 C++的区别
    如何在CSDN上如何快速转载博客
    Python之路(第八篇)Python内置函数、zip()、max()、min()
    Python编程笔记(第一篇)Python基础语法
    Python之路(第七篇)Python作用域、匿名函数、函数式编程、map函数、filter函数、reduce函数
  • 原文地址:https://www.cnblogs.com/wxc1/p/10237354.html
Copyright © 2011-2022 走看看