zoukankan      html  css  js  c++  java
  • ximalaya-spider

    import json
    import os
    import re

    import parsel
    import requests
    
    # Download every track listed on an album page of ximalaya.com.
    #
    # Flow: fetch the album listing page -> pull each track's title and link
    # out of the HTML -> ask the play API for the real audio URL -> download
    # the audio bytes and store them under `path`, one file per track.
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36',
    }
    # The original script passed `proxies` to every request without ever
    # defining it (NameError).  None lets requests fall back to its default
    # behaviour; replace it with a mapping such as
    # {'http': 'http://host:port', 'https': 'http://host:port'} to use a proxy.
    proxies = None
    # Seconds to wait for a server before giving up instead of hanging forever.
    timeout = 30
    path = "./video/"
    # open() does not create missing directories, so make sure the target exists.
    os.makedirs(path, exist_ok=True)
    # Characters that must not appear in a file name (path separators, the
    # Windows-reserved punctuation and line breaks).  Compiled once because it
    # is reused for every track.
    pat = re.compile(r'[\\/:*?"<>|\r\n]+')

    # NOTE(review): range(1) requests only page "p0" -- confirm whether the
    # site numbers its pages from 0 or from 1 before widening the range.
    for i in range(1):
        url = 'https://www.ximalaya.com/youshengshu/4256765/p%d/' % i
        response = requests.get(url, headers=headers, proxies=proxies, timeout=timeout)
        # Fail loudly on an HTTP error rather than parsing an error page.
        response.raise_for_status()

        selector = parsel.Selector(response.text)
        lis = selector.xpath('//*[@id="anchor_sound_list"]/div[2]/ul/li')

        for li in lis:
            title = li.xpath('.//a/@title').get()
            href = li.xpath('.//a/@href').get()
            # .get() returns None when the <li> has no matching <a>; such an
            # entry carries nothing that could be downloaded.
            if not title or not href:
                continue

            # The track id is the last path segment of the link.
            m4a_id = href.rstrip('/').split('/')[-1]
            video_url = f'https://www.ximalaya.com/revision/play/v1/audio?id={m4a_id}&ptype=1'
            print("开始下载音频数据:%s" % title)

            try:
                m4a_str = requests.get(url=video_url, headers=headers, proxies=proxies, timeout=timeout).text
                m4a_dict = json.loads(m4a_str)
            except (requests.RequestException, ValueError) as err:
                # One unreachable or malformed track must not abort the run.
                print("%s音频信息获取失败: %s" % (title, err))
                continue

            # The play API may omit 'data' or return a null 'src'
            # (presumably for tracks that are not freely playable -- verify).
            m4a_url = (m4a_dict.get('data') or {}).get('src')
            if not m4a_url:
                print("%s没有可用的音频地址,已跳过" % title)
                continue

            try:
                m4a_response = requests.get(m4a_url, headers=headers, proxies=proxies, timeout=timeout)
                m4a_response.raise_for_status()
            except requests.RequestException as err:
                print("%s音频数据下载失败: %s" % (title, err))
                continue

            # Substitution is a no-op when the title is already clean, so a
            # single code path covers both cases.
            new_title = pat.sub('_', title)
            # NOTE(review): the variable names suggest the payload is m4a
            # audio; the '.mp3' suffix is kept so existing file names do not
            # change.
            with open(os.path.join(path, new_title + '.mp3'), "wb") as w:
                w.write(m4a_response.content)
            print("%s音频数据保存完毕" % title)
    

      

  • 相关阅读:
    《构建之法》第8、9、10章的读后感和第一个sprint总结
    实验三 进程调度模拟程序
    构建之法第6、7章的读后感
    实验二 作业调度模拟程序
    0415博客园评价
    0414复利计算6.0-----结对编程
    0408~送给小伙伴的汉堡包
    学习进度条
    Sprint three
    sprint one
  • 原文地址:https://www.cnblogs.com/hello-python2020/p/14187350.html
Copyright © 2011-2022 走看看