zoukankan      html  css  js  c++  java
  • 爬取ts类型视频文件并且合并成mp4文件

    # !/usr/bin/env python3
    # -*- coding: utf-8 -*-
    # @Time : 2020/12/21 上午8:51
    # @Author : SR
    # @Email : srcoder@1163.com
    # @File : spider.py
    # @Software: PyCharm
    
    import os
    
    import requests
    
    from multiprocessing.pool import ThreadPool
    
    
    class SpiderMovieFromChenYu:
        """Download numbered ``.ts`` video segments and merge them into one mp4.

        Typical use: call :meth:`mkdir_directory`, run :meth:`get_ts` for every
        segment number (possibly from several threads), then call
        :meth:`check_ts`, which retries failed segments and finally merges
        everything via :meth:`get_video`.
        """

        # URL template of a single segment; ``%03d`` receives the segment number.
        TS_URL_TEMPLATE = 'https://sina.com-h-sina.com/20180815/9998_f9aa34bf/1000k/hls/c0cdc4673f4%03d.ts'
        # File name of the merged video inside ``save_movie_path``.
        MOVIE_NAME = '明日的我与昨日你的约会.mp4'

        def __init__(self, save_ts_path, save_movie_path, fail_ts_list=None, ):
            """Store the target directories and the failure bookkeeping list.

            :param save_ts_path: directory that receives the downloaded segments.
            :param save_movie_path: directory that receives the merged video.
            :param fail_ts_list: optional list of segment numbers known to have
                failed; the given list object is used (and mutated) as-is.
                When omitted, every instance gets its own fresh list.
            """
            self.save_ts_path = save_ts_path
            self.save_movie_path = save_movie_path
            # A ``[]`` default would be shared by every instance of the class.
            self.fail_ts_list = [] if fail_ts_list is None else fail_ts_list

            self.headers = {
                'Referer': 'http://www.chenyutv.com/',

                'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'
            }

        def mkdir_directory(self):
            """Create both output directories, including missing parents."""
            for directory in (self.save_ts_path, self.save_movie_path):
                os.makedirs(directory, exist_ok=True)

        def get_ts(self, number, flag=False):
            """Download one segment unless it is already on disk.

            :param number: segment number substituted into ``TS_URL_TEMPLATE``.
            :param flag: ``True`` when this call is a retry of a previously
                failed segment; on success the number is then removed from
                ``fail_ts_list``.

            Any failure (network error, non-200 status, write error) is recorded
            in ``fail_ts_list`` instead of being raised, so that one bad segment
            does not abort a whole pool of workers.
            """
            play_url = self.TS_URL_TEMPLATE % number
            ts_number = play_url.split('/')[-1]  # file name of the segment
            ts_file = os.path.join(self.save_ts_path, ts_number)

            downloaded = os.path.exists(ts_file)  # skip segments already on disk
            if not downloaded:
                part_file = ts_file + '.part'
                try:
                    response = requests.get(play_url, headers=self.headers, timeout=60)
                    if response.status_code == 200:
                        # Write to a temporary name first so that an interrupted
                        # write never leaves a truncated file that would later be
                        # mistaken for a finished download.
                        with open(part_file, 'wb') as f:
                            f.write(response.content)
                        os.replace(part_file, ts_file)
                        downloaded = True
                except Exception as e:
                    # Worker boundary: report and fall through to the failure
                    # bookkeeping below so the segment can be retried.
                    print("%s:下载失败 %s" % (ts_number, e))

            if downloaded:
                if flag and number in self.fail_ts_list:
                    self.fail_ts_list.remove(number)
            elif number not in self.fail_ts_list:
                self.fail_ts_list.append(number)

        def check_ts(self, max_rounds=10):
            """Retry every failed segment, then merge the segments into the video.

            :param max_rounds: maximum number of passes over the failure list.
                Bounding the retries keeps a permanently unavailable segment
                (for example a 404) from looping forever.
            """
            print("开始检查:")
            print(self.fail_ts_list)
            rounds = 0
            while self.fail_ts_list and rounds < max_rounds:
                rounds += 1
                # Iterate over a snapshot: get_ts() removes entries on success.
                for number in list(self.fail_ts_list):
                    self.get_ts(number, True)
                    print("%s:下载完毕" % number)
                    print(self.fail_ts_list)
            if self.fail_ts_list:
                print("以下ts文件下载失败: %s" % self.fail_ts_list)
            else:
                print("ts 文件下载完成!")
            self.get_video()  # merge whatever has been downloaded into the mp4

        def get_video(self):
            """Concatenate all ``.ts`` files in ``save_ts_path`` into the mp4 file.

            The output file is rewritten from scratch on every call, so calling
            this method twice does not duplicate the content.
            """
            # Segment names share one prefix, so ordering by (length, name) gives
            # numeric order even once the number outgrows the 3-digit padding.
            ts_names = sorted(
                (name for name in os.listdir(self.save_ts_path) if name.endswith('.ts')),
                key=lambda name: (len(name), name),
            )

            movie_file = os.path.join(self.save_movie_path, self.MOVIE_NAME)
            with open(movie_file, 'wb') as merged:
                for ts in ts_names:
                    with open(os.path.join(self.save_ts_path, ts), 'rb') as segment:
                        merged.write(segment.read())
                    print("%s:写入完成" % ts)
    
    
    if __name__ == '__main__':
        # Ask for the inclusive segment range and the two output directories.
        min_number = int(input('请输入ts的起始数字>>:').strip())
        max_number = int(input('请输入ts的结尾数字>>:').strip()) + 1  # make the end inclusive
        save_ts_path = input('请输入ts保存文件路径>>:').strip()
        save_movie_path = input('请输入视频保存文件路径>>:').strip()

        spider = SpiderMovieFromChenYu(save_ts_path, save_movie_path)
        spider.mkdir_directory()

        # Download the segments concurrently; always release the worker threads,
        # even if map() raises.
        pool = ThreadPool(100)
        try:
            pool.map(spider.get_ts, range(min_number, max_number))
        finally:
            pool.close()
            pool.join()

        # Retry the segments that failed and merge everything into the mp4.
        # (Calling get_ts() here without a segment number raised a TypeError.)
        spider.check_ts()
  • 相关阅读:
    Quicksum -SilverN
    uva 140 bandwidth (好题) ——yhx
    uva 129 krypton factors ——yhx
    uva 524 prime ring problem——yhx
    uva 10976 fractions again(水题)——yhx
    uva 11059 maximum product(水题)——yhx
    uva 725 division(水题)——yhx
    uva 11853 paintball(好题)——yhx
    uva 1599 ideal path(好题)——yhx
    uva 1572 self-assembly ——yhx
  • 原文地址:https://www.cnblogs.com/SR-Program/p/14171353.html
Copyright © 2011-2022 走看看