zoukankan      html  css  js  c++  java
  • m3u8视频下载加解密系列_练手spider_不定时长期此贴更新

    本次目标  http://www.qiqi.la/vod-detail-id-46194.html
        目的,down魔道祖师,实现
        前期分析文件得到以下粗略步骤
    1 进入二级页面,找到  
    <iframe width="100%" height="480" src="https://cn2.zuixinbo.com/share/722caafb4825ef5d8670710fa29087cf" frameborder="0" allowfullscreen=""></iframe>
        得到网址
    2 访问      https://cn2.zuixinbo.com/share/722caafb4825ef5d8670710fa29087cf
        需要带上协议头
        Referer: https://cn2.zuixinbo.com/share/722caafb4825ef5d8670710fa29087cf
        User-Agent: Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36    
        返回另一个页面response_1
        得到文件标题 <title>重新压制魔道祖师 前尘篇02 序章 诛邪(下)福利加长版 高清(480P).qlv</title>
    3 在response_1
        得到:var main = "/20180710/4671_a5ef5a19/index.m3u8?sign=b0023d8b455da27a4294b38c7815f7b3";
        拼合网页:https://cn2.zuixinbo.com/20180710/4671_a5ef5a19/index.m3u8?sign=b0023d8b455da27a4294b38c7815f7b3
        访问:得到返回结果
            #EXTM3U
            #EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=800000,RESOLUTION=1080x608
            1000k/hls/index.m3u8
    4 拼合 https://cn2.zuixinbo.com/20180710/4671_a5ef5a19/1000k/hls/index.m3u8
        带协议访问
            Referer: https://cn2.zuixinbo.com/share/722caafb4825ef5d8670710fa29087cf
            User-Agent: Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36
        得到.ts下载文件路径
        分段下载
    5 拼合.ts文件

    有空更新完整代码
     2018-10-21
        开始编写代码 

         在重新写代码的过程中,发现直接在播放页面就有全部的播放地址,只不过是用UCS-2的
            编码转换了一下,我们需要把其转换成ANSI编码
        2 OK,这下直接拿到播放地址,做一下格式化的工作,进行第2步解析,上面的第一步工作算是白费了一片心思

        3 按照上面步骤依次完成,基本没问题

        

    # -*- coding:utf-8 -*-
    # @time:2018-10-21 14:43
    # @Auther:1043453579@qq.com
    
    from urllib.request import Request
    from urllib.request import urlopen
    import re,time,os
    from concurrent.futures import ProcessPoolExecutor,ThreadPoolExecutor
    
    static_url_1 = 'http://www.qiqi.la/vod-detail-id-46194.html'  # detail page of the target series (passed to A(), used as the default task url)
    class A(object):
        """Scrape a zuixinbo-style share page and download its m3u8 video segments.

        Pipeline (see main): share page -> index m3u8 -> variant m3u8 -> .ts files.
        """

        def __init__(self, url, e=15):
            # Browser-like UA: the site rejects urllib's default user agent.
            self.header = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36'}
            self.path = os.getcwd()  # download root; one sub-directory per video title
            self.e = e  # kept for interface compatibility (unused)
            self.static_url = 'https://cn2.zuixinbo.com'  # site base joined with relative paths

        def num_of_e(self, url_2):
            """Fetch the share page; return the m3u8 index path, referer and title.

            Exits the process when the page has no `var main = "...";` source.
            """
            res = urlopen(Request(url=url_2, headers=self.header)).read().decode()
            title = self.take_middle_text(res, '<title>', txt_e='</title>')  # episode title
            m = re.search('var main = "(.*?)";', res)  # relative path of the index m3u8
            if m is None or not m.group(1):
                # guard: original crashed with AttributeError when the pattern was absent
                exit()
            return {'var_main': m.group(1), 'referer': url_2, '标题': title}

        def open_3(self, url, referer='', **kwargs):
            """Fetch the first-level index m3u8 and extract the 1080x608 variant path."""
            url = self.static_url + url
            headers = {'Referer': referer}
            headers.update(self.header)
            res = urlopen(Request(url=url, headers=headers)).read().decode()
            # Text between the resolution tag and the next '.m3u8' is the variant path.
            _ = self.take_middle_text(res, '1080x608', '.m3u8') + '.m3u8'
            return {'url': _.split(), 'regerer1': url}

        def open_4(self, url, referer1='', **kwargs):
            """Fetch the variant m3u8 and collect the .ts segment names."""
            parts = referer1.split('/')[0:-1]
            parts.append(*url)  # url is the 1-element list produced by open_3
            url = '/'.join(parts)
            print(url)
            headers = {'Referer': referer1}
            headers.update(self.header)
            res = urlopen(Request(url=url, headers=headers)).read().decode()
            ts_list = []
            for line in res.split('\n'):  # fixed: the '\n' literal was broken in source
                if line and line[0] != '#':  # skip m3u8 directives and blank lines
                    ts_list.append(line)
            return {'ts_list': ts_list, 'url': url}

        def take_middle_text(self, txt, txt_s, txt_e='', seeks=0, seeke=0):
            """Return the text between txt_s and txt_e, or a slice around txt_s.

            seeks: return that many characters immediately before txt_s.
            seeke: return that many characters immediately after txt_s.
            Returns False when txt_e is given but not found; returns the Chinese
            error string (kept for backward compatibility) on bad arguments or
            when txt_s is missing.
            """
            try:
                if not (txt_e or seeks or seeke):
                    raise ValueError('need txt_e, seeks or seeke')  # was `raise 1`
                s_1 = txt.find(txt_s)
                if s_1 == -1:
                    raise ValueError('start marker not found')
                l_1 = len(txt_s)
                if txt_e:
                    # fixed: search only AFTER txt_s, otherwise an earlier
                    # occurrence of txt_e yields an empty/garbage slice
                    s_2 = txt.find(txt_e, s_1 + l_1)
                    if s_2 == -1:
                        return False
                    return txt[s_1 + l_1:s_2]
                if seeks:
                    return txt[s_1 - seeks:s_1]
                if seeke:
                    return txt[s_1 + l_1:s_1 + l_1 + seeke]
            except Exception:
                return '传参错误或未找到传参文本'

        def down_ts(self, dict, path_1):
            """Download every .ts segment listed in dict['ts_list'] into path_1.

            Existing files are skipped so an interrupted run can be resumed.
            NOTE(review): parameter name `dict` shadows the builtin; kept so the
            external interface stays unchanged.
            """
            url = os.path.dirname(dict['url']) + '/'  # base url of the segments
            for seg in dict['ts_list']:
                print(path_1, '这里是path_1')
                path = os.path.join(path_1, seg)
                print(path, '这里是path_ts文件网址')
                if os.path.exists(path):
                    print('已存在,跳过')
                    continue
                try:
                    res = urlopen(Request(url=url + seg, headers=self.header)).read()
                    with open(path, 'wb') as f:
                        f.write(res)
                    print('成功写入一条')
                except Exception:  # best-effort: a failed segment is reported, not fatal
                    print('写入失败')

        def main(self, url):
            """Run the full pipeline for one episode share url."""
            dict_1 = self.num_of_e(url)  # share page -> index path / referer / title
            dict_2 = self.open_3(dict_1['var_main'], dict_1['referer'])
            dict_3 = self.open_4(dict_2['url'], dict_2['regerer1'])
            path = os.path.join(self.path, dict_1['标题'])
            if not os.path.exists(path):
                os.mkdir(path)  # one directory per episode title
            self.down_ts(dict_3, path)
    
    
    if __name__ == '__main__':

        # Two worker processes; 2.txt holds one task per line: 第NN集$<share-url>#
        executor = ProcessPoolExecutor(2)
        spider = A(static_url_1, 15)
        with open('2.txt', 'r', encoding='utf8') as f:
            for line in f:
                target = line.split()[0].split('$')[1].split('#')[0]
                # .result() blocks, so episodes are effectively processed in order
                print(executor.submit(spider.main, target).result())
            executor.shutdown()
    View Code---第一版,用双进程当作并发,代理未加,隔几天再优化一下,先这样吧

    2018-10-30

    # -*- coding:utf-8 -*-
    # @time:2018-10-21 14:43
    # @Auther:1043453579@qq.com
    
    from urllib.request import Request
    from urllib.request import urlopen
    import re,time,os
    from concurrent.futures import ProcessPoolExecutor,ThreadPoolExecutor
    
    static_url_1 = 'http://www.qiqi.la/vod-detail-id-46194.html'  # detail page of the target series (kept for reference; v2 reads urls from 2.txt)
    class A(object):
        """m3u8 downloader, 2nd revision: shared retrying fetcher, per-domain base url.

        The site base url is derived per episode in main(), so share links from
        several mirror domains (see 2.txt) can be mixed in one run.
        """

        def __init__(self):
            # Browser-like UA: the site rejects urllib's default user agent.
            self.header = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36'}
            self.path = os.getcwd()  # download root; one sub-directory per title
            self.static_url = ''  # set per-episode in main() from the share domain
            # self.r = redis.Redis(host='127.0.0.1',port=6379,db=0)  # proxy pool (disabled)

        def get_proxy(self):
            """Pick a random proxy from the redis pool (requires self.r enabled in __init__)."""
            return {'http': self.r.randomkey().decode()}  # fixed typo: was .deocode()

        def down_1(self, url, referer='', code=True):
            """GET url with browser headers; retry forever, logging failures to 3.txt.

            code=True decodes the body to str; code=False returns raw bytes.
            """
            while True:
                # proxy = self.get_proxy()  # hook: plug a proxy in here
                try:
                    headers = dict(self.header)  # fixed: copy, never mutate the shared dict
                    if referer:
                        headers['Referer'] = referer
                    res = urlopen(Request(url=url, headers=headers), timeout=60).read()
                    if code:
                        res = res.decode()
                    if res:
                        time.sleep(1)  # polite throttle between requests
                        return res
                    raise ValueError('empty response')  # was `raise 1`
                except Exception:
                    print('请求失败', url)
                    with open('3.txt', 'a+') as f:
                        f.write(url)
                        f.write('\n')  # fixed: the '\n' literal was broken in source
                    time.sleep(10)  # back off before retrying

        def num_of_e(self, url_2):
            """Fetch the share page; return the m3u8 index path, referer and title."""
            res = self.down_1(url_2)
            title = self.take_middle_text(res, '<title>', txt_e='</title>')  # episode title
            m = re.search('var main = "(.*?)";', res)  # relative path of the index m3u8
            if m is None or not m.group(1):
                # guard: original crashed with AttributeError when the pattern was absent
                exit()
            return {'var_main': m.group(1), 'referer': url_2, '标题': title}

        def open_3(self, url, referer='', **kwargs):
            """Fetch the first-level index m3u8 and extract the 1080x608 variant path."""
            url = self.static_url + url
            res = self.down_1(url, referer=referer)
            # Text between the resolution tag and the next '.m3u8' is the variant path.
            _ = self.take_middle_text(res, '1080x608', '.m3u8') + '.m3u8'
            return {'url': _.split(), 'regerer1': url}

        def open_4(self, url, referer1='', **kwargs):
            """Fetch the variant m3u8 and collect the .ts segment names."""
            parts = referer1.split('/')[0:-1]
            parts.append(*url)  # url is the 1-element list produced by open_3
            url = '/'.join(parts)
            print(url)
            res = self.down_1(url, referer=referer1)
            ts_list = []
            for line in res.split('\n'):  # fixed: the '\n' literal was broken in source
                if line and line[0] != '#':  # skip m3u8 directives and blank lines
                    ts_list.append(line)
            return {'ts_list': ts_list, 'url': url}

        def take_middle_text(self, txt, txt_s, txt_e='', seeks=0, seeke=0):
            """Return the text between txt_s and txt_e, or a slice around txt_s.

            seeks: return that many characters immediately before txt_s.
            seeke: return that many characters immediately after txt_s.
            Returns False when txt_e is given but not found; returns the Chinese
            error string (kept for backward compatibility) on bad arguments or
            when txt_s is missing.
            """
            try:
                if not (txt_e or seeks or seeke):
                    raise ValueError('need txt_e, seeks or seeke')  # was `raise 1`
                s_1 = txt.find(txt_s)
                if s_1 == -1:
                    raise ValueError('start marker not found')
                l_1 = len(txt_s)
                if txt_e:
                    # fixed: search only AFTER txt_s, otherwise an earlier
                    # occurrence of txt_e yields an empty/garbage slice
                    s_2 = txt.find(txt_e, s_1 + l_1)
                    if s_2 == -1:
                        return False
                    return txt[s_1 + l_1:s_2]
                if seeks:
                    return txt[s_1 - seeks:s_1]
                if seeke:
                    return txt[s_1 + l_1:s_1 + l_1 + seeke]
            except Exception:
                return '传参错误或未找到传参文本'

        def down_ts(self, dict, path_1):
            """Download each .ts segment in dict['ts_list'] into path_1; log errors to 3.txt.

            Existing files are skipped so an interrupted run can be resumed.
            NOTE(review): parameter name `dict` shadows the builtin; kept so the
            external interface stays unchanged.
            """
            url = os.path.dirname(dict['url']) + '/'  # base url of the segments
            for seg in dict['ts_list']:
                path = os.path.join(path_1, seg)
                if os.path.exists(path):
                    print('已存在,跳过', seg)
                    continue
                try:
                    res = urlopen(Request(url=url + seg, headers=self.header), timeout=60).read()
                    time.sleep(1)  # polite throttle between segments
                    if not res:
                        raise ValueError('empty segment')  # was `raise 1`
                    with open(path, 'wb') as f:
                        f.write(res)
                    print('成功写入一条', seg)
                except Exception as e:
                    with open('3.txt', 'a+') as f:
                        stamp = '-'.join(str(t) for t in time.localtime()[0:6])
                        f.write(stamp + '###' + str(e) + '$$$' + url)  # fixed: str(e); concatenating the Exception raised TypeError
                        f.write('\n')  # fixed: the '\n' literal was broken in source
                        print('写入失败', seg, e)
            time.sleep(5)

        def main(self, url):
            """Run the full pipeline for one share url; derive the site base from its domain."""
            _ = url.split('com')
            self.static_url = _[0] + 'com'  # e.g. 'https://cn2.zuixinbo.com'
            dict_1 = self.num_of_e(url)
            dict_2 = self.open_3(dict_1['var_main'], dict_1['referer'])
            dict_3 = self.open_4(dict_2['url'], dict_2['regerer1'])
            path = os.path.join(self.path, dict_1['标题'])
            if not os.path.exists(path):
                os.mkdir(path)  # one directory per episode title
            self.down_ts(dict_3, path)
    
    if __name__ == '__main__':
        # Three worker processes; 2.txt holds one task per line: 第NN集$<share-url>#
        pool = ProcessPoolExecutor(3)
        spider = A()
        with open('2.txt', 'r', encoding='utf8') as f:
            for line in f:
                target = line.split()[0].split('$')[1].split('#')[0]
                pool.submit(spider.main, target)  # fire and forget; shutdown() waits
            pool.shutdown()
    
    #BUG在网页的提交网址中
    View Code--代理未加,需要的请自行加上代理,稍微优化了一下,里面的2.txt是下载地址,见下面
    第01集$https://cn2.zuixinbo.com/share/722caafb4825ef5d8670710fa29087cf#
    第02集$https://cn2.zuixinbo.com/share/fbad540b2f3b5638a9be9aa6a4d8e450#
    第03集$https://v-xunlei.com/share/c457d7ae48d08a6b84bc0b1b9bd7d474#
    第04集$https://v-xunlei.com/share/8db1d4a631a6e9a24e2c0e842e1f1772#
    第05集$https://v-xunlei.com/share/197f76fe309657064dbec74d9eea4be4#
    第06集$https://v-xunlei.com/share/92b70a527191ca64ca2df1cc32142646#
    第07集$https://v-xunlei.com/share/abc99d6b9938aa86d1f30f8ee0fd169f#
    第08集$https://v-xunlei.com/share/22cdb13a83f73ccd1f79ffaf607b0621#
    第09集$https://v-xunlei.com/share/aceacd5df18526f1d96ee1b9714e95eb#
    第10集$https://v-6-cn.com/share/075b051ec3d22dac7b33f788da631fd4#
    第11集$https://v-6-cn.com/share/4670c07872d5314c6ad6ffa633d4a059#
    第12集$https://v-xunlei.com/share/2bba9f4124283edd644799e0cecd45ca#
    第13集$https://v-cntv-cn.com/share/d87aa42cd08ba8612664a73dbdb64221#
    第14集$https://v-cntv-cn.com/share/63ceea56ae1563b4477506246829b386#
    第15集$https://v-cntv-cn.com/share/e8a69bf65aefc23d0f360ab695e9eac7
    View Code--这里是下载地址

    2020-05-02

    # -*- coding:utf-8 -*-
    # @time:2018-10-21 14:43
    # @Auther:1043453579@qq.com
    
    from urllib.request import Request
    from urllib.request import urlopen
    import re,  os
    
    class A(object):
        """m3u8 downloader, 3rd revision: the caller supplies the m3u8 url directly."""

        def __init__(self):
            self.header = {
                'user-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36',
                # ':authority':'m3u8.xingc.vip'
            }
            self.path = os.getcwd()  # download root; one sub-directory per title
            self.static_url = ''  # unused in this revision; kept for compatibility
            # self.r = redis.Redis(host='127.0.0.1',port=6379,db=0)  # proxy pool (disabled)

        def get_proxy(self):
            """Pick a random proxy from the redis pool (requires self.r enabled in __init__)."""
            return {'http': self.r.randomkey().decode()}  # fixed typo: was .deocode()

        def down_1(self, url, referer='', code=True):
            """GET url with browser headers; retries forever on failure (no backoff).

            code=True decodes the body to str; code=False returns raw bytes.
            """
            while True:
                # proxy = self.get_proxy()  # hook: plug a proxy in here
                try:
                    headers = dict(self.header)  # fixed: copy, never mutate the shared dict
                    if referer:
                        headers['Referer'] = referer
                    res = urlopen(Request(url=url, headers=headers), timeout=5).read()
                    if code:
                        res = res.decode()
                    if res:
                        return res
                    raise ValueError('empty response')  # was `raise 1`
                except Exception:
                    print('请求失败', url)

        def num_of_e(self, url_2):
            """Fetch the m3u8 playlist text; None when the download returned nothing."""
            res = self.down_1(url_2)
            return res if res else None

        def take_middle_text(self, txt, txt_s, txt_e='', seeks=0, seeke=0):
            """Return the text between txt_s and txt_e, or a slice around txt_s.

            seeks: return that many characters immediately before txt_s.
            seeke: return that many characters immediately after txt_s.
            Returns False when txt_e is given but not found; returns the Chinese
            error string (kept for backward compatibility) on bad arguments or
            when txt_s is missing.
            """
            try:
                if not (txt_e or seeks or seeke):
                    raise ValueError('need txt_e, seeks or seeke')  # was `raise 1`
                s_1 = txt.find(txt_s)
                if s_1 == -1:
                    raise ValueError('start marker not found')
                l_1 = len(txt_s)
                if txt_e:
                    # fixed: search only AFTER txt_s, otherwise an earlier
                    # occurrence of txt_e yields an empty/garbage slice
                    s_2 = txt.find(txt_e, s_1 + l_1)
                    if s_2 == -1:
                        return False
                    return txt[s_1 + l_1:s_2]
                if seeks:
                    return txt[s_1 - seeks:s_1]
                if seeke:
                    return txt[s_1 + l_1:s_1 + l_1 + seeke]
            except Exception:
                return '传参错误或未找到传参文本'

        def down_ts(self, list_1, path_1, url, temp_int):
            """Download each segment name in list_1 from base `url` into path_1.

            Existing files are skipped; each missing segment is retried until it
            succeeds.  temp_int is only printed (the total segment count).
            """
            import requests  # third-party; only needed for the actual downloads
            for seg in list_1:
                path = os.path.join(path_1, seg)
                if os.path.exists(path):
                    print('已存在,跳过', seg)
                    continue
                while True:  # retry this segment until it is written
                    try:
                        res = requests.get(url=url + seg, headers=self.header, timeout=5)
                        if res:  # truthy only for non-error status codes
                            with open(path, 'wb') as f:
                                f.write(res.content)
                            print('成功写入一条', seg, temp_int)
                            break
                    except Exception:
                        print('requests写入失败', temp_int)

        def main(self, url, title):
            """Download all numbered .ts segments of `url` (an index.m3u8) into ./title."""
            _ = url.split('/')
            _url = "/".join(_[0:-1]) + "/"  # base directory of the playlist
            dict_1 = self.num_of_e(url)  # raw playlist text
            m3u8_list = re.findall(r'\d+\.ts', dict_1)  # fixed regex: was 'd+.ts' (backslashes lost)
            print(m3u8_list)
            path = os.path.join(self.path, title)
            if not os.path.exists(path):
                os.mkdir(path)  # one directory per title
            self.down_ts(m3u8_list, path, _url, len(m3u8_list))
    
    if __name__ == '__main__':
        # ex = ProcessPoolExecutor(3)  # single-threaded in this revision
        spider = A()
        target = 'https:***/index.m3u8'  # placeholder: substitute a real m3u8 url
        spider.main(target, 'name')
    m3u8单文件,单线程下载(去代理)

    2020-05-22 02:31:31

    发现最近两年的视频基本都做了加密措施,综合网上的帖子,没有啥值得使用的地方,于此写下

    aes-128加密系列 :男人的小视频梦想

    使用工具:winhex

    使用库命令:binascii.b2a_hex("二进制文本")

    有一朋友问我,一个小视频APP站做加密,怎么解不出来

    m3u8文件上小图,如下:

     解密如下:

    好的,代码就不发了,自行根据上面的代码进行整改,解密代码如下

    import binascii
    from Crypto.Cipher import AES
    # Decrypt one AES-128-CBC encrypted .ts segment with a key/IV recovered via
    # winhex (both given as hex strings).  Requires pycryptodome (Crypto package).
    with open('0.ts','rb+') as f:
        a = f.read()
        print(len(a)%16) # pad rule stated by the author: remainder 0 -> add 16 bytes, else add (16 - remainder)
        a=a+b'0'*16 # NOTE(review): always appends exactly 16 bytes regardless of the remainder printed above — only matches the stated rule when len(a) % 16 == 0; confirm
        print(a)
        # Key via binascii.a2b_hex and IV via bytes.fromhex — equivalent conversions.
        cryptos = AES.new(binascii.a2b_hex('5a43c7619623bc347fa7dcea3ddfb1b2'), AES.MODE_CBC,bytes.fromhex('3e420d580bd9244dd608850e0dec7ac8'))
        c= cryptos.decrypt(a)
        with open('0_2.ts','wb') as c1:
            c1.write(c)
    View Code--解密AES
  • 相关阅读:
    编译和和运行amor
    用好C语言的中库,系统错误的处理
    C语言中的宏
    时隔多年,再次实现的链表
    脚本更改桌面背景
    python爬虫 一个security的RSA加密爬虫
    mysql 8.0版本
    mysql5.7的并行复制
    kinshard中间件问题
    Springboot2-@Transactional 事务不生效
  • 原文地址:https://www.cnblogs.com/Skyda/p/9823607.html
Copyright © 2011-2022 走看看