zoukankan      html  css  js  c++  java
  • python爬虫:登录百度账户,并上传文件到百度云盘

    login.js文件:

    /**
     * Created by resolvewang on 2017/4/15.
     */
    function getGid() {
        // Template: every "x" becomes a random hex digit, "y" becomes one of 8, 9, a, b.
        var template = "xxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx";
        var filled = template.replace(/[xy]/g, function (placeholder) {
            var nibble = Math.floor(16 * Math.random());
            if (placeholder !== "x") {
                // Keep the low two bits and force the top two bits to binary 10.
                nibble = (nibble & 3) | 8;
            }
            return nibble.toString(16);
        });
        return filled.toUpperCase();
    }
    
    function getCallback() {
        // Random integer in [0, 2^31) rendered in base 36, appended to a fixed prefix.
        var suffix = Math.floor(2147483648 * Math.random()).toString(36);
        return "bd__cbs__" + suffix;
    }

    Python实现代码:

    #-*- coding:utf-8 -*-
    __author__ = 'Administrator'
    
    import time
    import json
    import re
    import requests
    import execjs
    import base64
    from urllib.parse import urlencode
    from requests_toolbelt import MultipartEncoder
    from Crypto.Cipher import PKCS1_v1_5
    from Crypto.PublicKey import RSA
    from hashlib import md5
    from zlib import crc32
    
    # Best-effort: silence urllib3 warnings (later requests are sent with verify=False).
    try:
        requests.packages.urllib3.disable_warnings()
    except Exception:  # e.g. the requests.packages alias is unavailable; nothing to silence
        pass

    # Browser-like default headers; get_token() adds more entries to this dict in place.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
    }

    # Module-wide session shared by every request in this script.
    session = requests.session()
    # Initial visit to the net-disk home page (presumably to collect first-visit cookies).
    session.get('https://pan.baidu.com', headers=headers)
    
    class BufferReader(MultipartEncoder):
        """Streaming multipart/form-data encoder that reports read progress.

        After every ``read()`` the optional callback is invoked with the keyword
        arguments ``size`` (the encoder's ``_len``) and ``progress`` (bytes read
        so far), in addition to ``cb_args`` / ``cb_kwargs``.
        """

        def __init__(self, fields, boundary=None, callback=None, cb_args=(), cb_kwargs=None):
            """
            :param fields: form fields, forwarded to ``MultipartEncoder``
            :param boundary: multipart boundary, forwarded to ``MultipartEncoder``
            :param callback: optional callable invoked after each ``read()``
            :param cb_args: extra positional arguments for ``callback``
            :param cb_kwargs: extra keyword arguments for ``callback``
            """
            self._callback = callback
            self._progress = 0
            self._cb_args = cb_args
            # Copy, so that adding 'size'/'progress' never mutates the caller's dict.
            self._cb_kwargs = dict(cb_kwargs) if cb_kwargs else {}
            super(BufferReader, self).__init__(fields, boundary)

        def read(self, size=None):
            """Read up to ``size`` bytes, update the byte count and notify the callback."""
            chunk = super(BufferReader, self).read(size)
            self._progress += len(chunk)
            self._cb_kwargs.update(size=self._len, progress=self._progress)
            if self._callback:
                try:
                    self._callback(*self._cb_args, **self._cb_kwargs)
                except Exception:
                    # A failing progress callback must not abort the upload.
                    # KeyboardInterrupt/SystemExit are deliberately not swallowed.
                    pass
            return chunk
    
    def _get_runntime(path='login.js'):
        """Compile the helper JavaScript file and return the PyExecJS context.

        :param path: path of the JavaScript file to compile. The original author
            advises keeping that file free of Chinese text (suspected PyExecJS issue).
        :return: compiled context; use ``.call(name)`` to run a JS function
        """
        # Default runtime picked by PyExecJS. The original note says PhantomJS has to
        # be reachable through an environment variable (or be given by explicit path).
        runtime = execjs.get()
        with open(path, 'r', encoding='utf-8') as f:
            source = f.read()
        return runtime.compile(source)
    
    def get_gid():
        """Return the value produced by the JavaScript function ``getGid``."""
        js_context = _get_runntime()
        return js_context.call('getGid')
    
    def get_callback():
        """Return the value produced by the JavaScript function ``getCallback``."""
        js_context = _get_runntime()
        return js_context.call('getCallback')
    
    def _get_curtime():
        return int(time.time()*1000)
    
    # Packet captures are not fully reliable: '?getapi' must directly follow
    # https://passport.baidu.com/v2/api/ in the URL, otherwise the request is misrouted.
    def get_token(gid, callback):
        """Request a login token from the passport ``getapi`` endpoint.

        Side effect: adds Referer/Accept/Connection/Host entries to the
        module-level ``headers`` dict.

        :param gid: value sent as the ``gid`` query parameter
        :param callback: JSONP callback name; it must occur in the response text
        :return: the token from the response, or ``None`` on failure
        """
        get_data = {
            'tpl': 'netdisk',
            'subpro': 'netdisk_web',
            'apiver': 'v3',
            'tt': _get_curtime(),
            'class': 'login',
            'gid': gid,
            'logintype': 'basicLogin',
            'callback': callback
        }
        headers.update(dict(Referer='http://pan.baidu.com/', Accept='*/*', Connection='keep-alive', Host='passport.baidu.com'))
        resp = session.get(url='https://passport.baidu.com/v2/api/?getapi', params=get_data, headers=headers)
        if resp.status_code != 200 or callback not in resp.text:
            print('获取token失败')
            return None
        # The body is JSONP, i.e. callback({...}); capture what is between the outer
        # parentheses. The parentheses must be escaped to be matched literally.
        matched = re.search(r'.*?\((.*)\)', resp.text)
        if matched is None:
            print('获取token失败')
            return None
        # json.loads rejects single-quoted strings, so normalise quotes first.
        data = json.loads(matched.group(1).replace("'", '"'))
        return (data.get('data') or {}).get('token')
    
    def get_rsa_key(token, gid, callback):
        """Fetch the RSA public key used to encrypt the password.

        :param token: token returned by ``get_token``
        :param gid: value sent as the ``gid`` query parameter
        :param callback: JSONP callback name; it must occur in the response text
        :return: ``(pubkey, key)`` taken from the response, or ``None`` on failure
        """
        get_data = {
            'token': token,
            'tpl': 'netdisk',
            'subpro': 'netdisk_web',
            'apiver': 'v3',
            'tt': _get_curtime(),
            'gid': gid,
            'callback': callback,
        }
        resp = session.get(url='https://passport.baidu.com/v2/getpublickey', headers=headers, params=get_data)
        if resp.status_code != 200 or callback not in resp.text:
            print('获取rsa key失败')
            return None
        # JSONP body: callback({...}); the parentheses must be escaped to match literally.
        matched = re.search(r'.*?\((.*)\)', resp.text)
        if matched is None:
            print('获取rsa key失败')
            return None
        # json.loads rejects single-quoted strings, so normalise quotes first.
        data = json.loads(matched.group(1).replace("'", '"'))
        return data.get('pubkey'), data.get('key')
    
    def encript_password(password, pubkey):
        """Encrypt ``password`` with the PEM public key and return base64 text.

        Uses PKCS#1 v1.5 from the Crypto package. Alternative with the ``rsa``
        package, kept from the original notes::

            import rsa
            pub = rsa.PublicKey.load_pkcs1_openssl_pem(pubkey.encode('utf-8'))
            encript_passwd = rsa.encrypt(password.encode('utf-8'), pub)
            return base64.b64encode(encript_passwd).decode('utf-8')

        :param password: plain-text password (str)
        :param pubkey: public key text (str)
        :return: base64-encoded ciphertext (str)
        """
        # Both the key and the plaintext are handed over as bytes.
        key_bytes = pubkey.encode('utf-8')
        cipher = PKCS1_v1_5.new(RSA.importKey(key_bytes))
        ciphertext = cipher.encrypt(password.encode('utf-8'))
        encoded = base64.b64encode(ciphertext)
        return encoded.decode('utf-8')
    
    def login(token, gid, callback, rsakey, username, password):
        """Submit the login form to the passport ``?login`` endpoint.

        :param token: token returned by ``get_token``
        :param gid: value sent as the ``gid`` form field
        :param callback: JSONP callback name (sent as ``parent.<callback>``)
        :param rsakey: the ``key`` value returned by ``get_rsa_key``
        :param username: account name
        :param password: password already encrypted by ``encript_password``
        :return: ``True`` when the response text contains ``err_no=0``, else ``False``
            (the outcome is also printed, as before)
        """
        post_data = {
            'staticpage': 'http://pan.baidu.com/res/static/thirdparty/pass_v3_jump.html',
            'charset': 'utf-8',
            'token': token,
            'tpl': 'netdisk',
            'subpro': 'netdisk_web',
            'apiver': 'v3',
            'tt': _get_curtime(),
            'codestring': '',
            'safeflg': 0,
            'u': 'http://pan.baidu.com/disk/home',
            'isPhone': '',
            'detect': 1,
            'gid': gid,
            'quick_user': 0,
            'logintype': 'basicLogin',
            'logLoginType': 'pc_loginBasic',
            'idc': '',
            'loginmerge': 'true',
            'foreignusername': '',
            'username': username,
            'password': password,
            'mem_pass': 'on',
            # the key returned together with the public key
            'rsakey': rsakey,
            'crypttype': 12,
            'ppui_logintime': 33554,
            'countrycode': '',
            'callback': 'parent.'+callback
        }
        resp = session.post(url='https://passport.baidu.com/v2/api/?login', data=post_data, headers=headers)
        succeeded = 'err_no=0' in resp.text
        print('登录成功' if succeeded else '登录失败')
        return succeeded
    
    def upload(dest_path,file_handle,token):
        """Upload a file to pcs.baidu.com as a streamed multipart POST.

        :param dest_path: destination path on the server
        :param file_handle: binary file object to send
        :param token: value sent as ``bdstoken``
        :return: requests.Response
        """
        query = {
            'method': 'upload',
            'app_id': "250528",
            'BDUSS': session.cookies['BDUSS'],
            't': str(int(time.time())),
            'bdstoken': token,
            'path': dest_path,
            'ondup': "newcopy"
        }
        # The multipart "file name" is just the current timestamp.
        form = {'file': (str(int(time.time())), file_handle)}
        endpoint = 'https://{0}/rest/2.0/pcs/file'.format('pcs.baidu.com')
        api = '%s?%s' % (endpoint, urlencode(query))
        body = BufferReader(form)
        request_headers = {
            "Referer": "http://pan.baidu.com/disk/home",
            "User-Agent": "netdisk;4.6.2.0;PC;PC-Windows;10.0.10240;WindowsBaiduYunGuanJia",
            # The encoder supplies the Content-Type carrying its boundary.
            "Content-Type": body.content_type,
        }
        return session.post(api, data=body, verify=False, headers=request_headers)
    
    def rapidupload(dest_path,file_handler,token):
        """Attempt a rapid upload: send only the file's size and checksums.

        :param dest_path: destination path on the server, including the file name
        :param file_handler: seekable binary file object, e.g. open('file','rb');
            it is left positioned at end-of-file
        :param token: value sent as ``bdstoken``
        :return: requests.Response. Example bodies noted by the original author:

            * file already on the server, nothing uploaded::

                {"path": "/apps/album/1.jpg", "size": 372121, "ctime": 1234567890,
                 "mtime": 1234567890, "md5": "cb123afcc12453543ef", "fs_id": 12345,
                 "isdir": 0, "request_id": 12314124}

            * file not on the server, a regular upload is needed:
              {"errno":404,"info":[],"request_id":XXX}
            * file smaller than 256 KB (slice-md5 == content-md5):
              {"errno":2,"info":[],"request_id":XXX}
            * remote file already exists:
              {"errno":-8,"info":[],"request_id":XXX}
        """
        _BLOCK_SIZE = 2 ** 20  # the remainder is hashed in 1 MiB reads

        # File size = offset of end-of-file; then rewind for hashing.
        file_handler.seek(0, 2)
        content_length = file_handler.tell()
        file_handler.seek(0)

        # The slice checksum covers the first 256 KB only; the whole-file MD5
        # continues from the same hash state instead of hashing the head twice.
        head = file_handler.read(256 * 1024)
        content_md5 = md5(head)
        slice_md5 = content_md5.hexdigest()
        content_crc32 = crc32(head)

        while True:
            block = file_handler.read(_BLOCK_SIZE)
            if not block:
                break
            # Running CRC32 / MD5 over the rest of the file.
            content_crc32 = crc32(block, content_crc32)
            content_md5.update(block)

        params = {
            'method': 'rapidupload',
            'app_id': "250528",
            'BDUSS': session.cookies['BDUSS'],
            't': str(int(time.time())),
            'bdstoken': token,
            'path': dest_path,
            'ondup': "newcopy"
        }
        data = {
            'content-length': content_length,
            'content-md5': content_md5.hexdigest(),
            'slice-md5': slice_md5,
            # Mask to an unsigned 32-bit value before formatting.
            'content-crc32': '%d' % (content_crc32 & 0xFFFFFFFF)
        }
        header = {"Referer": "http://pan.baidu.com/disk/home",
                  "User-Agent": "netdisk;4.6.2.0;PC;PC-Windows;10.0.10240;WindowsBaiduYunGuanJia"}
        url = 'https://{0}/rest/2.0/pcs/file'.format('pcs.baidu.com')
        api = '%s?%s' % (url, urlencode(params))
        return session.post(api, data=data, verify=False, headers=header)
    
    if __name__ == '__main__':
        user = 'xxx'  # account name
        password = 'xxx'  # account password

        cur_gid = get_gid()
        cur_callback = get_callback()
        cur_token = get_token(cur_gid, cur_callback)
        if cur_token is None:
            # get_token has already printed the failure message.
            raise SystemExit(1)
        rsa_info = get_rsa_key(cur_token, cur_gid, cur_callback)
        if rsa_info is None:
            raise SystemExit(1)
        cur_pubkey, cur_key = rsa_info
        # Use a separate name: rebinding 'encript_password' would shadow the function.
        encrypted_password = encript_password(password, cur_pubkey)
        login(cur_token, cur_gid, cur_callback, cur_key, user, encrypted_password)
        # print("cookies:%s" %(session.cookies['BDUSS']))

        # res=upload("/hello/temp.txt",open("temp.txt",'rb'),cur_token)
        # print(res.content.decode('utf-8'))
        with open("words.txt", 'rb') as words_file:
            res = rapidupload("/hello/words.txt", words_file, cur_token)
        print(res.content.decode('utf-8'))
      1 #-*- coding:utf-8 -*-
      2 __author__ = 'Administrator'
      3 
      4 import time
      5 import json
      6 import re
      7 import requests
      8 import execjs
      9 import base64
     10 from urllib.parse import urlencode
     11 from requests_toolbelt import MultipartEncoder
     12 from Crypto.Cipher import PKCS1_v1_5
     13 from Crypto.PublicKey import RSA
     14 from hashlib import md5
     15 from zlib import crc32
     16 # import progressbar
     17 import sys
     18 from contextlib import closing
     19 import time
     20 import os
     21 
     # Best-effort: silence urllib3 warnings (later requests are sent with verify=False).
     try:
         requests.packages.urllib3.disable_warnings()
     except Exception:  # e.g. the requests.packages alias is unavailable; nothing to silence
         pass

     # Browser-like default headers; get_token() adds more entries to this dict in place.
     headers = {
         'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 '
                       '(KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
     }

     # Module-wide session shared by every request in this script.
     session = requests.session()
     # Initial visit to the net-disk home page (presumably to collect first-visit cookies).
     session.get('https://pan.baidu.com', headers=headers)
     34 
     35 
     class BufferReader(MultipartEncoder):
         """Streaming multipart/form-data encoder that reports read progress.

         After every ``read()`` the optional callback is invoked with the keyword
         arguments ``size`` (the encoder's ``_len``) and ``progress`` (bytes read
         so far), in addition to ``cb_args`` / ``cb_kwargs``.
         """

         def __init__(self, fields, boundary=None, callback=None, cb_args=(), cb_kwargs=None):
             """
             :param fields: form fields, forwarded to ``MultipartEncoder``
             :param boundary: multipart boundary, forwarded to ``MultipartEncoder``
             :param callback: optional callable invoked after each ``read()``
             :param cb_args: extra positional arguments for ``callback``
             :param cb_kwargs: extra keyword arguments for ``callback``
             """
             self._callback = callback
             self._progress = 0
             self._cb_args = cb_args
             # Copy, so that adding 'size'/'progress' never mutates the caller's dict.
             self._cb_kwargs = dict(cb_kwargs) if cb_kwargs else {}
             super(BufferReader, self).__init__(fields, boundary)

         def read(self, size=None):
             """Read up to ``size`` bytes, update the byte count and notify the callback."""
             chunk = super(BufferReader, self).read(size)
             self._progress += len(chunk)
             self._cb_kwargs.update(size=self._len, progress=self._progress)
             if self._callback:
                 try:
                     self._callback(*self._cb_args, **self._cb_kwargs)
                 except Exception:
                     # A failing progress callback must not abort the upload.
                     # KeyboardInterrupt/SystemExit are deliberately not swallowed.
                     pass
             return chunk
     61 
     class ProgressBar:
         """Callable progress reporter built on the third-party ``progressbar`` package.

         Instances are meant to be used as a progress callback: each call must
         receive the keyword arguments ``size`` (total) and ``progress`` (done so far).
         """

         def __init__(self):
             self.first_call = True

         def __call__(self, *args, **kwargs):
             if self.first_call:
                 # Imported here under an alias: the module-level import is commented
                 # out, and this file also defines a function named ``progressbar``
                 # that would shadow the package.
                 import progressbar as progressbar_lib
                 self.widgets = [progressbar_lib.Percentage(), ' ',
                                 progressbar_lib.Bar(marker=progressbar_lib.RotatingMarker('>')),
                                 ' ', progressbar_lib.FileTransferSpeed()]
                 self.pbar = progressbar_lib.ProgressBar(widgets=self.widgets, maxval=kwargs['size']).start()
                 self.first_call = False

             if kwargs['size'] <= kwargs['progress']:
                 self.pbar.finish()
             else:
                 self.pbar.update(kwargs['progress'])
     81 
     82 
     def _get_runntime(path='login.js'):
         """Compile the helper JavaScript file and return the PyExecJS context.

         :param path: path of the JavaScript file to compile. The original author
             advises keeping that file free of Chinese text (suspected PyExecJS issue).
         :return: compiled context; use ``.call(name)`` to run a JS function
         """
         # Default runtime picked by PyExecJS. The original note says PhantomJS has to
         # be reachable through an environment variable (or be given by explicit path).
         runtime = execjs.get()
         with open(path, 'r', encoding='utf-8') as f:
             source = f.read()
         return runtime.compile(source)
     92 
     def get_gid():
         """Return the value produced by the JavaScript function ``getGid``."""
         js_context = _get_runntime()
         return js_context.call('getGid')
     95 
     def get_callback():
         """Return the value produced by the JavaScript function ``getCallback``."""
         js_context = _get_runntime()
         return js_context.call('getCallback')
     98 
     99 def _get_curtime():
    100     return int(time.time()*1000)
    101 
    # Packet captures are not fully reliable: '?getapi' must directly follow
    # https://passport.baidu.com/v2/api/ in the URL, otherwise the request is misrouted.
    def get_token(gid, callback):
        """Request a login token from the passport ``getapi`` endpoint.

        Side effect: adds Referer/Accept/Connection/Host entries to the
        module-level ``headers`` dict.

        :param gid: value sent as the ``gid`` query parameter
        :param callback: JSONP callback name; it must occur in the response text
        :return: the token from the response, or ``None`` on failure
        """
        get_data = {
            'tpl': 'netdisk',
            'subpro': 'netdisk_web',
            'apiver': 'v3',
            'tt': _get_curtime(),
            'class': 'login',
            'gid': gid,
            'logintype': 'basicLogin',
            'callback': callback
        }
        headers.update(dict(Referer='http://pan.baidu.com/', Accept='*/*', Connection='keep-alive', Host='passport.baidu.com'))
        resp = session.get(url='https://passport.baidu.com/v2/api/?getapi', params=get_data, headers=headers)
        if resp.status_code != 200 or callback not in resp.text:
            print('获取token失败')
            return None
        # The body is JSONP, i.e. callback({...}); capture what is between the outer
        # parentheses. The parentheses must be escaped to be matched literally.
        matched = re.search(r'.*?\((.*)\)', resp.text)
        if matched is None:
            print('获取token失败')
            return None
        # json.loads rejects single-quoted strings, so normalise quotes first.
        data = json.loads(matched.group(1).replace("'", '"'))
        return (data.get('data') or {}).get('token')
    125 
    def get_rsa_key(token, gid, callback):
        """Fetch the RSA public key used to encrypt the password.

        :param token: token returned by ``get_token``
        :param gid: value sent as the ``gid`` query parameter
        :param callback: JSONP callback name; it must occur in the response text
        :return: ``(pubkey, key)`` taken from the response, or ``None`` on failure
        """
        get_data = {
            'token': token,
            'tpl': 'netdisk',
            'subpro': 'netdisk_web',
            'apiver': 'v3',
            'tt': _get_curtime(),
            'gid': gid,
            'callback': callback,
        }
        resp = session.get(url='https://passport.baidu.com/v2/getpublickey', headers=headers, params=get_data)
        if resp.status_code != 200 or callback not in resp.text:
            print('获取rsa key失败')
            return None
        # JSONP body: callback({...}); the parentheses must be escaped to match literally.
        matched = re.search(r'.*?\((.*)\)', resp.text)
        if matched is None:
            print('获取rsa key失败')
            return None
        # json.loads rejects single-quoted strings, so normalise quotes first.
        data = json.loads(matched.group(1).replace("'", '"'))
        return data.get('pubkey'), data.get('key')
    144 
    def encript_password(password, pubkey):
        """Encrypt ``password`` with the PEM public key and return base64 text.

        Uses PKCS#1 v1.5 from the Crypto package. Alternative with the ``rsa``
        package, kept from the original notes::

            import rsa
            pub = rsa.PublicKey.load_pkcs1_openssl_pem(pubkey.encode('utf-8'))
            encript_passwd = rsa.encrypt(password.encode('utf-8'), pub)
            return base64.b64encode(encript_passwd).decode('utf-8')

        :param password: plain-text password (str)
        :param pubkey: public key text (str)
        :return: base64-encoded ciphertext (str)
        """
        # Both the key and the plaintext are handed over as bytes.
        key_bytes = pubkey.encode('utf-8')
        cipher = PKCS1_v1_5.new(RSA.importKey(key_bytes))
        ciphertext = cipher.encrypt(password.encode('utf-8'))
        encoded = base64.b64encode(ciphertext)
        return encoded.decode('utf-8')
    161 
    def login(token, gid, callback, rsakey, username, password):
        """Submit the login form to the passport ``?login`` endpoint.

        :param token: token returned by ``get_token``
        :param gid: value sent as the ``gid`` form field
        :param callback: JSONP callback name (sent as ``parent.<callback>``)
        :param rsakey: the ``key`` value returned by ``get_rsa_key``
        :param username: account name
        :param password: password already encrypted by ``encript_password``
        :return: ``True`` when the response text contains ``err_no=0``, else ``False``
            (the outcome is also printed, as before)
        """
        post_data = {
            'staticpage': 'http://pan.baidu.com/res/static/thirdparty/pass_v3_jump.html',
            'charset': 'utf-8',
            'token': token,
            'tpl': 'netdisk',
            'subpro': 'netdisk_web',
            'apiver': 'v3',
            'tt': _get_curtime(),
            'codestring': '',
            'safeflg': 0,
            'u': 'http://pan.baidu.com/disk/home',
            'isPhone': '',
            'detect': 1,
            'gid': gid,
            'quick_user': 0,
            'logintype': 'basicLogin',
            'logLoginType': 'pc_loginBasic',
            'idc': '',
            'loginmerge': 'true',
            'foreignusername': '',
            'username': username,
            'password': password,
            'mem_pass': 'on',
            # the key returned together with the public key
            'rsakey': rsakey,
            'crypttype': 12,
            'ppui_logintime': 33554,
            'countrycode': '',
            'callback': 'parent.'+callback
        }
        resp = session.post(url='https://passport.baidu.com/v2/api/?login', data=post_data, headers=headers)
        succeeded = 'err_no=0' in resp.text
        print('登录成功' if succeeded else '登录失败')
        return succeeded
    def progressbar(size=None, progress=None,progress_title="已完成",finish_title="全部完成"):
        """Write a one-line percentage report to stdout.

        :param size: total number of bytes
        :param progress: number of bytes transferred so far
        :param progress_title: prefix used while ``progress < size``
        :param finish_title: prefix used once ``progress >= size``
        """
        if size and progress is not None and progress < size:
            percent = int((progress / size) * 100)
            sys.stdout.write(progress_title + str(percent) + ' % ' + "\n")
        else:
            # Finished. An empty or unknown size is also reported as 100 %
            # instead of dividing by zero.
            sys.stdout.write(finish_title + "100" + ' % ' + "\n")
        sys.stdout.flush()
    207 
    def upload(dest_path,file_handle,token,callback=None):
        """Upload a file, trying a rapid upload first.

        A regular streamed multipart upload is performed only when the rapid
        upload response carries ``error_code`` 31079.

        :param dest_path: destination path on the server
        :param file_handle: seekable binary file object
        :param token: value sent as ``bdstoken``
        :param callback: optional progress callback for the regular upload; it is
            called with ``size`` and ``progress`` keyword arguments
        :return: requests.Response of whichever request was used last
        """
        res = rapidupload(dest_path, file_handle, token)
        result = json.loads(res.content.decode('utf-8'))
        if result.get("error_code", -1) != 31079:
            print("using rapidupload....")
            return res

        print("using upload....")
        # rapidupload() read the handle to end-of-file while hashing; rewind it,
        # otherwise the multipart encoder would start reading at EOF.
        file_handle.seek(0)
        params = {
            'method': 'upload',
            'app_id': "250528",
            'BDUSS': session.cookies['BDUSS'],
            't': str(int(time.time())),
            'bdstoken': token,
            'path': dest_path,
            'ondup': "newcopy"
        }
        # The multipart "file name" is just the current timestamp.
        files = {'file': (str(int(time.time())), file_handle)}
        url = 'https://{0}/rest/2.0/pcs/file'.format('pcs.baidu.com')
        api = '%s?%s' % (url, urlencode(params))
        body = BufferReader(files, callback=callback)
        header = {
            "Referer": "http://pan.baidu.com/disk/home",
            "User-Agent": "netdisk;4.6.2.0;PC;PC-Windows;10.0.10240;WindowsBaiduYunGuanJia",
            # The encoder supplies the Content-Type carrying its boundary.
            "Content-Type": body.content_type,
        }
        return session.post(api, data=body, verify=False, headers=header)
    240 
    def rapidupload(dest_path,file_handler,token,callback=None):
        """Attempt a rapid upload: send only the file's size and checksums.

        :param dest_path: destination path on the server, including the file name
        :param file_handler: seekable binary file object, e.g. open('file','rb');
            it is left positioned at end-of-file
        :param token: value sent as ``bdstoken``
        :param callback: optional callable invoked after every read of the hashing
            loop with ``size`` (file length) and ``progress`` (bytes hashed so far)
        :return: requests.Response. Example bodies noted by the original author:

            * file already on the server, nothing uploaded::

                {"path": "/apps/album/1.jpg", "size": 372121, "ctime": 1234567890,
                 "mtime": 1234567890, "md5": "cb123afcc12453543ef", "fs_id": 12345,
                 "isdir": 0, "request_id": 12314124}

            * file not on the server, a regular upload is needed:
              {"errno":404,"info":[],"request_id":XXX}
            * file smaller than 256 KB (slice-md5 == content-md5):
              {"errno":2,"info":[],"request_id":XXX}
            * remote file already exists:
              {"errno":-8,"info":[],"request_id":XXX}
        """
        params = {
            'method': 'rapidupload',
            'app_id': "250528",
            'BDUSS': session.cookies['BDUSS'],
            't': str(int(time.time())),
            'bdstoken': token,
            'path': dest_path,
            'ondup': "newcopy"
        }
        header = {"Referer": "http://pan.baidu.com/disk/home",
                  "User-Agent": "netdisk;4.6.2.0;PC;PC-Windows;10.0.10240;WindowsBaiduYunGuanJia"}
        url = 'https://{0}/rest/2.0/pcs/file'.format('pcs.baidu.com')
        api = '%s?%s' % (url, urlencode(params))

        _BLOCK_SIZE = 2 ** 20  # the remainder is hashed in 1 MiB reads

        # File size = offset of end-of-file; then rewind for hashing.
        file_handler.seek(0, 2)
        content_length = file_handler.tell()
        file_handler.seek(0)

        # The slice checksum covers the first 256 KB only; the whole-file MD5
        # continues from the same hash state instead of hashing the head twice.
        head = file_handler.read(256 * 1024)
        content_md5 = md5(head)
        slice_md5 = content_md5.hexdigest()
        content_crc32 = crc32(head)

        hashed = len(head)  # bytes consumed so far, including the 256 KB head
        while True:
            block = file_handler.read(_BLOCK_SIZE)
            hashed += len(block)
            if callback:
                # Report the real byte count; the final (empty) read reports 100 %.
                callback(size=content_length, progress=hashed)
            if not block:
                break
            # Running CRC32 / MD5 over the rest of the file.
            content_crc32 = crc32(block, content_crc32)
            content_md5.update(block)

        data = {
            'content-length': content_length,
            'content-md5': content_md5.hexdigest(),
            'slice-md5': slice_md5,
            # Mask to an unsigned 32-bit value before formatting.
            'content-crc32': '%d' % (content_crc32 & 0xFFFFFFFF)
        }
        return session.post(api, data=data, verify=False, headers=header)
    327 
    def download(remote_path,file_path,token):
        """Download one file from the net disk into ``file_path``, printing progress.

        According to the original notes the endpoint honours the HTTP ``Range``
        header (e.g. ``Range: bytes=0-99`` returns the first 100 bytes), which
        would allow resumable downloads; this function fetches the whole file
        in one streamed request.

        :param remote_path: path of the file on the net disk, including the file
            name; it must start with ``/``. Limits quoted by the original notes:
            at most 1000 characters; none of ``\\ ? | " > < : *``; no name
            starting or ending with ``.`` or whitespace.
        :param file_path: local path the content is written to
        :param token: value sent as ``bdstoken``
        :return: None
        """
        params = {
            'method': 'download',
            'app_id': "250528",
            'BDUSS': session.cookies['BDUSS'],
            't': str(int(time.time())),
            'bdstoken': token,
            'path':remote_path
        }
        # pcs.baidu.com remains usable; the original note says d.pcs.baidu.com
        # offers faster, more stable downloads.
        url = 'https://{0}/rest/2.0/pcs/file'.format('d.pcs.baidu.com')
        header = {"Referer": "http://pan.baidu.com/disk/home",
                  "User-Agent": "netdisk;4.6.2.0;PC;PC-Windows;10.0.10240;WindowsBaiduYunGuanJia"}
        chunk_size = 1024  # bytes requested per iteration
        with closing(session.get(url, params=params, verify=False, headers=header, stream=True)) as response:
            # Content-Length may be missing; progress reporting is then skipped.
            total_size = int(response.headers.get('content-length', 0))
            received = 0
            with open(file_path, 'wb') as out_file:
                for data in response.iter_content(chunk_size=chunk_size):
                    out_file.write(data)
                    received += len(data)
                    if total_size > 0:
                        progressbar(size=total_size, progress=received,
                                    progress_title="正在下载", finish_title="下载完成")

        # Alternative kept from the original notes: fetch the file piecewise with
        # Range requests and append each piece to the file.
        #     start, stop = 0, 1023
        #     while True:
        #         header.update({'Range': 'bytes={0}-{1}'.format(start, stop)})
        #         response = session.get(url, params=params, verify=False, headers=header)
        #         if not response.content:
        #             break
        #         with open(file_path, 'ab') as f:
        #             f.write(response.content)
        #         start, stop = start + 1024, stop + 1024
    394 
    def get_filesize(rote_path,token):
        """Request the metadata of a remote file (``method=meta``).

        :param rote_path: remote file path, e.g. '/aaa.txt'
        :param token: value sent as ``bdstoken``
        :return: requests.Response
        """
        query = {
            'method': 'meta',
            'app_id': "250528",
            'BDUSS': session.cookies['BDUSS'],
            't': str(int(time.time())),
            'bdstoken': token,
            'path':rote_path
        }
        endpoint = 'https://{0}/rest/2.0/pcs/file'.format('pcs.baidu.com')
        request_headers = {
            "Referer": "http://pan.baidu.com/disk/home",
            "User-Agent": "netdisk;4.6.2.0;PC;PC-Windows;10.0.10240;WindowsBaiduYunGuanJia",
        }
        return session.get(endpoint, params=query, verify=False, headers=request_headers)
    416 
    def meta(file_list,token):
        """Request the meta information of one or more remote files.

        :param file_list: list of remote paths, e.g. ['/aaa.txt']; a single
            non-list value is wrapped into a one-element list
        :param token: value sent as ``bdstoken``
        :return: requests.Response. Example bodies noted by the original author:

            * file does not exist::

                {"errno":12,"info":[{"errno":-9}],"request_id":3294861771}

            * file exists: ``{"errno": 0, "info": [ {...} ], "request_id": ...}``
              where each ``info`` entry carries ``fs_id``, ``path``,
              ``server_filename``, ``size``, ``server_mtime``, ``server_ctime``,
              ``local_mtime``, ``local_ctime``, ``isdir``, ``category``,
              ``path_md5``, ``delete_fs_id``, ``object_key``, ``block_list``,
              ``md5`` and ``errno``.
        """
        if not isinstance(file_list, list):
            file_list = [file_list]
        data = {'target': json.dumps(file_list)}
        params = {
            'method': 'filemetas',
            'app_id': "250528",
            'BDUSS': session.cookies['BDUSS'],
            't': str(int(time.time())),
            'bdstoken': token
        }
        header = {"Referer": "http://pan.baidu.com/disk/home",
                  "User-Agent": "netdisk;4.6.2.0;PC;PC-Windows;10.0.10240;WindowsBaiduYunGuanJia"}
        # The base URL already carries a query string, so the extra parameters
        # are appended with '&'.
        url = 'http://pan.baidu.com/api/{0}'.format('filemetas?blocks=0&dlink=1')
        api = "%s&%s" % (url, urlencode(params))
        # The former debug prints were removed: they wrote the token and the
        # BDUSS-bearing URL to stdout.
        return session.post(api, data=data, verify=False, headers=header)
    508 
    if __name__ == '__main__':
        user = 'xxx'  # account name
        password = 'xxx'  # account password

        cur_gid = get_gid()
        cur_callback = get_callback()
        cur_token = get_token(cur_gid, cur_callback)
        if cur_token is None:
            # get_token has already printed the failure message.
            raise SystemExit(1)
        rsa_info = get_rsa_key(cur_token, cur_gid, cur_callback)
        if rsa_info is None:
            raise SystemExit(1)
        cur_pubkey, cur_key = rsa_info
        # Use a separate name: rebinding 'encript_password' would shadow the function.
        encrypted_password = encript_password(password, cur_pubkey)
        login(cur_token, cur_gid, cur_callback, cur_key, user, encrypted_password)
        # print("cookies:%s" %(session.cookies['BDUSS']))

        with open("test_BaiduPan.py", 'rb') as local_file:
            res = upload("/hello/word.py", local_file, cur_token, callback=progressbar)
        print(res.content.decode('utf-8'))

        # res=rapidupload("/hello/traindata.js",open("login.js",'rb'),cur_token,callback=progressbar)
        # print(json.loads(res.content.decode('utf-8')))

        # download("/hello/words.txt","word.txt",cur_token)
        # print(res.content.decode('utf-8'))

        # res=get_filesize("/hello/words",cur_token)
        # print(res.content.decode('utf-8'))

        # res=meta("/hello/words.txt",cur_token)
        # print(res.content)
      1 #-*- coding:utf-8 -*-
      2 __author__ = 'Administrator'
      3 
      4 import time
      5 import json
      6 import re
      7 import requests
      8 import execjs
      9 import base64
     10 from urllib.parse import urlencode
     11 from requests_toolbelt import MultipartEncoder
     12 from Crypto.Cipher import PKCS1_v1_5
     13 from Crypto.PublicKey import RSA
     14 from hashlib import md5
     15 from zlib import crc32
     16 # import progressbar
     17 import sys
     18 from contextlib import closing
     19 import time
     20 import os
     21 from io import BytesIO
     22 
     23 try:
     24     requests.packages.urllib3.disable_warnings()
     25 except:
     26     pass
     27 
     28 # class BufferReader(MultipartEncoder):
     29 #     """将multipart-formdata转化为stream形式的Proxy类
     30 #     """
     31 #     def __init__(self, fields, boundary=None, callback=None, cb_args=(), cb_kwargs=None):
     32 #         self._callback = callback
     33 #         self._progress = 0
     34 #         self._cb_args = cb_args
     35 #         self._cb_kwargs = cb_kwargs or {}
     36 #         super(BufferReader, self).__init__(fields, boundary)
     37 #
     38 #     def read(self, size=None):
     39 #         chunk = super(BufferReader, self).read(size)
     40 #         self._progress += int(len(chunk))
     41 #         self._cb_kwargs.update({
     42 #             'size': self._len,
     43 #             'progress': self._progress
     44 #         })
     45 #         if self._callback:
     46 #             try:
     47 #                 self._callback(*self._cb_args, **self._cb_kwargs)
     48 #             except:  # catches exception from the callback
     49 #                 # raise CancelledError('The upload was cancelled.')
     50 #                 pass
     51 #         return chunk
     52 
     53 class BufferReader(BytesIO):
     54     """
     55     """
     56     def __init__(self, filebytes, callback=None):
     57         self._callback = callback
     58         self._progress = 0
     59         self._size =len(filebytes)
     60         super(BufferReader, self).__init__(filebytes)
     61 
     62     def read(self, size=-1):
     63         chunk_size=8192
     64         chunk = BytesIO.read(self,chunk_size)
     65         self._progress += int(len(chunk))
     66         if self._callback:
     67             self._callback(self._size,self._progress)
     68         return chunk
     69 
     70 class PCSBase():
     71     def __init__(self,username,password):
     72         self.session=requests.session()
     73         self.headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 '
     74                          '(KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
     75            }
     76         self.session.get('https://pan.baidu.com', headers=self.headers)
     77         self.username=username
     78         self.password=password
     79         self.user={}
     80         self.cur_gid=self.get_gid()
     81         self.cur_callback=self.get_callback()
     82         self.cur_time=self._get_curtime()
     83         self._initiate()#登录成功,并获取session.cookies
     84 
     85     def _initiate(self):
     86         self.user['token']= self.get_token()
     87         # print("token:%s" %(self.get_token()))
     88         self.login()
     89         # print("cookies:%s" %(session.cookies['BDUSS']))
     90     def _get_runntime(self):
     91         """
     92         :param path: 加密js的路径,注意js中不要使用中文!估计是pyexecjs处理中文还有一些问题
     93         :return: 编译后的js环境,不清楚pyexecjs这个库的用法的请在github上查看相关文档
     94         """
     95         phantom = execjs.get()  # 这里必须为phantomjs设置环境变量,否则可以写phantomjs的具体路径
     96         with open('login.js', 'r') as f:
     97             source = f.read()
     98         return phantom.compile(source)
     99 
    100     def get_gid(self):
    101         return self._get_runntime().call('getGid')
    102 
    103     def get_callback(self):
    104         return self._get_runntime().call('getCallback')
    105 
    106     def _get_curtime(self):
    107         return int(time.time()*1000)
    108         # 抓包也不是百分百可靠啊,这里?getapi一定要挨着https://passport.baidu.com/v2/api/写,才会到正确的路由
    109     def get_token(self):
    110         get_data = {
    111             'tpl': 'netdisk',
    112             'subpro': 'netdisk_web',
    113             'apiver': 'v3',
    114             'tt':self.cur_time,
    115             'class': 'login',
    116             'gid': self.cur_gid,
    117             'logintype': 'basicLogin',
    118             'callback': self.cur_callback
    119         }
    120         self.headers.update(dict(Referer='http://pan.baidu.com/', Accept='*/*', Connection='keep-alive', Host='passport.baidu.com'))
    121         resp = self.session.get(url='https://passport.baidu.com/v2/api/?getapi', params=get_data, headers=self.headers)
    122         if resp.status_code == 200 and self.cur_callback in resp.text:
    123             # 如果json字符串中带有单引号,会解析出错,只有统一成双引号才可以正确的解析
    124             #data = eval(re.search(r'.*?((.*))', resp.text).group(1))
    125             data = json.loads(re.search(r'.*?((.*))', resp.text).group(1).replace("'", '"'))
    126             return data.get('data').get('token')
    127         else:
    128             print('获取token失败')
    129             return None
    130 
    131     def get_rsa_key(self):
    132         get_data = {
    133             'token': self.user['token'],
    134             'tpl': 'netdisk',
    135             'subpro': 'netdisk_web',
    136             'apiver': 'v3',
    137             'tt': self.cur_time,
    138             'gid': self.cur_gid,
    139             'callback': self.cur_callback
    140         }
    141         resp = self.session.get(url='https://passport.baidu.com/v2/getpublickey', headers=self.headers, params=get_data)
    142         if resp.status_code == 200 and self.cur_callback in resp.text:
    143             data = json.loads(re.search(r'.*?((.*))', resp.text).group(1).replace("'", '"'))
    144             return data.get('pubkey'), data.get('key')
    145         else:
    146             print('获取rsa key失败')
    147             return None
    148 
    149     def encript_password(self,pubkey):
    150         """
    151         import rsa
    152         使用rsa库加密(法一)
    153         pub = rsa.PublicKey.load_pkcs1_openssl_pem(pubkey.encode('utf-8'))
    154         encript_passwd = rsa.encrypt(password.encode('utf-8'), pub)
    155         return base64.b64encode(encript_passwd).decode('utf-8')
    156 
    157         """
    158         # pubkey必须为bytes类型
    159         pub=RSA.importKey(pubkey.encode('utf-8'))
    160         #构造“加密器”
    161         encryptor=PKCS1_v1_5.new(pub)
    162         #加密的内容必须为bytes类型
    163         encript_passwd =encryptor.encrypt(self.password.encode('utf-8'))
    164         return base64.b64encode(encript_passwd).decode('utf-8')
    165 
    166     def login(self):
    167         cur_pubkey, cur_key = self.get_rsa_key()
    168         encript_password =self.encript_password(cur_pubkey)
    169         post_data = {
    170             'staticpage': 'http://pan.baidu.com/res/static/thirdparty/pass_v3_jump.html',
    171             'charset': 'utf-8',
    172             'token': self.user['token'],
    173             'tpl': 'netdisk',
    174             'subpro': 'netdisk_web',
    175             'apiver': 'v3',
    176             'tt': self.cur_time,
    177             'codestring': '',
    178             'safeflg': 0,
    179             'u': 'http://pan.baidu.com/disk/home',
    180             'isPhone': '',
    181             'detect': 1,
    182             'gid': self.cur_gid,
    183             'quick_user': 0,
    184             'logintype': 'basicLogin',
    185             'logLoginType': 'pc_loginBasic',
    186             'idc': '',
    187             'loginmerge': 'true',
    188             'foreignusername': '',
    189             'username': self.username,
    190             'password': encript_password,
    191             'mem_pass': 'on',
    192             # 返回的key
    193             'rsakey': cur_key,
    194             'crypttype': 12,
    195             'ppui_logintime': 33554,
    196             'countrycode': '',
    197             'callback': 'parent.'+self.cur_callback
    198         }
    199         resp = self.session.post(url='https://passport.baidu.com/v2/api/?login', data=post_data, headers=self.headers)
    200         if 'err_no=0' in resp.text:
    201             print('登录成功')
    202             self.user['BDUSS'] = self.session.cookies['BDUSS']
    203         else:
    204             print('登录失败')
    205             self.user['BDUSS']=None
    206 
    207     def _request(self,url,data=None,files=None,extra_params=None,callback=None):
    208         params={
    209             'app_id': "250528",
    210             'BDUSS': self.user['BDUSS'],
    211             't': str(int(time.time())),
    212             'bdstoken': self.user['token']
    213         }
    214         if extra_params:
    215             params.update(extra_params)
    216         # print("params:%s" %params)
    217         baibupan_header = {"Referer": "http://pan.baidu.com/disk/home",
    218                     "User-Agent": "netdisk;4.6.2.0;PC;PC-Windows;10.0.10240;WindowsBaiduYunGuanJia"}
    219         header= dict(baibupan_header.items())
    220         if data or files:
    221             api = '%s?%s' % (url, urlencode(params))
    222             # print("api:%s" %api)
    223             if data:
    224                 res=self.session.post(api,data=data,verify=False, headers=header)
    225                 return res
    226             else:
    227                 # print(callback==None)
    228                 (filedata,contenttype)=requests.packages.urllib3.filepost.encode_multipart_formdata(files)
    body=BufferReader(filedata,callback=callback)
    229 # print("body:%s" %type(body)) 230 header.update({ 231 "Content-Type": contenttype 232 }) 233 # print("header:%s" %header) 234 res=self.session.post(api,data=body,verify=False, headers=header) 235 return res 236 else: 237 res=self.session.get(url,params=params,verify=False, headers=header,stream=True) 238 return res 239 240 class PCS(PCSBase): 241 def __init__(self,username,password): 242 self.username=username 243 self.password=password 244 super(PCS,self).__init__(self.username,self.password) 245 246 def upload(self,remote_path,file_handler,callback=None): 247 params={ 248 'method': 'upload', 249 'path': remote_path, 250 'ondup': "newcopy" 251 } 252 files = {'file': (str(int(time.time())), file_handler)} 253 url = 'https://{0}/rest/2.0/pcs/file'.format('pcs.baidu.com') 254 response=self._request(url,files=files,extra_params=params,callback=callback) 255 return response 256 257 def rapid_upload(self,remote_path,file_handler,callback=None): 258 params={ 259 'method':"rapidupload", 260 'path':remote_path, 261 'ondup':"newcopy" 262 } 263 url = 'https://{0}/rest/2.0/pcs/file'.format('pcs.baidu.com') 264 file_handler.seek(0, 2) 265 _BLOCK_SIZE = 2 ** 20 #1MB大小 266 # print(_BLOCK_SIZE) 267 content_length = file_handler.tell() 268 # print(content_length) 269 file_handler.seek(0) 270 271 # 校验段为前 256KB 272 first_256bytes = file_handler.read(256 * 1024) 273 slice_md5 = md5(first_256bytes).hexdigest() 274 275 content_crc32 = crc32(first_256bytes).conjugate() 276 content_md5 = md5(first_256bytes) 277 278 count=1 279 while True: 280 block = file_handler.read(_BLOCK_SIZE) 281 if callback: 282 callback(size=content_length,progress=count*_BLOCK_SIZE) 283 count=count+1 284 if not block: 285 break 286 # 更新crc32和md5校验值 287 content_crc32 = crc32(block, content_crc32).conjugate() 288 content_md5.update(block) 289 data = { 290 'content-length': content_length, 291 'content-md5': content_md5.hexdigest(), 292 'slice-md5': slice_md5, 293 'content-crc32': '%d' % 
(content_crc32.conjugate() & 0xFFFFFFFF) 294 } 295 response=self._request(url,data=data,extra_params=params,callback=callback) 296 return response 297 298 def download(self,remote_path,local_path,callback=None): 299 params={ 300 'method':"download", 301 'path':remote_path 302 } 303 # 兼容原有域名pcs.baidu.com;使用新域名d.pcs.baidu.com,则提供更快、更稳定的下载服务 304 url = 'https://{0}/rest/2.0/pcs/file'.format('d.pcs.baidu.com') 305 with closing(self._request(url, extra_params=params)) as response: 306 chunk_size=1024 #单次请求最大值 307 count=1 308 total_size=int(response.headers['content-length']) #内容体总大小 309 with open(local_path,'wb') as file: 310 for data in response.iter_content(chunk_size=chunk_size): 311 file.write(data) 312 self.progressbar(size=total_size,progress=count*chunk_size,progress_title="正在下载",finish_title="下载完成") 313 count=count+1 314 315 def progressbar(self,size=None, progress=None,progress_title="正在上传",finish_title="上传完成"): 316 #size:文件总字节数 progress:当前传输完成字节数 317 # print("{0} / {1}".format(size, progress)) 318 if progress<size: 319 sys.stdout.write(progress_title+""+str(int((progress/size)*100))+' % '+" ") 320 sys.stdout.flush() 321 else: 322 progress=size 323 sys.stdout.write(finish_title+""+str(int((progress/size)*100))+' % '+" ") 324 325 326 if __name__ == '__main__': 327 username="xxx" 328 password="xxx" 329 pcs=PCS(username,password) 330 res=pcs.upload("/hello/word.js",open("login.js",'rb').read(),callback=pcs.progressbar) 331 print(res.content.decode('utf-8')) 332 res=pcs.rapid_upload("/hello/word.js",open("login.js",'rb'),callback=pcs.progressbar) 333 print(res.content.decode('utf-8')) 334 pcs.download("/hello/word.js","temp.js")
  • 相关阅读:
    winget
    splunk单节点容器部署
    jumpserver容器化部署
    思科acl
    Java springboot-plus
    接口 Postman 上传图片测试
    EF 数据迁移 新
    电商 抓取淘宝分类 包含图片和名称
    思维导图 淘宝上新流程
    功能模块 上传视频 生成视频预览图
  • 原文地址:https://www.cnblogs.com/yizhenfeng168/p/7067966.html
Copyright © 2011-2022 走看看