zoukankan      html  css  js  c++  java
  • Python爬虫-破解有道词典(破解MD5的JS加密算法)

    破解有道词典

    1.进行普通爬取尝试:

     1 '''
     2 破解有道词典
     3 V1
     4 '''
     5 
     6 from urllib import request, parse
     7 
     8 
     9 def youdao(key):
    10 
    11     url = "http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"
    12 
    13     data = {
    14         "i": "boy",
    15         "from":"AUTO",
    16         "to": "AUTO",
    17         "smartresult": "dict",
    18         "client": "fanyideskweb",
    19         "salt": "1523100789519",
    20         "sign": "b8a55a436686cd89873fa46514ccedbe",
    21         "doctype": "json",
    22         "version": "2.1",
    23         "keyfrom": "fanyi.web",
    24         "action":"FY_BY_REALTIME",
    25         "typoResult": "false"
    26     }
    27 
    28     # 参数data需要是bytes格式
    29     data = parse.urlencode(data).encode()
    30 
    31     headers = {
    32                   "Accept": "application/json,text/javascript,*/*;q=0.01",
    33                   #"Accept-Encoding": "gzip,deflate",
    34                   "Accept-Language": "zh-CN,zh;q=0.9",
    35                   "Connection": "keep-alive",
    36                   "Content-Length": "200",
    37                   "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
    38                   "Cookie": "OUTFOX_SEARCH_USER_ID=-1548144101@10.168.8.76;JSESSIONID=aaaTLWzfvp5Hfg9mAhFkw;OUTFOX_SEARCH_USER_ID_NCOO=1999296830.4784973;___rl__test__cookies=1523100789517",
    39                   "Host": "fanyi.youdao.com",
    40                   "Origin": "http://fanyi.youdao.com",
    41                   "Referer": "http://fanyi.youdao.com/",
    42                   "User-Agent": "Mozilla/5.0( X11; Linux x86_64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36 X-Requested-With: XMLHttpRequest"
    43     }
    44 
    45     req = request.Request(url=url, data=data, headers=headers)
    46 
    47     rsp = request.urlopen(req)
    48 
    49     html = rsp.read().decode()
    50     print(html)
    51 
    52 if __name__ == '__main__':
    53     youdao("boy")

    2.破解有道词典的JS、MD5加密算法后爬取数据(处理JS加密代码)

     1 '''
     2 V2
     3 处理js加密代码
     4 '''
     5 
     6 '''
     7 通过查找,能找到js代码中操作代码
     8 
     9 1. 这个是计算salt的公式 r = "" + ((new Date).getTime() + parseInt(10 * Math.random(), 10));
    10 2. sign: n.md5("fanyideskweb" + t + r + "ebSeFb%=XZ%T[KZ)c(sy!");
    11 md5一共需要四个参数:第一个和第四个都是固定值的字符串,第三个是所谓的salt,
    12 第二个参数就是输入的要查找的单词
    13 
    14 '''
    15 
    16 
    17 def getSalt():
    18     '''
    19     salt公式是:  "" + ((new Date).getTime() + parseInt(10 * Math.random(), 10));
    20     把他翻译成python代码
    21     :return:
    22     '''
    23     import time, random
    24 
    25     salt = int(time.time()*1000) + random.randint(0,10)
    26 
    27     return salt
    28 
    29 def getMD5(v):
    30     import hashlib
    31     md5 = hashlib.md5()
    32 
    33     # update需要一共bytes格式的参数
    34     md5.update(v.encode("utf-8"))
    35 
    36     sign = md5.hexdigest()
    37 
    38     return sign
    39 
    40 
    41 def getSign(key, salt):
    42 
    43     sign = 'fanyideskweb'+ key + str(salt) + "ebSeFb%=XZ%T[KZ)c(sy!"
    44     sign = getMD5(sign)
    45 
    46     return sign
    47 
    48 from urllib import request, parse
    49 
    50 
    51 def youdao(key):
    52 
    53     url = "http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"
    54 
    55     salt = getSalt()
    56 
    57     data = {
    58         "i": key,
    59         "from":"AUTO",
    60         "to": "AUTO",
    61         "smartresult": "dict",
    62         "client": "fanyideskweb",
    63         "salt": str(salt) ,
    64         "sign": getSign(key, salt),
    65         "doctype": "json",
    66         "version": "2.1",
    67         "keyfrom": "fanyi.web",
    68         "action":"FY_BY_REALTIME",
    69         "typoResult": "false"
    70     }
    71 
    72     print(data)
    73 
    74     # 参数data需要是bytes格式
    75     data = parse.urlencode(data).encode()
    76 
    77     headers = {
    78         "Accept": "application/json,text/javascript,*/*;q=0.01",
    79         #"Accept-Encoding": "gzip,deflate",
    80         "Accept-Language": "zh-CN,zh;q=0.9",
    81         "Connection": "keep-alive",
    82         "Content-Length": len(data),
    83         "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
    84         "Cookie": "OUTFOX_SEARCH_USER_ID=-1548144101@10.168.8.76;JSESSIONID=aaaTLWzfvp5Hfg9mAhFkw;OUTFOX_SEARCH_USER_ID_NCOO=1999296830.4784973;___rl__test__cookies=1523100789517",
    85         "Host": "fanyi.youdao.com",
    86         "Origin": "http://fanyi.youdao.com",
    87         "Referer": "http://fanyi.youdao.com/",
    88         "User-Agent": "Mozilla/5.0( X11; Linux x86_64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36 X-Requested-With: XMLHttpRequest"
    89     }
    90 
    91     req = request.Request(url=url, data=data, headers=headers)
    92 
    93     rsp = request.urlopen(req)
    94 
    95     html = rsp.read().decode()
    96     print(html)
    97 
    98 if __name__ == '__main__':
    99     youdao("boy")

    =========================

    ==================================

    ==================================

    ======================================

     ==========================================

    结果示例:

    JS代码格式化工具:

    http://tool.oschina.net/codeformat/js

  • 相关阅读:
    Python 工匠:编写条件分支代码的技巧
    component-scan标签的use-default-filters属性的作用以及原理分析
    Serverless 架构的优点和缺点
    5 种使用 Python 代码轻松实现数据可视化的方法
    曾经我是一个只会excel的数据分析师,直到我遇到了……
    月薪45K的Python爬虫工程师告诉你爬虫应该怎么学,太详细了!
    用 Python 构建一个极小的区块链
    第六章 程序数据集散地;数据库
    MyBankgon功能
    第四章 深入C#的string类
  • 原文地址:https://www.cnblogs.com/xuxaut-558/p/10085130.html
Copyright © 2011-2022 走看看