zoukankan      html  css  js  c++  java
  • (反反爬虫)同程网酒店用户评论的全局token

    反反爬虫系列将会陆续的介绍一些爬虫的高级玩法,主要就是分析网站的加密方式,通过python代码实现并爬取数据!

     OK,今天要分析的网站为同程网,获取其酒店的用户评论,评论信息通过JSON返回的API。页面大概长这个样子。

    抓包调试一下~F12却出现这个蛋疼的东西,对chrome控制台动了手脚。

    很明显的就能发现是这段JS在搞鬼

     

    没办法ctrl+s保存html到本地。将这段JS找到把里面的函数删除掉即可

    这里有坑的,这些JS文件都是压缩过的,所以务必要保持结构的完整性。在用sublime打开的时候,删除这段函数的内容即可,不要试图格式化这个JS文件。

     OK!这个时候再打开本地保存的html文件,F12就可以进行抓包分析了。(当然了你和我一样用fiddler抓包的话,上面的就相当于白说。对于这种反爬对控制台动手脚的网站,建议用fiddler进行抓包分析!)

    # Request headers copied from the captured browser request for one hotel page.
    headers = dict([
        ("Host", "www.ly.com"),
        ("Referer", "https://www.ly.com/HotelInfo-92515879.html"),
        (
            "User-Agent",
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/71.0.3578.98 Safari/537.36',
        ),
    ])

    # Query-string parameters of the same captured request; every value is a string.
    # `antitoken` holds the value observed in that capture.
    params = dict(
        hotelid="92515879",
        page="1",
        pageSize="10",
        commentType="0",
        roomTypeId="",
        tripPurposeId="",
        RankType="1",
        mainTagId="",
        subTagId="",
        antitoken="54fc51cc98d934d4b2e054c58ca905f6",
    )

     很明显就能发现一个关键参数antitoken。接下来就是获取这个antitoken。搜索一下antitoken关键字。

     

    搜索出来的结果都指向last.js文件,那就没啥好说的,观察这个文件里面的函数。将这个文件全部复制下来,然后在Sources里新建一个Snippets,将代码复制进去,继续搜索antitoken。

    这就是一段ajax代码,从cookie中获取“wangba”,在cookie中找到wangba,发现他是一个类似于时间戳的东西,后面其实也介绍到了,e其实就是一个时间戳。那就好说了,直接模拟。

    // Patch for the copied script: set e to the current time in milliseconds, as a string.
    e=(new Date).getTime().toString()

    直接在snippet中修改,然后在debug调试下JS找到antitoken的生成函数。两步找到函数~

    // Module factory as dumped from the debugger. It is called with (e, t, a): `e` is
    // used as the module object (the result is published through e.exports) and `a`
    // is called with a numeric id to obtain other modules. `t` is not used here.
    // The initial constants and the four 16-step rounds match MD5 (RFC 1321).
    function(e, t, a) {
        var n, i, o, s, r;
        n = a(29), // helper object: bytesToWords, endian, wordsToBytes, bytesToHex are used below
        i = a(12).utf8, // string -> bytes encoder used for ordinary strings
        o = a(30), // predicate applied to non-string input; its definition is not shown here
        s = a(12).bin, // string -> bytes encoder used when t.encoding === "binary"
        // r(e, t): core routine; returns the four 32-bit state words after n.endian().
        (r = function(e, t) {
            // Normalize the input to a byte array: strings go through s or i, values
            // accepted by o() are copied with slice, arrays pass through unchanged,
            // and anything else is replaced by its toString() result.
            e.constructor == String ? e = t && "binary" === t.encoding ? s.stringToBytes(e) : i.stringToBytes(e) : o(e) ? e = Array.prototype.slice.call(e, 0) : Array.isArray(e) || (e = e.toString());
            // a: input packed into 32-bit words; l: input length in bits;
            // c, d, p, u: the four state words. The loop reverses the byte order of each word.
            for (var a = n.bytesToWords(e), l = 8 * e.length, c = 1732584193, d = -271733879, p = -1732584194, u = 271733878, m = 0; m < a.length; m++)
                a[m] = 16711935 & (a[m] << 8 | a[m] >>> 24) | 4278255360 & (a[m] << 24 | a[m] >>> 8);
            // Padding: set the 0x80 marker right after the data and store the bit
            // length in word 14 of the final 16-word block.
            a[l >>> 5] |= 128 << l % 32,
            a[14 + (l + 64 >>> 9 << 4)] = l;
            var f = r._ff
              , h = r._gg
              , v = r._hh
              , g = r._ii;
            // Process the message 16 words at a time.
            for (m = 0; m < a.length; m += 16) {
                // Remember the state at the start of the block.
                var y = c
                  , _ = d
                  , b = p
                  , $ = u;
                // One expression containing all 64 steps: 16 calls each of f, h, v and g,
                // nested so that the assignments to c, u, p, d happen in step order.
                d = g(d = g(d = g(d = g(d = v(d = v(d = v(d = v(d = h(d = h(d = h(d = h(d = f(d = f(d = f(d = f(d, p = f(p, u = f(u, c = f(c, d, p, u, a[m + 0], 7, -680876936), d, p, a[m + 1], 12, -389564586), c, d, a[m + 2], 17, 606105819), u, c, a[m + 3], 22, -1044525330), p = f(p, u = f(u, c = f(c, d, p, u, a[m + 4], 7, -176418897), d, p, a[m + 5], 12, 1200080426), c, d, a[m + 6], 17, -1473231341), u, c, a[m + 7], 22, -45705983), p = f(p, u = f(u, c = f(c, d, p, u, a[m + 8], 7, 1770035416), d, p, a[m + 9], 12, -1958414417), c, d, a[m + 10], 17, -42063), u, c, a[m + 11], 22, -1990404162), p = f(p, u = f(u, c = f(c, d, p, u, a[m + 12], 7, 1804603682), d, p, a[m + 13], 12, -40341101), c, d, a[m + 14], 17, -1502002290), u, c, a[m + 15], 22, 1236535329), p = h(p, u = h(u, c = h(c, d, p, u, a[m + 1], 5, -165796510), d, p, a[m + 6], 9, -1069501632), c, d, a[m + 11], 14, 643717713), u, c, a[m + 0], 20, -373897302), p = h(p, u = h(u, c = h(c, d, p, u, a[m + 5], 5, -701558691), d, p, a[m + 10], 9, 38016083), c, d, a[m + 15], 14, -660478335), u, c, a[m + 4], 20, -405537848), p = h(p, u = h(u, c = h(c, d, p, u, a[m + 9], 5, 568446438), d, p, a[m + 14], 9, -1019803690), c, d, a[m + 3], 14, -187363961), u, c, a[m + 8], 20, 1163531501), p = h(p, u = h(u, c = h(c, d, p, u, a[m + 13], 5, -1444681467), d, p, a[m + 2], 9, -51403784), c, d, a[m + 7], 14, 1735328473), u, c, a[m + 12], 20, -1926607734), p = v(p, u = v(u, c = v(c, d, p, u, a[m + 5], 4, -378558), d, p, a[m + 8], 11, -2022574463), c, d, a[m + 11], 16, 1839030562), u, c, a[m + 14], 23, -35309556), p = v(p, u = v(u, c = v(c, d, p, u, a[m + 1], 4, -1530992060), d, p, a[m + 4], 11, 1272893353), c, d, a[m + 7], 16, -155497632), u, c, a[m + 10], 23, -1094730640), p = v(p, u = v(u, c = v(c, d, p, u, a[m + 13], 4, 681279174), d, p, a[m + 0], 11, -358537222), c, d, a[m + 3], 16, -722521979), u, c, a[m + 6], 23, 76029189), p = v(p, u = v(u, c = v(c, d, p, u, a[m + 9], 4, -640364487), d, p, a[m + 12], 11, -421815835), c, d, a[m + 
15], 16, 530742520), u, c, a[m + 2], 23, -995338651), p = g(p, u = g(u, c = g(c, d, p, u, a[m + 0], 6, -198630844), d, p, a[m + 7], 10, 1126891415), c, d, a[m + 14], 15, -1416354905), u, c, a[m + 5], 21, -57434055), p = g(p, u = g(u, c = g(c, d, p, u, a[m + 12], 6, 1700485571), d, p, a[m + 3], 10, -1894986606), c, d, a[m + 10], 15, -1051523), u, c, a[m + 1], 21, -2054922799), p = g(p, u = g(u, c = g(c, d, p, u, a[m + 8], 6, 1873313359), d, p, a[m + 15], 10, -30611744), c, d, a[m + 6], 15, -1560198380), u, c, a[m + 13], 21, 1309151649), p = g(p, u = g(u, c = g(c, d, p, u, a[m + 4], 6, -145523070), d, p, a[m + 11], 10, -1120210379), c, d, a[m + 2], 15, 718787259), u, c, a[m + 9], 21, -343485551),
                // Add the block's starting state back in, as unsigned 32-bit values.
                c = c + y >>> 0,
                d = d + _ >>> 0,
                p = p + b >>> 0,
                u = u + $ >>> 0
            }
            return n.endian([c, d, p, u])
        }
        // The four step functions share one shape: combine t, a, n with a bitwise
        // function, add e, the message word i and the constant s, rotate left by o
        // bits, then add t. They differ only in the bitwise function.
        )._ff = function(e, t, a, n, i, o, s) {
            var r = e + (t & a | ~t & n) + (i >>> 0) + s;
            return (r << o | r >>> 32 - o) + t
        }
        ,
        r._gg = function(e, t, a, n, i, o, s) {
            var r = e + (t & n | a & ~n) + (i >>> 0) + s;
            return (r << o | r >>> 32 - o) + t
        }
        ,
        r._hh = function(e, t, a, n, i, o, s) {
            var r = e + (t ^ a ^ n) + (i >>> 0) + s;
            return (r << o | r >>> 32 - o) + t
        }
        ,
        r._ii = function(e, t, a, n, i, o, s) {
            var r = e + (a ^ (t | ~n)) + (i >>> 0) + s;
            return (r << o | r >>> 32 - o) + t
        }
        ,
        r._blocksize = 16,
        r._digestsize = 16,
        // Published function: throws on undefined/null input; otherwise returns the
        // digest as a byte array (t.asBytes), as a string built by s.bytesToString
        // (t.asString), or as a hex string when neither option is set.
        e.exports = function(e, t) {
            if (e === undefined || null === e)
                throw new Error("Illegal argument " + e);
            var a = n.wordsToBytes(r(e, t));
            return t && t.asBytes ? a : t && t.asString ? s.bytesToString(a) : n.bytesToHex(a)
        }
    }

    这个函数接收e,t,a三个参数,最后返回t对象,定义了一些变量,var n, i, o, s, r; 就是根据这些变量返回t对象,调用t.getantitoken从而获取到antitoken值。OK思路有了接下来就是伪造这些n, i, o, s, 那么如何进行伪造咧,很简单。继续调试,找到这几个变量的生成函数。

     

    这几个参数都是由a这个对象生成的。找a

     

    将之前的断点取消,在n处打断点,运行程序。然后控制台输入a(29)果然得到一个函数~

    // Module factory printed by the console for a(29). It is called with (e, t); only
    // `e` is used, to publish the helper object `n` through e.exports.
    function(e, t) {
        var a, n;
        // 64-character alphabet used by bytesToBase64 and base64ToBytes.
        a = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
        n = {
            // Rotate the 32-bit value e left by t bits.
            rotl: function(e, t) {
                return e << t | e >>> 32 - t
            },
            // Rotate the 32-bit value e right by t bits.
            rotr: function(e, t) {
                return e << 32 - t | e >>> t
            },
            // For a Number: reverse the byte order of the 32-bit value. For anything
            // else: apply endian to every element in place and return the same object.
            endian: function(e) {
                if (e.constructor == Number)
                    return 16711935 & n.rotl(e, 8) | 4278255360 & n.rotl(e, 24);
                for (var t = 0; t < e.length; t++)
                    e[t] = n.endian(e[t]);
                return e
            },
            // Return an array of e integers in 0..255 drawn from Math.random().
            randomBytes: function(e) {
                for (var t = []; e > 0; e--)
                    t.push(Math.floor(256 * Math.random()));
                return t
            },
            // Pack bytes into 32-bit words, first byte in the most significant position.
            bytesToWords: function(e) {
                for (var t = [], a = 0, n = 0; a < e.length; a++,
                n += 8)
                    t[n >>> 5] |= e[a] << 24 - n % 32;
                return t
            },
            // Unpack 32-bit words into bytes, most significant byte first.
            wordsToBytes: function(e) {
                for (var t = [], a = 0; a < 32 * e.length; a += 8)
                    t.push(e[a >>> 5] >>> 24 - a % 32 & 255);
                return t
            },
            // Render each byte as two lowercase hex digits and join them.
            bytesToHex: function(e) {
                for (var t = [], a = 0; a < e.length; a++)
                    t.push((e[a] >>> 4).toString(16)),
                    t.push((15 & e[a]).toString(16));
                return t.join("")
            },
            // Parse a hex string two characters at a time into byte values.
            hexToBytes: function(e) {
                for (var t = [], a = 0; a < e.length; a += 2)
                    t.push(parseInt(e.substr(a, 2), 16));
                return t
            },
            // Encode bytes as base64: each group of three bytes becomes four
            // characters, with "=" emitted for positions past the end of the input.
            bytesToBase64: function(e) {
                for (var t = [], n = 0; n < e.length; n += 3)
                    for (var i = e[n] << 16 | e[n + 1] << 8 | e[n + 2], o = 0; o < 4; o++)
                        8 * n + 6 * o <= 8 * e.length ? t.push(a.charAt(i >>> 6 * (3 - o) & 63)) : t.push("=");
                return t.join("")
            },
            // Decode base64 into bytes after removing every character that is not
            // a letter, a digit, "+" or "/".
            base64ToBytes: function(e) {
                e = e.replace(/[^A-Z0-9+/]/gi, "");
                for (var t = [], n = 0, i = 0; n < e.length; i = ++n % 4)
                    0 != i && t.push((a.indexOf(e.charAt(n - 1)) & Math.pow(2, -2 * i + 8) - 1) << 2 * i | a.indexOf(e.charAt(n)) >>> 6 - 2 * i);
                return t
            }
        },
        e.exports = n
    }

    再次输入a(12)

    // Module factory printed by the console for a(12). It is called with (e, t); only
    // `e` is used, to publish the encoder object `a` through e.exports.
    function(e, t) {
        var a = {
            // UTF-8 conversions, layered on top of the `bin` conversions below.
            utf8: {
                // String -> bytes: unescape(encodeURIComponent(e)) yields one
                // character per encoded byte, which bin.stringToBytes then converts.
                stringToBytes: function(e) {
                    return a.bin.stringToBytes(unescape(encodeURIComponent(e)))
                },
                // Bytes -> string: the reverse of stringToBytes.
                bytesToString: function(e) {
                    return decodeURIComponent(escape(a.bin.bytesToString(e)))
                }
            },
            // One-character-per-byte conversions.
            bin: {
                // String -> bytes: the low 8 bits of each character code.
                stringToBytes: function(e) {
                    for (var t = [], a = 0; a < e.length; a++)
                        t.push(255 & e.charCodeAt(a));
                    return t
                },
                // Bytes -> string: one character per byte value.
                bytesToString: function(e) {
                    for (var t = [], a = 0; a < e.length; a++)
                        t.push(String.fromCharCode(e[a]));
                    return t.join("")
                }
            }
        };
        e.exports = a
    }

     

    a(30) = Null

     所以n、i、o、s这几个参数都找到了,那么就是构建antitoken函数,带上关键参数生成了。

      // Input for the token: the current time in milliseconds, as a decimal string.
      var e = (new Date()).getTime().toString();

      /**
       * Compute the antitoken for a value: the MD5 digest of its bytes, rendered as
       * 32 lowercase hexadecimal characters.
       *
       * @param {string|number|Array<number>|Uint8Array} e - Value to hash. Strings are
       *   encoded as UTF-8; arrays and Uint8Array instances are read as raw bytes;
       *   any other value is converted with String() and then encoded as UTF-8.
       * @returns {string} 32-character lowercase hex digest.
       * @throws {Error} "Illegal argument ..." when e is undefined or null.
       */
      function antitoken(e) {
          if (e === undefined || e === null) {
              throw new Error("Illegal argument " + e);
          }

          // Left-rotation amounts: four per round, reused four times within the round.
          var SHIFTS = [7, 12, 17, 22, 5, 9, 14, 20, 4, 11, 16, 23, 6, 10, 15, 21];

          // Per-step additive constants: floor(2^32 * |sin(step + 1)|), as in RFC 1321.
          var SINES = [];
          for (var k = 0; k < 64; k++) {
              SINES.push(Math.floor(Math.abs(Math.sin(k + 1)) * 4294967296));
          }

          // UTF-8 encode a string. encodeURIComponent emits "%XX" for every byte it
          // escapes and leaves the remaining ASCII characters as they are.
          function utf8Bytes(text) {
              var escaped = encodeURIComponent(text);
              var out = [];
              for (var pos = 0; pos < escaped.length; pos++) {
                  if (escaped.charAt(pos) === "%") {
                      out.push(parseInt(escaped.slice(pos + 1, pos + 3), 16));
                      pos += 2;
                  } else {
                      out.push(escaped.charCodeAt(pos));
                  }
              }
              return out;
          }

          // Normalize the input to an array of byte values.
          var bytes;
          if (typeof e === "string") {
              bytes = utf8Bytes(e);
          } else if (Array.isArray(e) || (typeof Uint8Array !== "undefined" && e instanceof Uint8Array)) {
              bytes = Array.prototype.slice.call(e, 0);
          } else {
              bytes = utf8Bytes(String(e));
          }

          // Pack the bytes into little-endian 32-bit words, zero-padded to a whole
          // number of 16-word blocks with room for the marker bit and the length.
          var bitLength = bytes.length * 8;
          var wordCount = (((bitLength + 64) >>> 9) + 1) * 16;
          var words = [];
          for (var z = 0; z < wordCount; z++) {
              words.push(0);
          }
          for (var idx = 0; idx < bytes.length; idx++) {
              words[idx >>> 2] |= (bytes[idx] & 255) << ((idx % 4) * 8);
          }
          words[bitLength >>> 5] |= 128 << (bitLength % 32); // 0x80 marker after the data
          words[wordCount - 2] = bitLength; // message length in bits (low 32 bits)

          // Initial state.
          var a0 = 1732584193;
          var b0 = -271733879;
          var c0 = -1732584194;
          var d0 = 271733878;

          for (var offset = 0; offset < wordCount; offset += 16) {
              var a = a0;
              var b = b0;
              var c = c0;
              var d = d0;

              for (var step = 0; step < 64; step++) {
                  var mix;
                  var wordIndex;
                  if (step < 16) {
                      mix = (b & c) | (~b & d);
                      wordIndex = step;
                  } else if (step < 32) {
                      mix = (b & d) | (c & ~d);
                      wordIndex = (5 * step + 1) % 16;
                  } else if (step < 48) {
                      mix = b ^ c ^ d;
                      wordIndex = (3 * step + 5) % 16;
                  } else {
                      mix = c ^ (b | ~d);
                      wordIndex = (7 * step) % 16;
                  }

                  // "| 0" keeps every intermediate value a 32-bit integer.
                  var sum = (a + mix + SINES[step] + words[offset + wordIndex]) | 0;
                  var shift = SHIFTS[(step >>> 4) * 4 + (step % 4)];
                  var rotated = (sum << shift) | (sum >>> (32 - shift));

                  a = d;
                  d = c;
                  c = b;
                  b = (b + rotated) | 0;
              }

              a0 = (a0 + a) | 0;
              b0 = (b0 + b) | 0;
              c0 = (c0 + c) | 0;
              d0 = (d0 + d) | 0;
          }

          // Emit the four state words least-significant byte first, two hex digits per byte.
          var state = [a0, b0, c0, d0];
          var hex = "";
          for (var w = 0; w < state.length; w++) {
              for (var bits = 0; bits < 32; bits += 8) {
                  var octet = (state[w] >>> bits) & 255;
                  hex += (octet >>> 4).toString(16) + (octet & 15).toString(16);
              }
          }
          return hex;
      }

      console.log(antitoken(e));
    View Code

    OK~成功拿到了这个antitoken。那么是不是就可以去获取酒店的评论信息啦?答案是否定的,因为这个antitoken他是全局变量。

    这里有个最简单的方法去拿数据,那就是打开你的fiddler直接拿到headers以及cookies,然后直接去请求即可,不需要你通过代码去获取cookies,反正我通过selenium以及requests获取到的cookies都是不全的,有一个关键信息始终是获取不到的那就是它。

    这个sessionID,现在你知道session与cookie的区别了吗?

    最后这时我拿到的数据

     

     总结一下:

    这次获取同程的这个antitoken,对于现在的我来说挺难的,关键是对于前端JS如何进行混淆,以及如何获取到想要的函数都还是不懂呀,分析的思路主要是原作者的思路,我只能跟着他的脚步一步一步的做。这就是经验上的差距吧。以后需要多加练习类似的具有反爬措施,且token是经过加密的网站。爬这种网站真的收获挺大的。

    还有这篇分析JS的文章是我按照原作者的步骤一步一步的执行写出来的。大家可以去知乎上看看原作者写的。他的思路更加清晰明了。

    传送门:

    https://zhuanlan.zhihu.com/p/54627024

  • 相关阅读:
    leetcode--Populating Next Right Pointers in Each Node II
    leetcode—Populating Next Right Pointers in Each Node
    Pascal's Triangle II
    leetcode—pascal triangle
    leetcode—triangle
    October 23rd, 2017 Week 43rd Monday
    October 22nd, 2017 Week 43rd Sunday
    October 21st 2017 Week 42nd Saturday
    October 20th 2017 Week 42nd Friday
    October 19th 2017 Week 42nd Thursday
  • 原文地址:https://www.cnblogs.com/pontoon/p/10478664.html
Copyright © 2011-2022 走看看