zoukankan      html  css  js  c++  java
  • (反反爬虫)同程网酒店用户评论的全局token

    反反爬虫系列将会陆续地介绍一些爬虫的高级玩法,主要就是分析网站的加密方式,通过python代码实现并爬取数据!

     OK,今天要分析的网站为同程网,获取其酒店的用户评论,评论信息通过JSON返回的API。页面大概长这个样子。

    抓包调试一下~F12却出现这个蛋疼的东西,对chrome控制台动了手脚。

    很明显的就能发现是这段JS在搞鬼

     

    没办法ctrl+s保存html到本地。将这段JS找到把里面的函数删除掉即可

    这里有坑的,这些JS文件都是压缩过的,所以务必要保持结构的完整性。在用sublime打开的时候,删除这段函数的内容即可,不要试图格式化这个JS文件。

     OK!这个时候再打开本地保存的html文件,F12就可以进行抓包分析了。(当然了你和我一样用fiddler抓包的话,上面的就相当于白说。对于这种反爬对控制台动手脚的网站,建议用fiddler进行抓包分析!)

    # Request headers for the hotel-comment API call, with the values shown in
    # the article (hotel page 92515879 as Referer, a desktop Chrome User-Agent).
    headers = {
        "Host": "www.ly.com",
        "Referer": "https://www.ly.com/HotelInfo-92515879.html",
        "User-Agent": (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/71.0.3578.98 Safari/537.36"
        ),
    }
    
    # Query-string parameters of the comment API request. Every value is a
    # string; `antitoken` is the value the rest of the article reproduces.
    params = dict(
        hotelid="92515879",
        page="1",
        pageSize="10",
        commentType="0",
        roomTypeId="",
        tripPurposeId="",
        RankType="1",
        mainTagId="",
        subTagId="",
        antitoken="54fc51cc98d934d4b2e054c58ca905f6",
    )

     很明显就能发现一个关键参数antitoken。接下来就是获取这个antitoken。搜索一下antitoken关键字。

     

    搜索出来的结果都指向last.js文件,那就没啥好说的,直接观察这个文件里面的函数。将这个文件全部复制下来,然后在Sources里新建一个Snippet,将代码复制进去,继续搜索antitoken。

    这就是一段ajax代码,从cookie中获取“wangba”,在cookie中找到wangba,发现他是一个类似于时间戳的东西,后面其实也介绍到了,e其实就是一个时间戳。那就好说了,直接模拟。

    // Replace the cookie lookup with the current time in milliseconds, as a string.
    e = Date.now().toString();

    直接在snippet中修改,然后在debug调试下JS找到antitoken的生成函数。两步找到函数~

    // Module wrapper as dumped from the site's bundle. `e` is the module object
    // (its `exports` is assigned at the end), `t` is unused here, and `a` is the
    // loader function used to fetch other modules by numeric id.
    // The routine below matches MD5: the initial state is 0x67452301, 0xEFCDAB89,
    // 0x98BADCFE, 0x10325476 and it runs 64 steps over each 16-word block.
    function(e, t, a) {
        var n, i, o, s, r;
        // n: byte/word helpers (module 29); i / s: utf8 and binary string<->bytes
        // converters (module 12); o: predicate from module 30, used below to decide
        // whether the input must be copied into a plain array.
        n = a(29),
        i = a(12).utf8,
        o = a(30),
        s = a(12).bin,
        // r(e, t): digest core. e is the message, t an optional options object.
        // Returns the four 32-bit state words after n.endian().
        (r = function(e, t) {
            // Normalise the message to an array of bytes: strings go through the
            // binary converter when t.encoding === "binary", otherwise through utf8;
            // values accepted by o() are copied with slice; anything that is not an
            // array is replaced by its toString().
            e.constructor == String ? e = t && "binary" === t.encoding ? s.stringToBytes(e) : i.stringToBytes(e) : o(e) ? e = Array.prototype.slice.call(e, 0) : Array.isArray(e) || (e = e.toString());
            // a: message packed into 32-bit words, l: message length in bits,
            // c/d/p/u: the four state words, m: loop index.
            for (var a = n.bytesToWords(e), l = 8 * e.length, c = 1732584193, d = -271733879, p = -1732584194, u = 271733878, m = 0; m < a.length; m++)
                // Reverse the byte order of every word (0x00FF00FF / 0xFF00FF00 masks).
                a[m] = 16711935 & (a[m] << 8 | a[m] >>> 24) | 4278255360 & (a[m] << 24 | a[m] >>> 8);
            // Padding: set the 0x80 marker bit right after the message ...
            a[l >>> 5] |= 128 << l % 32,
            // ... and store the bit length in word 14 of the final 16-word block.
            a[14 + (l + 64 >>> 9 << 4)] = l;
            // Short aliases for the four per-round step functions defined below.
            var f = r._ff
              , h = r._gg
              , v = r._hh
              , g = r._ii;
            // Process the message 16 words (one block) at a time.
            for (m = 0; m < a.length; m += 16) {
                // Remember the state at the start of the block; it is added back after
                // the 64 steps.
                var y = c
                  , _ = d
                  , b = p
                  , $ = u;
                // All 64 steps written as one nested expression: 16 calls each of
                // f (_ff), h (_gg), v (_hh) and g (_ii). Evaluation is innermost-first,
                // so the statement order is significant and must not be rearranged.
                d = g(d = g(d = g(d = g(d = v(d = v(d = v(d = v(d = h(d = h(d = h(d = h(d = f(d = f(d = f(d = f(d, p = f(p, u = f(u, c = f(c, d, p, u, a[m + 0], 7, -680876936), d, p, a[m + 1], 12, -389564586), c, d, a[m + 2], 17, 606105819), u, c, a[m + 3], 22, -1044525330), p = f(p, u = f(u, c = f(c, d, p, u, a[m + 4], 7, -176418897), d, p, a[m + 5], 12, 1200080426), c, d, a[m + 6], 17, -1473231341), u, c, a[m + 7], 22, -45705983), p = f(p, u = f(u, c = f(c, d, p, u, a[m + 8], 7, 1770035416), d, p, a[m + 9], 12, -1958414417), c, d, a[m + 10], 17, -42063), u, c, a[m + 11], 22, -1990404162), p = f(p, u = f(u, c = f(c, d, p, u, a[m + 12], 7, 1804603682), d, p, a[m + 13], 12, -40341101), c, d, a[m + 14], 17, -1502002290), u, c, a[m + 15], 22, 1236535329), p = h(p, u = h(u, c = h(c, d, p, u, a[m + 1], 5, -165796510), d, p, a[m + 6], 9, -1069501632), c, d, a[m + 11], 14, 643717713), u, c, a[m + 0], 20, -373897302), p = h(p, u = h(u, c = h(c, d, p, u, a[m + 5], 5, -701558691), d, p, a[m + 10], 9, 38016083), c, d, a[m + 15], 14, -660478335), u, c, a[m + 4], 20, -405537848), p = h(p, u = h(u, c = h(c, d, p, u, a[m + 9], 5, 568446438), d, p, a[m + 14], 9, -1019803690), c, d, a[m + 3], 14, -187363961), u, c, a[m + 8], 20, 1163531501), p = h(p, u = h(u, c = h(c, d, p, u, a[m + 13], 5, -1444681467), d, p, a[m + 2], 9, -51403784), c, d, a[m + 7], 14, 1735328473), u, c, a[m + 12], 20, -1926607734), p = v(p, u = v(u, c = v(c, d, p, u, a[m + 5], 4, -378558), d, p, a[m + 8], 11, -2022574463), c, d, a[m + 11], 16, 1839030562), u, c, a[m + 14], 23, -35309556), p = v(p, u = v(u, c = v(c, d, p, u, a[m + 1], 4, -1530992060), d, p, a[m + 4], 11, 1272893353), c, d, a[m + 7], 16, -155497632), u, c, a[m + 10], 23, -1094730640), p = v(p, u = v(u, c = v(c, d, p, u, a[m + 13], 4, 681279174), d, p, a[m + 0], 11, -358537222), c, d, a[m + 3], 16, -722521979), u, c, a[m + 6], 23, 76029189), p = v(p, u = v(u, c = v(c, d, p, u, a[m + 9], 4, -640364487), d, p, a[m + 12], 11, -421815835), c, d, a[m + 
15], 16, 530742520), u, c, a[m + 2], 23, -995338651), p = g(p, u = g(u, c = g(c, d, p, u, a[m + 0], 6, -198630844), d, p, a[m + 7], 10, 1126891415), c, d, a[m + 14], 15, -1416354905), u, c, a[m + 5], 21, -57434055), p = g(p, u = g(u, c = g(c, d, p, u, a[m + 12], 6, 1700485571), d, p, a[m + 3], 10, -1894986606), c, d, a[m + 10], 15, -1051523), u, c, a[m + 1], 21, -2054922799), p = g(p, u = g(u, c = g(c, d, p, u, a[m + 8], 6, 1873313359), d, p, a[m + 15], 10, -30611744), c, d, a[m + 6], 15, -1560198380), u, c, a[m + 13], 21, 1309151649), p = g(p, u = g(u, c = g(c, d, p, u, a[m + 4], 6, -145523070), d, p, a[m + 11], 10, -1120210379), c, d, a[m + 2], 15, 718787259), u, c, a[m + 9], 21, -343485551),
                // Add the saved block-start state back in, truncated to unsigned 32 bits.
                c = c + y >>> 0,
                d = d + _ >>> 0,
                p = p + b >>> 0,
                u = u + $ >>> 0
            }
            // Byte-swap the four state words before handing them back.
            return n.endian([c, d, p, u])
        }
        // Step helpers, all with the signature (e, t, a, n, i, o, s): they add a
        // bitwise mix of t/a/n, the message word i and the constant s to e, rotate the
        // sum left by o bits and add t. Only the bitwise mix differs between them.
        )._ff = function(e, t, a, n, i, o, s) {
            var r = e + (t & a | ~t & n) + (i >>> 0) + s;
            return (r << o | r >>> 32 - o) + t
        }
        ,
        r._gg = function(e, t, a, n, i, o, s) {
            var r = e + (t & n | a & ~n) + (i >>> 0) + s;
            return (r << o | r >>> 32 - o) + t
        }
        ,
        r._hh = function(e, t, a, n, i, o, s) {
            var r = e + (t ^ a ^ n) + (i >>> 0) + s;
            return (r << o | r >>> 32 - o) + t
        }
        ,
        r._ii = function(e, t, a, n, i, o, s) {
            var r = e + (a ^ (t | ~n)) + (i >>> 0) + s;
            return (r << o | r >>> 32 - o) + t
        }
        ,
        r._blocksize = 16,
        r._digestsize = 16,
        // Public entry point: rejects undefined/null, then returns the digest as a byte
        // array (t.asBytes), a binary string (t.asString) or, by default, a hex string.
        e.exports = function(e, t) {
            if (e === undefined || null === e)
                throw new Error("Illegal argument " + e);
            var a = n.wordsToBytes(r(e, t));
            return t && t.asBytes ? a : t && t.asString ? s.bytesToString(a) : n.bytesToHex(a)
        }
    }

    这个函数接收e,t,a三个参数,最后返回t对象,定义了一些变量,var n, i, o, s, r; 就是根据这些变量返回t对象,调用t.getantitoken从而获取到antitoken值。OK思路有了接下来就是伪造这些n, i, o, s, 那么如何进行伪造咧,很简单。继续调试,找到这几个变量的生成函数。

     

    这几个参数都是由a这个对象生成的。找a

     

    将之前的断点取消,在n处打断点,运行程序。然后控制台输入a(29)果然得到一个函数~

    // Module 29 as dumped from the console: a collection of bit/byte conversion
    // helpers, exported as a single object through e.exports.
    function(e, t) {
        var a, n;
        // a: the base64 alphabet used by bytesToBase64 / base64ToBytes below.
        a = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
        n = {
            // Rotate the 32-bit value e left by t bits.
            rotl: function(e, t) {
                return e << t | e >>> 32 - t
            },
            // Rotate the 32-bit value e right by t bits.
            rotr: function(e, t) {
                return e << 32 - t | e >>> t
            },
            // Reverse the byte order of a 32-bit number. For any other input (an
            // array), every element is swapped in place and the same array is returned.
            endian: function(e) {
                if (e.constructor == Number)
                    return 16711935 & n.rotl(e, 8) | 4278255360 & n.rotl(e, 24);
                for (var t = 0; t < e.length; t++)
                    e[t] = n.endian(e[t]);
                return e
            },
            // Return an array of e integers in 0..255 drawn from Math.random().
            randomBytes: function(e) {
                for (var t = []; e > 0; e--)
                    t.push(Math.floor(256 * Math.random()));
                return t
            },
            // Pack bytes into 32-bit words, first byte in the most significant position.
            // Note that the locals a and n shadow the outer alphabet and helper object.
            bytesToWords: function(e) {
                for (var t = [], a = 0, n = 0; a < e.length; a++,
                n += 8)
                    t[n >>> 5] |= e[a] << 24 - n % 32;
                return t
            },
            // Unpack 32-bit words into bytes, most significant byte first.
            wordsToBytes: function(e) {
                for (var t = [], a = 0; a < 32 * e.length; a += 8)
                    t.push(e[a >>> 5] >>> 24 - a % 32 & 255);
                return t
            },
            // Render each byte as two lowercase hex digits and join them.
            bytesToHex: function(e) {
                for (var t = [], a = 0; a < e.length; a++)
                    t.push((e[a] >>> 4).toString(16)),
                    t.push((15 & e[a]).toString(16));
                return t.join("")
            },
            // Parse a hex string two characters at a time into byte values.
            hexToBytes: function(e) {
                for (var t = [], a = 0; a < e.length; a += 2)
                    t.push(parseInt(e.substr(a, 2), 16));
                return t
            },
            // Encode bytes as base64: every group of three bytes yields four
            // characters, with "=" emitted for positions past the end of the input.
            bytesToBase64: function(e) {
                for (var t = [], n = 0; n < e.length; n += 3)
                    for (var i = e[n] << 16 | e[n + 1] << 8 | e[n + 2], o = 0; o < 4; o++)
                        8 * n + 6 * o <= 8 * e.length ? t.push(a.charAt(i >>> 6 * (3 - o) & 63)) : t.push("=");
                return t.join("")
            },
            // Decode base64 to bytes after dropping every character that is not a
            // letter, digit, "+" or "/" (so padding "=" is removed too).
            base64ToBytes: function(e) {
                e = e.replace(/[^A-Z0-9+/]/gi, "");
                for (var t = [], n = 0, i = 0; n < e.length; i = ++n % 4)
                    0 != i && t.push((a.indexOf(e.charAt(n - 1)) & Math.pow(2, -2 * i + 8) - 1) << 2 * i | a.indexOf(e.charAt(n)) >>> 6 - 2 * i);
                return t
            }
        },
        e.exports = n
    }

    再次输入a(12)

    // Module 12 as dumped from the console: string <-> byte-array converters,
    // exported as { utf8, bin } through e.exports.
    function(e, t) {
        var a = {
            utf8: {
                // String -> UTF-8 bytes: encodeURIComponent + unescape turns the text
                // into a string with one character per UTF-8 byte, which bin then splits.
                stringToBytes: function(e) {
                    return a.bin.stringToBytes(unescape(encodeURIComponent(e)))
                },
                // UTF-8 bytes -> string: the inverse of stringToBytes above.
                bytesToString: function(e) {
                    return decodeURIComponent(escape(a.bin.bytesToString(e)))
                }
            },
            bin: {
                // String -> bytes, keeping only the low 8 bits of each char code.
                stringToBytes: function(e) {
                    for (var t = [], a = 0; a < e.length; a++)
                        t.push(255 & e.charCodeAt(a));
                    return t
                },
                // Bytes -> string, one character per byte value.
                bytesToString: function(e) {
                    for (var t = [], a = 0; a < e.length; a++)
                        t.push(String.fromCharCode(e[a]));
                    return t.join("")
                }
            }
        };
        e.exports = a
    }

     

    a(30) = Null

     所以n、i、o、s这几个参数都找到了,那么就是构建antitoken函数,带上关键参数生成了。

      // Stand-in for the value the page reads from the "wangba" cookie: the
      // current time in milliseconds, as a string. Declared with `var` (instead of
      // an undeclared assignment) so the snippet is legal in strict mode and in
      // ES modules.
      var e = new Date().getTime().toString();

      /**
       * Computes the antitoken for a value: the MD5 digest of the input, returned
       * as 32 lowercase hexadecimal characters. This is the same computation as the
       * site's bundled module (modules 29/12 plus the digest core), with the unused
       * helpers and output options removed.
       *
       * @param {*} e - Value to hash. Arrays and Uint8Arrays are treated as raw
       *   bytes (each element masked to 8 bits); every other value is converted
       *   with String() and hashed as UTF-8 text.
       * @returns {string} 32-character lowercase hex digest.
       * @throws {Error} "Illegal argument ..." when e is null or undefined, matching
       *   the check in the site's exported function.
       */
      function antitoken(e) {
        if (e === undefined || e === null) {
          throw new Error("Illegal argument " + e);
        }

        // Normalise the input to a sequence of bytes.
        const bytes =
          Array.isArray(e) || e instanceof Uint8Array
            ? Array.prototype.slice.call(e, 0)
            : new TextEncoder().encode(String(e));

        // Pack the bytes into little-endian 32-bit words and apply MD5 padding:
        // a single 0x80 marker after the message, then the bit length stored in
        // word 14 of the final 16-word block.
        const bitLength = 8 * bytes.length;
        const blockCount = ((bitLength + 64) >>> 9) + 1;
        const words = new Array(16 * blockCount).fill(0);
        for (let i = 0; i < bytes.length; i++) {
          words[i >>> 2] |= (bytes[i] & 255) << (8 * (i % 4));
        }
        words[bitLength >>> 5] |= 128 << (bitLength % 32);
        words[16 * blockCount - 2] = bitLength;

        // The four rounds. Each entry pairs the round's bitwise mixing function
        // with its 16 steps, written as [message word index, left rotation, constant].
        const rounds = [
          {
            mix: (b, c, d) => (b & c) | (~b & d),
            steps: [
              [0, 7, -680876936], [1, 12, -389564586], [2, 17, 606105819], [3, 22, -1044525330],
              [4, 7, -176418897], [5, 12, 1200080426], [6, 17, -1473231341], [7, 22, -45705983],
              [8, 7, 1770035416], [9, 12, -1958414417], [10, 17, -42063], [11, 22, -1990404162],
              [12, 7, 1804603682], [13, 12, -40341101], [14, 17, -1502002290], [15, 22, 1236535329],
            ],
          },
          {
            mix: (b, c, d) => (b & d) | (c & ~d),
            steps: [
              [1, 5, -165796510], [6, 9, -1069501632], [11, 14, 643717713], [0, 20, -373897302],
              [5, 5, -701558691], [10, 9, 38016083], [15, 14, -660478335], [4, 20, -405537848],
              [9, 5, 568446438], [14, 9, -1019803690], [3, 14, -187363961], [8, 20, 1163531501],
              [13, 5, -1444681467], [2, 9, -51403784], [7, 14, 1735328473], [12, 20, -1926607734],
            ],
          },
          {
            mix: (b, c, d) => b ^ c ^ d,
            steps: [
              [5, 4, -378558], [8, 11, -2022574463], [11, 16, 1839030562], [14, 23, -35309556],
              [1, 4, -1530992060], [4, 11, 1272893353], [7, 16, -155497632], [10, 23, -1094730640],
              [13, 4, 681279174], [0, 11, -358537222], [3, 16, -722521979], [6, 23, 76029189],
              [9, 4, -640364487], [12, 11, -421815835], [15, 16, 530742520], [2, 23, -995338651],
            ],
          },
          {
            mix: (b, c, d) => c ^ (b | ~d),
            steps: [
              [0, 6, -198630844], [7, 10, 1126891415], [14, 15, -1416354905], [5, 21, -57434055],
              [12, 6, 1700485571], [3, 10, -1894986606], [10, 15, -1051523], [1, 21, -2054922799],
              [8, 6, 1873313359], [15, 10, -30611744], [6, 15, -1560198380], [13, 21, 1309151649],
              [4, 6, -145523070], [11, 10, -1120210379], [2, 15, 718787259], [9, 21, -343485551],
            ],
          },
        ];

        // Initial state: 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476.
        const state = [1732584193, -271733879, -1732584194, 271733878];

        for (let offset = 0; offset < words.length; offset += 16) {
          const work = state.slice();
          for (const { mix, steps } of rounds) {
            steps.forEach(([wordIndex, rotation, constant], stepNo) => {
              // The updated slot cycles 0, 3, 2, 1; the other three slots are read
              // in rotating order starting from the slot after it.
              const target = (4 - (stepNo % 4)) % 4;
              const b = work[(target + 1) % 4];
              const c = work[(target + 2) % 4];
              const d = work[(target + 3) % 4];
              const sum =
                work[target] + mix(b, c, d) + (words[offset + wordIndex] >>> 0) + constant;
              work[target] = ((sum << rotation) | (sum >>> (32 - rotation))) + b;
            });
          }
          // Fold the block result into the running state, truncated to 32 bits.
          for (let i = 0; i < 4; i++) {
            state[i] = (state[i] + work[i]) >>> 0;
          }
        }

        // Serialise each state word least-significant byte first, two hex digits
        // per byte.
        let hex = "";
        for (const word of state) {
          for (let shift = 0; shift < 32; shift += 8) {
            hex += ((word >>> shift) & 255).toString(16).padStart(2, "0");
          }
        }
        return hex;
      }

      console.log(antitoken(e));
    View Code

    OK~成功拿到了这个antitoken。那么是不是就可以去获取酒店的评论信息啦?答案是否定的,因为这个antitoken他是全局变量。

    这里有个最简单的方法去拿数据,那就是打开你的fiddler直接拿到headers以及cookies,然后直接去请求即可,不需要你通过代码去获取cookies。反正我通过selenium以及requests获取到的cookies都是不全的,有一个关键信息始终是获取不到的,那就是它。

    这个sessionID,现在你知道session与cookie的区别了吗?

    最后这时我拿到的数据

     

     总结一下:

    这次获取同程的这个antitoken,对于现在的我来说挺难的,关键是对于前端JS如何进行混淆,以及如何获取到想要的函数都还是不懂呀,分析的思路主要是原作者的思路,我只能跟着他的脚步一步一步的做。这就是经验上的差距吧。以后需要多加练习类似的具有反爬措施,且token是经过加密的网站。爬这种网站真的收获挺大的。

    还有这篇分析JS的文章是我按照原作者的步骤一步一步地执行写出来的。大家可以去知乎上看看原作者写的。他的思路更加清晰明了。

    传送门:

    https://zhuanlan.zhihu.com/p/54627024

  • 相关阅读:
    JavaScript得到当前窗口的所有大小值
    JavaScript 变量、作用域和内存问题
    jQuery html5Validate基于HTML5表单验证插件
    新世界
    2001年的火花
    High Dynamic Range Compression on Programmable Graphics Hardware
    运筹帷幄
    你还要在学校找什么东西?
    图行天下
    Supra Team
  • 原文地址:https://www.cnblogs.com/pontoon/p/10478664.html
Copyright © 2011-2022 走看看