起源:
解析一网站时,发现其视频信息为一段js代码动态生成,而此段js代码,是用packer加密的。
其加密后的代码段如下:
eval(function(p,a,c,k,e,d){e=function(c){return(c<a?'':e(parseInt(c/a)))+((c=c%a)>35?String.fromCharCode(c+29):c.toString(36))};if(!''.replace(/^/,String)){while(c--){d[e(c)]=k[c]||e(c)}k=[function(e){return d[e]}];e=function(){return'\w+'};c=1};while(c--){if(k[c]){p=p.replace(new RegExp('\b'+e(c)+'\b','g'),k[c])}}return p}('G D="";G F=[y,8,r,z,l,9,4,j,8,l,9,m,6,t,o,o,c,8,x,5,5,8,0,1,i,q,u,8,t,k,l,4,q,w,r,5,8,p,0,0,1,d,0,1,1,1,d,h,d,1,d,h,2,f,0,f,e,5,0,2,b,h,2,f,5,0,2,b,1,b,3,5,0,2,7,2,i,e,5,a,a,d,3,4,a,3,e,h,2,f,9,k,2,a,7,7,3,g,a,i,3,0,3,0,g,0,3,e,1,1,4,g,n,k,p,b,3,4,0,b,3,2,b,p,a,9,9,n,g,9,7,C,7,2,1,q,v,c,7,6,j,o,A,c,4,m,6,u,w,n,4,r,5,v,c,7,6,j,s,k,g,4,s,m,6,7,2,1,c,6,j,l,4,8,m,6,7,2,1,6,B,y,8,r,z,l,9,4,j,8,l,9,m,6,t,o,o,c,8,x,5,5,8,0,1,i,q,u,8,t,k,l,4,q,w,r,5,8,p,0,0,1,d,0,1,1,1,d,h,d,1,d,h,2,f,0,f,e,5,0,2,b,h,2,f,5,0,2,b,1,b,3,5,0,2,7,2,i,e,5,a,a,d,3,4,a,3,e,h,2,f,9,k,2,a,7,7,3,g,a,i,3,0,3,0,g,0,3,e,1,1,4,g,n,k,p,b,3,4,0,b,3,2,b,p,a,9,9,n,g,9,7,C,f,i,1,q,v,c,7,6,j,o,A,c,4,m,6,u,w,n,4,r,5,v,c,7,6,j,s,k,g,4,s,m,6,f,i,1,c,6,j,l,4,8,m,6,f,i,1,6,B,y,8,r,z,l,9,4,j,8,l,9,m,6,t,o,o,c,8,x,5,5,8,0,1,i,q,u,8,t,k,l,4,q,w,r,5,8,p,0,0,1,d,0,1,1,1,d,h,d,1,d,h,2,f,0,f,e,5,0,2,b,h,2,f,5,0,2,b,1,b,3,5,0,2,7,2,i,e,5,a,a,d,3,4,a,3,e,h,2,f,9,k,2,a,7,7,3,g,a,i,3,0,3,0,g,0,3,e,1,1,4,g,n,k,p,b,3,4,0,b,3,2,b,p,a,9,9,n,g,9,7,C,e,7,1,q,v,c,7,6,j,o,A,c,4,m,6,u,w,n,4,r,5,v,c,7,6,j,s,k,g,4,s,m,6,e,7,1,c,6,j,l,4,8,m,6,e,7,1,6,B];F.J(L K(E){D+=M.N(O(E)-P)});$("#I").H(D);',52,52,'82695481|82695480|82695488|82695489|82695533|82695479|82695466|82695484|82695547|82695531|82695534|82695485|82695544|82695477|82695482|82695483|82695530|82695487|82695486|82695464|82695529|82695546|82695493|82695532|82695548|82695476|82695478|82695543|82695540|82695536|82695550|82695541|82695537|82695490|82695492|82695549|82695553|82695494|82695527|OzPwOHpTT|value|kdhtqgiya|var|append|videojs|forEach|ROrJdyTuoo|function|String|fromCharCode|parseInt|82695432'.split('|'),0,{}))
因此遍寻方法解密,而终于用PyExecjs实现解密,发贴至github之youtube-dl论区,得高人指点,youtube_dl框架中,已有解密函数,因此记录,以做备忘。
1、解密函数
def decode_packed_codes(code): def encode_base_n(num, n, table=None): FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' if not table: table = FULL_TABLE[:n] if n > len(table): raise ValueError('base %d exceeds table length %d' % (n, len(table))) if num == 0: return table[0] ret = '' while num: ret = table[num % n] + ret num = num // n return ret pattern = r"}('(.+)',(d+),(d+),'([^']+)'.split('|')" mobj = re.search(pattern, code) obfucasted_code, base, count, symbols = mobj.groups() base = int(base) count = int(count) symbols = symbols.split('|') symbol_table = {} while count: count -= 1 base_n_count = encode_base_n(count, base) symbol_table[base_n_count] = symbols[count] or base_n_count return re.sub( r'(w+)', lambda mobj: symbol_table[mobj.group(0)], obfucasted_code)
2、例程验证
s = ''' eval(... ''' js = decode_packed_codes(s) print js
其实此函数可从整个页代码代码中匹配出eval代码段,因此传所获取的页面源代码做参数即可。
所解密出来js代码段为:
var OzPwOHpTT="";var kdhtqgiya=[82695492,82695547,82695543,82695549,82695546,82695531,82695533,82695464,82695547,82695546,82695531,82695493,82695466,82695536,82695548,82695548,82695544,82695547,82695490,82695479,82695479,82695547,82695481,82695480,82695486,82695478,82695550,82695547,82695536,82695529,82695546,82695533,82695478,82695537,82695543,82695479,82695547,82695476,82695481,82695481,82695480,82695477,82695481,82695480,82695480,82695480,82695477,82695487,82695477,82695480,82695477,82695487,82695488,82695483,82695481,82695483,82695482,82695479,82695481,82695488,82695485,82695487,82695488,82695483,82695479,82695481,82695488,82695485,82695480,82695485,82695489,82695479,82695481,82695488,82695484,82695488,82695486,82695482,82695479,82695534,82695534,82695477,82695489,82695533,82695534,82695489,82695482,82695487,82695488,82695483,82695531,82695529,82695488,82695534,82695484,82695484,82695489,82695530,82695534,82695486,82695489,82695481,82695489,82695481,82695530,82695481,82695489,82695482,82695480,82695480,82695533,82695530,82695532,82695529,82695476,82695485,82695489,82695533,82695481,82695485,82695489,82695488,82695485,82695476,82695534,82695531,82695531,82695532,82695530,82695531,82695484,82695527,82695484,82695488,82695480,82695478,82695541,82695544,82695484,82695466,82695464,82695548,82695553,82695544,82695533,82695493,82695466,82695550,82695537,82695532,82695533,82695543,82695479,82695541,82695544,82695484,82695466,82695464,82695540,82695529,82695530,82695533,82695540,82695493,82695466,82695484,82695488,82695480,82695544,82695466,82695464,82695546,82695533,82695547,82695493,82695466,82695484,82695488,82695480,82695466,82695494,82695492,82695547,82695543,82695549,82695546,82695531,82695533,82695464,82695547,82695546,82695531,82695493,82695466,82695536,82695548,82695548,82695544,82695547,82695490,82695479,82695479,82695547,82695481,82695480,82695486,82695478,82695550,82695547,82695536,82695529,82695546,82695533,82695478,82695537,82695543,82695479,82695547,82695476,82695481,82695481,82695480,82695477,82695481,82695480,82695480,82695480,82695477,82695487,82695477,82695480,82695477,82695487,82695488,82695483,82695481,82695483,82695482,82695479,82695481,82695488,82695485,82695487,82695488,82695483,82695479,82695481,82695488,82695485,82695480,82695485,82695489,82695479,82695481,82695488,82695484,82695488,82695486,82695482,82695479,82695534,82695534,82695477,82695489,82695533,82695534,82695489,82695482,82695487,82695488,82695483,82695531,82695529,82695488,82695534,82695484,82695484,82695489,82695530,82695534,82695486,82695489,82695481,82695489,82695481,82695530,82695481,82695489,82695482,82695480,82695480,82695533,82695530,82695532,82695529,82695476,82695485,82695489,82695533,82695481,82695485,82695489,82695488,82695485,82695476,82695534,82695531,82695531,82695532,82695530,82695531,82695484,82695527,82695483,82695486,82695480,82695478,82695541,82695544,82695484,82695466,82695464,82695548,82695553,82695544,82695533,82695493,82695466,82695550,82695537,82695532,82695533,82695543,82695479,82695541,82695544,82695484,82695466,82695464,82695540,82695529,82695530,82695533,82695540,82695493,82695466,82695483,82695486,82695480,82695544,82695466,82695464,82695546,82695533,82695547,82695493,82695466,82695483,82695486,82695480,82695466,82695494,82695492,82695547,82695543,82695549,82695546,82695531,82695533,82695464,82695547,82695546,82695531,82695493,82695466,82695536,82695548,82695548,82695544,82695547,82695490,82695479,82695479,82695547,82695481,82695480,82695486,82695478,82695550,82695547,82695536,82695529,82695546,82695533,82695478,82695537,82695543,82695479,82695547,82695476,82695481,82695481,82695480,82695477,82695481,82695480,82695480,82695480,82695477,82695487,82695477,82695480,82695477,82695487,82695488,82695483,82695481,82695483,82695482,82695479,82695481,82695488,82695485,82695487,82695488,82695483,82695479,82695481,82695488,82695485,82695480,82695485,82695489,82695479,82695481,82695488,82695484,82695488,82695486,82695482,82695479,82695534,82695534,82695477,82695489,82695533,82695534,82695489,82695482,82695487,82695488,82695483,82695531,82695529,82695488,82695534,82695484,82695484,82695489,82695530,82695534,82695486,82695489,82695481,82695489,82695481,82695530,82695481,82695489,82695482,82695480,82695480,82695533,82695530,82695532,82695529,82695476,82695485,82695489,82695533,82695481,82695485,82695489,82695488,82695485,82695476,82695534,82695531,82695531,82695532,82695530,82695531,82695484,82695527,82695482,82695484,82695480,82695478,82695541,82695544,82695484,82695466,82695464,82695548,82695553,82695544,82695533,82695493,82695466,82695550,82695537,82695532,82695533,82695543,82695479,82695541,82695544,82695484,82695466,82695464,82695540,82695529,82695530,82695533,82695540,82695493,82695466,82695482,82695484,82695480,82695544,82695466,82695464,82695546,82695533,82695547,82695493,82695466,82695482,82695484,82695480,82695466,82695494];kdhtqgiya.forEach(function ROrJdyTuoo(value){OzPwOHpTT+=String.fromCharCode(parseInt(value)-82695432)});$("#videojs").append(OzPwOHpTT);
再以js字串组之,以execjs执行,其中内容尽显;而execjs所用JScript引擎,居然不识forEach函数,因此展开处理之。
# 处理为可解析格式 base = self._search_regex(r'-(d+))', js, 'base') args =self._search_regex(r'([[^]]+])', js, 'args') js = ''' function f(){ var v = %s; length = v.length; var r = ''; for (var k = 0; k < length; k++) { r += String.fromCharCode(parseInt(v[k]) - %s); } return r; } ''' % (args, base) ctx = execjs.compile(js) source = ctx.call('f')
OK,其苦心隐藏的真实代码,就这样,显示在面前了
参考资料: