zoukankan      html  css  js  c++  java
  • Nodejs做整站转发

     刚接触 Node.js,做个东西练练手:通过 Node.js 直接转发整站。原本想把内容全部翻译成英文,但 Google 会对流量进行审查,结果 IP 被封,所以就没什么用了。效果像这样:

    /**
     * Runs the number `a` through the mixing steps encoded in the string `b`.
     *
     * `b` is consumed three characters at a time:
     *   1st char: "+" adds the shifted value (sum truncated to 32 bits), anything else XORs it;
     *   2nd char: "+" shifts right (unsigned), anything else shifts left;
     *   3rd char: shift amount; characters from "a" upwards mean charCode - 87 (a = 10, b = 11, ...),
     *             any other character is converted with Number().
     * A trailing group of fewer than three characters is ignored.
     *
     * @param {number} a - Starting value.
     * @param {string} b - Step description, e.g. "+-a^+6".
     * @returns {number} The mixed value.
     */
    var b = function (a, b) {
        var acc = a;
        var limit = b.length - 2;
        var pos = 0;
        while (pos < limit) {
            var combineOp = b.charAt(pos);
            var shiftOp = b.charAt(pos + 1);
            var amountChar = b.charAt(pos + 2);

            var amount;
            if ("a" <= amountChar) {
                amount = amountChar.charCodeAt(0) - 87;
            } else {
                amount = Number(amountChar);
            }

            var shifted;
            if ("+" == shiftOp) {
                shifted = acc >>> amount;
            } else {
                shifted = acc << amount;
            }

            if ("+" == combineOp) {
                acc = acc + shifted & 4294967295;
            } else {
                acc = acc ^ shifted;
            }
            pos += 3;
        }
        return acc;
    };
    
    /**
     * Computes the token string for text `a` from the seed string `TKK`
     * (getTrans in this file sends the result as the `tk` query parameter).
     *
     * `TKK` is split on "."; both parts are read as numbers (0 when not numeric).
     * The text is UTF-8 encoded, every byte is folded into a hash seeded with
     * the first part, and the second part is XORed in at the end.
     *
     * @param {string} a - Text to compute the token for.
     * @param {string} TKK - Seed of the form "<number>.<number>".
     * @returns {string} "<hash mod 1e6>.<that value XOR first seed part>".
     */
    var gettk =  function (a,TKK) {
        var parts = TKK.split(".");
        var seed = Number(parts[0]) || 0;

        // UTF-8 encode the text by hand; an unpaired surrogate is encoded as a
        // plain three-byte sequence of its own code unit.
        var bytes = [];
        for (var i = 0; i < a.length; i++) {
            var code = a.charCodeAt(i);
            if (128 > code) {
                bytes.push(code);
                continue;
            }
            if (2048 > code) {
                bytes.push(code >> 6 | 192, code & 63 | 128);
                continue;
            }
            var startsPair = 55296 == (code & 64512) &&
                i + 1 < a.length &&
                56320 == (a.charCodeAt(i + 1) & 64512);
            if (startsPair) {
                // Combine the surrogate pair into one code point (consumes the next unit).
                code = 65536 + ((code & 1023) << 10) + (a.charCodeAt(++i) & 1023);
                bytes.push(code >> 18 | 240, code >> 12 & 63 | 128);
            } else {
                bytes.push(code >> 12 | 224);
            }
            bytes.push(code >> 6 & 63 | 128, code & 63 | 128);
        }

        var hash = seed;
        for (var k = 0; k < bytes.length; k++) {
            hash = b(hash + bytes[k], "+-a^+6");
        }
        hash = b(hash, "+-3^+b+-f");
        hash ^= Number(parts[1]) || 0;
        if (0 > hash) {
            // Reinterpret the negative 32-bit value as unsigned.
            hash = (hash & 2147483647) + 2147483648;
        }
        hash %= 1E6;
        return hash.toString() + "." + (hash ^ seed);
    }
    
    /**
     * Translates `text` in pieces of at most 4000 UTF-16 code units, one
     * getTrans call after another, and hands the concatenated result to
     * `callback`.
     *
     * @param {string} text - Text still to be translated.
     * @param {string} allrs - Translation accumulated so far (pass "" initially).
     * @param {function(string)} callback - Receives `allrs` plus all translated pieces.
     */
    function getTransRecursive(text,allrs,callback)
    {
        console.log(text.length);
        var head = text.substring(0, 4000);
        var rest = text.substring(4000);
        getTrans(head, function (translated) {
            allrs += translated;
            if (rest.length > 0) {
                // More text left: continue with the remainder.
                getTransRecursive(rest, allrs, callback);
                return;
            }
            callback(allrs);
        });
    }
     /**
      * Sends `text` to translate.google.cn (zh-CN -> en) and passes the
      * translated string to `callback`.
      *
      * On any failure (request error, unparsable or unexpected response) the
      * original `text` is passed to `callback` instead. `callback` is invoked
      * exactly once per call.
      *
      * @param {string} text - Text to translate.
      * @param {function(string)} callback - Receives the translation, or `text` on failure.
      */
     function getTrans(text,callback)
     {
         var tk=gettk(text, "424997.418814026");
         var url='/translate_a/single?client=t&sl=zh-CN&tl=en&hl=zh-CN&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&ie=UTF-8&oe=UTF-8&pc=1&otf=1&ssel=6&tsel=3&kc=0&tk='+ tk ;
         var options = {
             hostname: 'translate.google.cn',
             port: 80,
             path: url,
             method: 'POST',
             headers: { // Required; capture a real browser request to see what is sent.
                 "user-agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36"
             }
         };

         // Both the response "end" handler and the request "error" handler can
         // report a result; make sure the caller only ever hears one of them.
         var finished = false;
         function finish(result) {
             if (finished) {
                 return;
             }
             finished = true;
             callback(result);
         }

         // Concatenates the first element of every segment, skipping segments
         // whose first element is not a string (otherwise "null" would be
         // appended to the translation).
         function joinSegments(segments) {
             if (!Array.isArray(segments)) {
                 throw new Error("unexpected translate response");
             }
             var joined = "";
             for (var i = 0; i < segments.length; i++) {
                 var segment = segments[i];
                 if (segment && typeof segment[0] === 'string') {
                     joined += segment[0];
                 }
             }
             return joined;
         }

         var tbody="";
         var req = http.request(options, function (remoteRes) {
             remoteRes.setEncoding('utf8');
             remoteRes.on('data', function (chunk) {
                 tbody+=chunk;
             });
             remoteRes.on("end",function(){
                 var rstext;
                 try
                 {
                     // SECURITY NOTE(review): eval() executes whatever the remote
                     // host (or anyone on this plain-HTTP path) returns. It is kept
                     // because the response may not be strict JSON; replace it with
                     // a real parser once the response format is confirmed.
                     var parsed = eval(tbody);
                     rstext = joinSegments(parsed[0]);
                 }
                 catch(err)
                 {
                     console.log("transErr1:");
                     console.log(err);
                     rstext = text;
                 }
                 // Called outside the try block so that an exception thrown by the
                 // caller's callback is not mistaken for a translation failure.
                 finish(rstext);
             });
         });
         req.on('error', function (e) {
             console.log('transErr2:' + e.message);
             finish(text);
         });
         // encodeURIComponent (not encodeURI) so that "&", "=" and "+" inside the
         // text cannot break out of the q= form field.
         req.write("q="+ encodeURIComponent(text));
         req.end();
     }
     
    /**
     * Post-processes a fetched response body before it is sent to the client.
     *
     * Non-HTML bodies, and the URLs "" and "/", are passed through untouched.
     * Otherwise `<script ...>` elements are removed, every upstream host name
     * in `hostList` is replaced by the host it is mirrored under, and for
     * "www.guancha.cn" the article text, article title, page title and
     * navigation are translated through getTransRecursive.
     *
     * @param {string} newhost - Upstream host the body was fetched from.
     * @param {string} url - Request URL (path and query).
     * @param {string} str - Response body.
     * @param {boolean} isHtml - Whether the body is HTML.
     * @param {function(string)} callback - Receives the processed body.
     */
    function handleStr(newhost,url,str,isHtml,callback)
    {
        if(!isHtml||url==""||url=="/")
        {
            callback(str);
            return;
        }
        // Remove <script ...>...</script> elements. The "/" of the closing tag
        // must be escaped, otherwise it terminates the regex literal.
        str = str.replace(/<script (([\s\S])*?)<\/script>/g,"");
        // Replace every occurrence of each upstream host with its mirror host
        // (String.prototype.replace with a string pattern only replaces the first).
        for(var key in hostList){
            if (Object.prototype.hasOwnProperty.call(hostList, key)) {
                str = str.split(hostList[key]).join(key);
            }
        }

        // Turns "&#xNNNN;" entities back into characters via %uNNNN + unescape.
        // Note that this also drops every ";" in the input.
        function decodeHexEntities(value) {
            return unescape(value.replace(/&#x/g,'%u').replace(/;/g,''));
        }

        // Translate depending on the upstream host.
        switch(newhost)
        {
            case "www.guancha.cn":
                var $ = cheerio.load(str);
                var headtitle = decodeHexEntities($("head>title").text());
                // .html() yields null when the page has no .nav element.
                var nav = decodeHexEntities($(".nav").html() || "");
                var articleText = $('.all-txt').text();
                var title = $('.left-main').find('h3').text();

                getTransRecursive(articleText,"",function(translatedText){
                    $('.all-txt').text(translatedText);
                    getTransRecursive(title,"",function(translatedTitle){
                        $('.left-main').find('h3').text(translatedTitle);
                        getTransRecursive(headtitle,"",function(head){
                            // The page <title> gets the translated head title,
                            // not the translated article heading.
                            $("head>title").text(head);
                            getTransRecursive(nav,"",function(translatedNav){
                                $(".nav").html(translatedNav);
                                callback($.html());
                            });
                        });
                    });
                });

            break;
            default:
                callback(str);
            break;
        }
    }
    // Maps each host name this proxy is reached under (key) to the upstream
    // host whose content is served for it (value).
    var hostList = {
        "www.thiscnnews.com": "www.guancha.cn",
        "localhost1:8080": "user.guancha.cn"
    };
    // Request URLs that are always fetched from the upstream host, bypassing the
    // on-disk cache; compared against req.url by exact match.
    var noCatchList = [
        '/?s=dhshouye',
        '/internation?s=dhguoji',
        '/military-affairs?s=dhjunshi',
        '/economy?s=dhcaijing',
        '/industry-science?s=dhkeji',
        '/TMT?s=dhtmt',
        '/car?s=dhqiche',
        '/thinktank?s=dhzhiku',
        '/GuanWangKanPian?s=dhshipin'
    ];
    
    /**
     * Reports whether `value` is found in `arr` using `arr.indexOf`.
     * Returns false when `arr` has no callable `indexOf`.
     *
     * @param {Array|string} arr - Collection to search.
     * @param {*} value - Value to look for.
     * @returns {boolean} true when `arr.indexOf(value)` is 0 or greater.
     */
    function isInArray3(arr,value){
        var searchable = typeof arr.indexOf == 'function';
        return searchable && arr.indexOf(value) >= 0;
    }
    
    /**
     * Resolves the upstream host for the Host header value `host`.
     *
     * Only `hostList`'s own keys are consulted: `host` comes straight from the
     * request, and an `in` check would also match inherited names such as
     * "constructor" or "toString" and return a function instead of a host name.
     *
     * @param {string|undefined} host - Value of the request's Host header.
     * @returns {string} The mapped upstream host, or "www.guancha.cn" when unmapped.
     */
    function getNewHost(host)
    {
        if (Object.prototype.hasOwnProperty.call(hostList, host))
        {
            return hostList[host];
        }
        return "www.guancha.cn";
    }
    /**
     * Fetches `url` from `newhost` over HTTP, runs the body through handleStr,
     * optionally stores the result in the cache file `file`, and sends it to
     * the client through `res`.
     *
     * The upstream body is collected as raw bytes. Only HTML responses are
     * decoded to a UTF-8 string for processing; everything else (images, fonts,
     * ...) is passed on as a Buffer so binary content is not corrupted.
     *
     * @param {string} newhost - Upstream host name.
     * @param {string} url - Request path (with query string).
     * @param {object} res - Client response object; `res.end` is called with the result.
     * @param {string} file - Cache file path, or '' to skip caching.
     */
    function getRemote(newhost,url,res,file)
    {
        var chunks = [];
        var options = {
            hostname: newhost,
            port: 80,
            path: url,
            method: 'GET'
        };
        var req = http.request(options, function (remoteRes) {
            remoteRes.on('data', function (chunk) {
                chunks.push(chunk);
            });
            remoteRes.on("end",function(){
                var raw = Buffer.concat(chunks);
                // Responses without a Content-Type header are treated as non-HTML
                // instead of throwing on `undefined.indexOf`.
                var contentType = remoteRes.headers['content-type'] || '';
                var isHtml = contentType.indexOf("text/html") != -1;
                var body = isHtml ? raw.toString('utf8') : raw;
                handleStr(newhost,url,body,isHtml,function(rs){
                    if(file!=''){
                        // Overwrite rather than append: the cache file must hold
                        // exactly one copy of the response.
                        fs.writeFile(file, rs, {flag: 'w'}, function (err) {
                            if(err) {
                                console.error(err);
                            }
                        });
                    }
                    res.end(rs);
                });
            });
        });
        req.on('error', function (e) {
            res.end( e.message);
            console.log('problem with request: ' + e.message);
        });
        req.end();
    }
    /**
     * Creates `dirpath`, creating missing parent directories first.
     *
     * When `dirpath` already exists, `callback` is called with `dirpath`.
     * Otherwise `callback` is handed to fs.mkdir for `dirpath` and receives
     * whatever fs.mkdir reports. The outcome of creating the parents is not
     * inspected.
     *
     * @param {string} dirpath - Directory to create.
     * @param {number} mode - Mode passed to fs.mkdir.
     * @param {function} callback - Completion callback (see above for its argument).
     */
    var mkdirs = module.exports.mkdirs = function(dirpath, mode, callback) {
        function createSelf() {
            fs.mkdir(dirpath, mode, callback);
        }

        function onChecked(found) {
            if (!found) {
                // Make sure the parent exists, then create this directory.
                mkdirs(path.dirname(dirpath), mode, createSelf);
                return;
            }
            callback(dirpath);
        }

        fs.exists(dirpath, onChecked);
    };
    
    
    /**
     * Looks up the cached response for `url` on `newhost`.
     *
     * Cache files live in `<__dirname>/tmp/<newhost without its first ":">/`
     * and are named `cryptos.md5(url)`. The directory is created on demand.
     *
     * @param {string} newhost - Upstream host name.
     * @param {string} url - Request URL used as the cache key.
     * @param {function(boolean, string, Buffer=)} callback - Called with
     *   (true, file, data) on a hit, where `data` is the raw file content, or
     *   with (false, file) when the file is missing or unreadable.
     */
    function getCatch(newhost,url,callback)
    {
        var dir = __dirname+'/tmp/'+newhost.replace(':','');
        // Octal 777. A decimal 777 would be octal 1411, which leaves the owner
        // without write/search permission on the new directory.
        var dirMode = parseInt('777', 8);
        mkdirs(dir, dirMode, function(){
            var file = dir+'/'+cryptos.md5(url);
            // Read directly instead of checking for existence first; a missing
            // file simply surfaces as a read error. No encoding is given so
            // cached binary content is returned byte-for-byte.
            fs.readFile(file, function(err,data){
                if(err){
                    callback(false,file);
                    return;
                }
                callback(true,file,data);
            });
        });
    }
    // When true, the request handler consults the on-disk cache (getCatch)
    // before fetching from the upstream host.
    var useCatch=true;
    // HTML parser
    var cheerio = require('cheerio');
    // File-system and path modules
    var fs = require('fs'),path = require('path'); 
    // Local helper module; this file uses its md5() to name cache files
    var cryptos=require("./cryptos"); 
    // HTTP module
    var http = require("http"); 
    // Host name the server listens on
     var hostName = '127.0.0.1';
    // Port the server listens on
    var port = 9000;
    // Create the server. Each request is mapped to its upstream host and then
    // answered from the cache when possible, otherwise fetched remotely.
    var server = http.createServer(function(req,res){
        var newhost = getNewHost(req.headers.host);
        var url = req.url;
        var cacheable = useCatch && !isInArray3(noCatchList, url);

        if (!cacheable) {
            // Caching disabled or URL excluded: always fetch, never store.
            getRemote(newhost, url, res, '');
            return;
        }

        getCatch(newhost, url, function (hasData, file, data) {
            if (!hasData) {
                // Cache miss: fetch remotely and store the result in `file`.
                getRemote(newhost, url, res, file);
                return;
            }
            res.end(data);
        });
    });
    server.listen(port,hostName,function(){
        console.log('run');
    });
    

      

  • 相关阅读:
    推荐几款很棒的 JavaScript 表单美化和验证插件
    开源来自百度商业前端数据可视化团队的超漂亮动态图表--ECharts
    两种高性能 I/O 设计模式 Reactor 和 Proactor
    基本排序算法:Python实现
    局域网聊天软件(winsocket)
    MFC控件(8):command button与syslink control
    Linux python2.4升级到2.7
    调色板QPalette类用法详解(附实例、源码)(很清楚:窗口背景色 前景色 按钮的颜色 按钮文本的颜色 )
    在IT公司,project manager 基本上和秘书,助理什么的差不多
    Qt之OpenSSL(有pro文件的路径格式,以及对libeay32和ssleay32的引用)
  • 原文地址:https://www.cnblogs.com/meieiem/p/9244751.html
Copyright © 2011-2022 走看看