google的PageRank,虽然各大网站都能查询,但是,具体是从google 哪个数据源来的?幸运的是,google tool bar
上有pagerank的值。Sniffer 会告诉你,大概是从:
GET /search?client=navclient-auto&iqrn=-WM&orig=0BATi&ie=UTF-8&oe=UTF-8&features=Rank:WH:&q=info:http%3a%2f%2fwww.sina.com.cn%2f&googleip=O;74.125.153.147;297&ch=772063278971 HTTP/1.1
User-Agent: Mozilla/4.0 (compatible; GoogleToolbar 6.2.1910.1554; Windows XP 5.1; MSIE 8.0.6001.18702)
Host: toolbarqueries.google.com
Connection: Keep-Alive
Cache-Control: no-cache
toolbarqueries.google.com 这个主机进行查询的。
看上去好像换一下 q 变量就可以请求了。但是试了以后发现不行:ch 参数是根据 URL 计算出来的校验值,必须跟着一起换。我翻遍了很多论坛,几乎都是转载,而且贴出来的代码都不能运行。
后来,在一个国外论坛里面找到了一段代码。但是也用不了。找了多段代码,都用不了,但是我发现核心算法都差不多。
经过我修修改改,下面这段终于可以用了:
调用就一句话。解密的算法是通过反汇编得到的。不知道博客园有没有人知道 Google 是用什么加密算法来计算
ch 的?我看了以后,是不知所云。
上有pagerank的值。Sniffer 会告诉你,大概是从:
GET /search?client=navclient-auto&iqrn=-WM&orig=0BATi&ie=UTF-8&oe=UTF-8&features=Rank:WH:&q=info:http%3a%2f%2fwww.sina.com.cn%2f&googleip=O;74.125.153.147;297&ch=772063278971 HTTP/1.1
User-Agent: Mozilla/4.0 (compatible; GoogleToolbar 6.2.1910.1554; Windows XP 5.1; MSIE 8.0.6001.18702)
Host: toolbarqueries.google.com
Connection: Keep-Alive
Cache-Control: no-cache
toolbarqueries.google.com 这个主机进行查询的。
看上去好像换一下 q 变量就可以请求了。但是试了以后发现不行:ch 参数是根据 URL 计算出来的校验值,必须跟着一起换。我翻遍了很多论坛,几乎都是转载,而且贴出来的代码都不能运行。
后来,在一个国外论坛里面找到了一段代码。但是也用不了。找了多段代码,都用不了,但是我发现核心算法都差不多。
经过我修修改改,下面这段终于可以用了:
<?php
/*
Written and contributed by
Alex Stapleton,
Andy Doctorow,
Tarakan,
Bill Zeller,
Vijay "Cyberax" Bhatter
traB
This code is released into the public domain
*/
// Default seed for GoogleCH(): it becomes the initial value of the third hash
// word ($c) whenever the caller does not pass an explicit $init.
define('GOOGLE_MAGIC', 0xE6359A60);
/**
 * Unsigned (zero-filling) right shift of a 32-bit value.
 *
 * PHP's >> operator is an arithmetic shift on the platform-sized integer, so
 * this helper makes sure no one-bits are shifted in from the left: neither
 * copies of the sign bit nor, on 64-bit builds, bits that sit above bit 31.
 *
 * @param int $a Value to shift; only its low 32 bits take part.
 * @param int $b Number of bit positions to shift right by (0 or more).
 * @return int The low 32 bits of $a shifted right by $b with zeros shifted in.
 */
function zeroFill($a, $b)
{
    if (PHP_INT_SIZE > 4) {
        // 64-bit build: once masked the value is non-negative, so a plain >>
        // already behaves as a logical shift (and $b == 0 needs no special case).
        return ($a & 0xFFFFFFFF) >> $b;
    }

    if ($b == 0) {
        // Nothing to shift. Handled separately because the mask expression
        // below would otherwise shift by -1.
        return $a >> 0;
    }

    // 32-bit build: shift arithmetically, then clear the $b topmost bits that
    // were filled with copies of the sign bit.
    return ($a >> $b) & (0x7FFFFFFF >> ($b - 1));
}
/**
 * Scrambles three 32-bit words with a fixed sequence of subtract / xor / shift
 * steps and returns the three resulting words.
 *
 * NOTE(review): the shift schedule (13, 8, 13, 12, 16, 5, 3, 10, 15) looks like
 * the mix() step of Bob Jenkins' lookup2 hash - confirm against that reference.
 *
 * On 64-bit PHP builds integers do not wrap at 32 bits, so every word is
 * masked back to 32 bits after each step. Without that, bits above bit 31
 * (or a negative sign) leak into the right shifts and change the result.
 *
 * @param int $a First word.
 * @param int $b Second word.
 * @param int $c Third word.
 * @return array array($a, $b, $c) after mixing.
 */
function mix($a, $b, $c)
{
    // On 32-bit builds the mask is -1, i.e. a no-op that only coerces to int,
    // so the legacy behaviour there is left untouched.
    $m = PHP_INT_SIZE > 4 ? 0xFFFFFFFF : -1;

    $a &= $m;
    $b &= $m;
    $c &= $m;

    $a = (($a - $b - $c) ^ zeroFill($c, 13)) & $m;
    $b = (($b - $c - $a) ^ ($a << 8)) & $m;
    $c = (($c - $a - $b) ^ zeroFill($b, 13)) & $m;

    $a = (($a - $b - $c) ^ zeroFill($c, 12)) & $m;
    $b = (($b - $c - $a) ^ ($a << 16)) & $m;
    $c = (($c - $a - $b) ^ zeroFill($b, 5)) & $m;

    $a = (($a - $b - $c) ^ zeroFill($c, 3)) & $m;
    $b = (($b - $c - $a) ^ ($a << 10)) & $m;
    $c = (($c - $a - $b) ^ zeroFill($b, 15)) & $m;

    return array($a, $b, $c);
}
/**
 * Hashes a list of byte values into a single checksum word.
 *
 * The input is consumed in 12-byte blocks; each block is read as three
 * little-endian 32-bit words that are added to the running state ($a, $b, $c)
 * and then scrambled by mix(). The bytes left over after the last full block
 * are folded in together with the total length before a final mix().
 *
 * @param array    $url    List of byte values (integers), indexed from 0.
 * @param int|null $length Number of bytes to hash; defaults to the element count of $url.
 * @param int      $init   Initial value of the third state word.
 * @return int The third word returned by the final mix() call.
 */
function GoogleCH($url, $length=null, $init=GOOGLE_MAGIC)
{
    if ($length === null) {
        $length = count($url);
    }

    $a = 0x9E3779B9;
    $b = 0x9E3779B9;
    $c = $init;

    // Full 12-byte blocks.
    $offset = 0;
    for ($remaining = $length; $remaining >= 12; $remaining -= 12) {
        $a += $url[$offset]     + ($url[$offset + 1] << 8) + ($url[$offset + 2]  << 16) + ($url[$offset + 3]  << 24);
        $b += $url[$offset + 4] + ($url[$offset + 5] << 8) + ($url[$offset + 6]  << 16) + ($url[$offset + 7]  << 24);
        $c += $url[$offset + 8] + ($url[$offset + 9] << 8) + ($url[$offset + 10] << 16) + ($url[$offset + 11] << 24);
        list($a, $b, $c) = mix($a, $b, $c);
        $offset += 12;
    }

    // The lowest byte of $c is reserved for the length, so trailing bytes that
    // land in $c start one byte higher than those landing in $a or $b.
    $c += $length;

    // Trailing 0..11 bytes, highest position first: positions 8-10 feed $c,
    // positions 4-7 feed $b and positions 0-3 feed $a.
    for ($i = $remaining - 1; $i >= 0; $i--) {
        $byte = $url[$offset + $i];
        if ($i >= 8) {
            $c += $byte << (8 * ($i - 7));
        } elseif ($i >= 4) {
            $b += $byte << (8 * ($i - 4));
        } else {
            $a += $byte << (8 * $i);
        }
    }

    $mixed = mix($a, $b, $c);

    return $mixed[2];
}
/**
 * Converts a string into a list holding the numeric value of each of its bytes.
 *
 * @param string $string Input string.
 * @return array Integers in the range 0..255, one per byte, in order; an empty
 *               array for an empty string.
 */
function strord($string)
{
    // Start from an empty array so that '' yields array() instead of an
    // undefined variable.
    $result = array();
    $length = strlen($string);
    for ($i = 0; $i < $length; $i++) {
        // Square-bracket offsets: the {} offset syntax no longer parses on PHP 8.
        $result[] = ord($string[$i]);
    }
    return $result;
}
/**
 * Converts a list of 32-bit integers into a list of 8-bit values, four per
 * input word, least significant byte first. Equivalent to (BYTE *)arr32 on a
 * little-endian machine.
 *
 * @param array $arr32 List of integers, indexed from 0; only the low 32 bits
 *                     of each element are used.
 * @return array Byte values (0..255), four per input element; an empty array
 *               for empty input.
 */
function c32to8bit($arr32)
{
    // Initialised up front so that empty input returns array() rather than an
    // undefined variable.
    $arr8 = array();
    $count = count($arr32);
    for ($i = 0; $i < $count; $i++) {
        // Masking with 0xFF after the shift discards any sign-extended bits,
        // so a plain >> is enough to pick out each of the four low bytes.
        for ($shift = 0; $shift < 32; $shift += 8) {
            $arr8[] = ($arr32[$i] >> $shift) & 0xFF;
        }
    }
    return $arr8;
}
// http://www.example.com/ - Checksum: 6540747202
/**
 * Builds the value of the "ch" request parameter for a URL.
 *
 * The URL is prefixed with "info:", turned into byte values and hashed with
 * GoogleCH(). When $isnewversion is true the hash is transformed once more
 * and re-hashed over an 80-byte buffer derived from it.
 *
 * NOTE(review): the comment that used to sit above the $isnewversion branch
 * read "Checksum < 2.0.114", although the branch runs for the *new* version;
 * presumably the extra step belongs to newer toolbar versions - confirm.
 *
 * @param string $url          URL to compute the checksum for.
 * @param bool   $isnewversion Whether to apply the additional transformation.
 * @return string The digit "6" followed by the checksum as an unsigned 32-bit
 *                decimal number.
 */
function getCheckSum($url, $isnewversion = false)
{
    $ch = GoogleCH(strord('info:' . $url));

    if ($isnewversion) {
        // Work on the unsigned 32-bit value. Going through %u gives the
        // unsigned reading on 32-bit builds as well; a float holds it exactly.
        $unsigned = (float) sprintf('%u', PHP_INT_SIZE > 4 ? ($ch & 0xFFFFFFFF) : $ch);

        // Explicit floor()/(int) instead of feeding a fractional float to <<.
        $ch = (((int) floor($unsigned / 7)) << 2) | (((int) fmod($unsigned, 13)) & 7);

        // 20 words, each 9 less than the previous one, hashed as 80 bytes.
        $prbuf = array($ch);
        for ($i = 1; $i < 20; $i++) {
            $prbuf[$i] = $prbuf[$i - 1] - 9;
        }
        $ch = GoogleCH(c32to8bit($prbuf), 80);
    }

    if (PHP_INT_SIZE > 4) {
        // %u formats the full 64-bit integer; keep only the 32-bit checksum so
        // a negative or oversized value does not print as a 20-digit number.
        $ch &= 0xFFFFFFFF;
    }

    return sprintf("6%u", $ch);
}
/**
 * Requests the toolbar rank information for a URL.
 *
 * Computes the checksum with getCheckSum(), URL-encodes the address and
 * fetches the resulting query URL from toolbarqueries.google.com.
 *
 * @param string $url          URL to look up.
 * @param bool   $isnewversion Passed through to getCheckSum().
 * @return string|false Whatever file_get_contents() returns for the request:
 *                      the response body, or false on failure.
 */
function getPageRank($url, $isnewversion = false)
{
    $checksum = getCheckSum($url, $isnewversion);
    $query = 'info:' . urlencode($url);

    return file_get_contents(
        'http://toolbarqueries.google.com/search?client=navclient-auto&features=Rank:WH:&q='
        . $query
        . '&ch='
        . $checksum
    );
}
// Script entry point: look up the URL given in the "q" query parameter and
// print the raw response. The lookup is skipped when "q" is missing or is not
// a plain string (e.g. ?q[]=x), which would otherwise raise notices/warnings.
if (isset($_GET['q']) && is_string($_GET['q'])) {
    echo getPageRank($_GET['q']);
}
?>
/*
Written and contributed by
Alex Stapleton,
Andy Doctorow,
Tarakan,
Bill Zeller,
Vijay "Cyberax" Bhatter
traB
This code is released into the public domain
*/
// Default seed for GoogleCH(): it becomes the initial value of the third hash
// word ($c) whenever the caller does not pass an explicit $init.
define('GOOGLE_MAGIC', 0xE6359A60);
/**
 * Unsigned (zero-filling) right shift of a 32-bit value.
 *
 * PHP's >> operator is an arithmetic shift on the platform-sized integer, so
 * this helper makes sure no one-bits are shifted in from the left: neither
 * copies of the sign bit nor, on 64-bit builds, bits that sit above bit 31.
 *
 * @param int $a Value to shift; only its low 32 bits take part.
 * @param int $b Number of bit positions to shift right by (0 or more).
 * @return int The low 32 bits of $a shifted right by $b with zeros shifted in.
 */
function zeroFill($a, $b)
{
    if (PHP_INT_SIZE > 4) {
        // 64-bit build: once masked the value is non-negative, so a plain >>
        // already behaves as a logical shift (and $b == 0 needs no special case).
        return ($a & 0xFFFFFFFF) >> $b;
    }

    if ($b == 0) {
        // Nothing to shift. Handled separately because the mask expression
        // below would otherwise shift by -1.
        return $a >> 0;
    }

    // 32-bit build: shift arithmetically, then clear the $b topmost bits that
    // were filled with copies of the sign bit.
    return ($a >> $b) & (0x7FFFFFFF >> ($b - 1));
}
/**
 * Scrambles three 32-bit words with a fixed sequence of subtract / xor / shift
 * steps and returns the three resulting words.
 *
 * NOTE(review): the shift schedule (13, 8, 13, 12, 16, 5, 3, 10, 15) looks like
 * the mix() step of Bob Jenkins' lookup2 hash - confirm against that reference.
 *
 * On 64-bit PHP builds integers do not wrap at 32 bits, so every word is
 * masked back to 32 bits after each step. Without that, bits above bit 31
 * (or a negative sign) leak into the right shifts and change the result.
 *
 * @param int $a First word.
 * @param int $b Second word.
 * @param int $c Third word.
 * @return array array($a, $b, $c) after mixing.
 */
function mix($a, $b, $c)
{
    // On 32-bit builds the mask is -1, i.e. a no-op that only coerces to int,
    // so the legacy behaviour there is left untouched.
    $m = PHP_INT_SIZE > 4 ? 0xFFFFFFFF : -1;

    $a &= $m;
    $b &= $m;
    $c &= $m;

    $a = (($a - $b - $c) ^ zeroFill($c, 13)) & $m;
    $b = (($b - $c - $a) ^ ($a << 8)) & $m;
    $c = (($c - $a - $b) ^ zeroFill($b, 13)) & $m;

    $a = (($a - $b - $c) ^ zeroFill($c, 12)) & $m;
    $b = (($b - $c - $a) ^ ($a << 16)) & $m;
    $c = (($c - $a - $b) ^ zeroFill($b, 5)) & $m;

    $a = (($a - $b - $c) ^ zeroFill($c, 3)) & $m;
    $b = (($b - $c - $a) ^ ($a << 10)) & $m;
    $c = (($c - $a - $b) ^ zeroFill($b, 15)) & $m;

    return array($a, $b, $c);
}
/**
 * Hashes a list of byte values into a single checksum word.
 *
 * The input is consumed in 12-byte blocks; each block is read as three
 * little-endian 32-bit words that are added to the running state ($a, $b, $c)
 * and then scrambled by mix(). The bytes left over after the last full block
 * are folded in together with the total length before a final mix().
 *
 * @param array    $url    List of byte values (integers), indexed from 0.
 * @param int|null $length Number of bytes to hash; defaults to the element count of $url.
 * @param int      $init   Initial value of the third state word.
 * @return int The third word returned by the final mix() call.
 */
function GoogleCH($url, $length=null, $init=GOOGLE_MAGIC)
{
    if ($length === null) {
        $length = count($url);
    }

    $a = 0x9E3779B9;
    $b = 0x9E3779B9;
    $c = $init;

    // Full 12-byte blocks.
    $offset = 0;
    for ($remaining = $length; $remaining >= 12; $remaining -= 12) {
        $a += $url[$offset]     + ($url[$offset + 1] << 8) + ($url[$offset + 2]  << 16) + ($url[$offset + 3]  << 24);
        $b += $url[$offset + 4] + ($url[$offset + 5] << 8) + ($url[$offset + 6]  << 16) + ($url[$offset + 7]  << 24);
        $c += $url[$offset + 8] + ($url[$offset + 9] << 8) + ($url[$offset + 10] << 16) + ($url[$offset + 11] << 24);
        list($a, $b, $c) = mix($a, $b, $c);
        $offset += 12;
    }

    // The lowest byte of $c is reserved for the length, so trailing bytes that
    // land in $c start one byte higher than those landing in $a or $b.
    $c += $length;

    // Trailing 0..11 bytes, highest position first: positions 8-10 feed $c,
    // positions 4-7 feed $b and positions 0-3 feed $a.
    for ($i = $remaining - 1; $i >= 0; $i--) {
        $byte = $url[$offset + $i];
        if ($i >= 8) {
            $c += $byte << (8 * ($i - 7));
        } elseif ($i >= 4) {
            $b += $byte << (8 * ($i - 4));
        } else {
            $a += $byte << (8 * $i);
        }
    }

    $mixed = mix($a, $b, $c);

    return $mixed[2];
}
/**
 * Converts a string into a list holding the numeric value of each of its bytes.
 *
 * @param string $string Input string.
 * @return array Integers in the range 0..255, one per byte, in order; an empty
 *               array for an empty string.
 */
function strord($string)
{
    // Start from an empty array so that '' yields array() instead of an
    // undefined variable.
    $result = array();
    $length = strlen($string);
    for ($i = 0; $i < $length; $i++) {
        // Square-bracket offsets: the {} offset syntax no longer parses on PHP 8.
        $result[] = ord($string[$i]);
    }
    return $result;
}
/**
 * Converts a list of 32-bit integers into a list of 8-bit values, four per
 * input word, least significant byte first. Equivalent to (BYTE *)arr32 on a
 * little-endian machine.
 *
 * @param array $arr32 List of integers, indexed from 0; only the low 32 bits
 *                     of each element are used.
 * @return array Byte values (0..255), four per input element; an empty array
 *               for empty input.
 */
function c32to8bit($arr32)
{
    // Initialised up front so that empty input returns array() rather than an
    // undefined variable.
    $arr8 = array();
    $count = count($arr32);
    for ($i = 0; $i < $count; $i++) {
        // Masking with 0xFF after the shift discards any sign-extended bits,
        // so a plain >> is enough to pick out each of the four low bytes.
        for ($shift = 0; $shift < 32; $shift += 8) {
            $arr8[] = ($arr32[$i] >> $shift) & 0xFF;
        }
    }
    return $arr8;
}
// http://www.example.com/ - Checksum: 6540747202
/**
 * Builds the value of the "ch" request parameter for a URL.
 *
 * The URL is prefixed with "info:", turned into byte values and hashed with
 * GoogleCH(). When $isnewversion is true the hash is transformed once more
 * and re-hashed over an 80-byte buffer derived from it.
 *
 * NOTE(review): the comment that used to sit above the $isnewversion branch
 * read "Checksum < 2.0.114", although the branch runs for the *new* version;
 * presumably the extra step belongs to newer toolbar versions - confirm.
 *
 * @param string $url          URL to compute the checksum for.
 * @param bool   $isnewversion Whether to apply the additional transformation.
 * @return string The digit "6" followed by the checksum as an unsigned 32-bit
 *                decimal number.
 */
function getCheckSum($url, $isnewversion = false)
{
    $ch = GoogleCH(strord('info:' . $url));

    if ($isnewversion) {
        // Work on the unsigned 32-bit value. Going through %u gives the
        // unsigned reading on 32-bit builds as well; a float holds it exactly.
        $unsigned = (float) sprintf('%u', PHP_INT_SIZE > 4 ? ($ch & 0xFFFFFFFF) : $ch);

        // Explicit floor()/(int) instead of feeding a fractional float to <<.
        $ch = (((int) floor($unsigned / 7)) << 2) | (((int) fmod($unsigned, 13)) & 7);

        // 20 words, each 9 less than the previous one, hashed as 80 bytes.
        $prbuf = array($ch);
        for ($i = 1; $i < 20; $i++) {
            $prbuf[$i] = $prbuf[$i - 1] - 9;
        }
        $ch = GoogleCH(c32to8bit($prbuf), 80);
    }

    if (PHP_INT_SIZE > 4) {
        // %u formats the full 64-bit integer; keep only the 32-bit checksum so
        // a negative or oversized value does not print as a 20-digit number.
        $ch &= 0xFFFFFFFF;
    }

    return sprintf("6%u", $ch);
}
/**
 * Requests the toolbar rank information for a URL.
 *
 * Computes the checksum with getCheckSum(), URL-encodes the address and
 * fetches the resulting query URL from toolbarqueries.google.com.
 *
 * @param string $url          URL to look up.
 * @param bool   $isnewversion Passed through to getCheckSum().
 * @return string|false Whatever file_get_contents() returns for the request:
 *                      the response body, or false on failure.
 */
function getPageRank($url, $isnewversion = false)
{
    $checksum = getCheckSum($url, $isnewversion);
    $query = 'info:' . urlencode($url);

    return file_get_contents(
        'http://toolbarqueries.google.com/search?client=navclient-auto&features=Rank:WH:&q='
        . $query
        . '&ch='
        . $checksum
    );
}
// Script entry point: look up the URL given in the "q" query parameter and
// print the raw response. The lookup is skipped when "q" is missing or is not
// a plain string (e.g. ?q[]=x), which would otherwise raise notices/warnings.
if (isset($_GET['q']) && is_string($_GET['q'])) {
    echo getPageRank($_GET['q']);
}
?>
调用就一句话。解密的算法是通过反汇编得到的。不知道博客园有没有人知道 Google 是用什么加密算法来计算
ch 的?我看了以后,是不知所云。