zoukankan      html  css  js  c++  java
  • C# HttpWebRequest 从google服务器获取google的PageRank PR值

    首先,本文的实现参考了这篇:

    http://www.codeproject.com/KB/aspnet/Google_Pagerank.aspx

    简述一下原理:

    获取PR值,通过向google服务器发送一个http请求来实现。

    http://toolbarqueries.google.com.hk/search?client=navclient-auto&hl=en&ch=6771535612&ie=UTF-8&oe=UTF-8&features=Rank&q=info:http%3A%2F%2Fwww.codeproject.com%2F

    在浏览器中输入上面的链接,google服务器会返回一个字符串

    上面的链接返回的是:Rank_1:1:6

    要实现对任意链接的PR值的查询,关键是要正确构造请求的URL。其中ch=6771535612这一项很重要,它是请求网址的hash值,这个hash值的算法在上面的那篇博文中有详细的说明。

    这里将主要的代码转贴在下面:

            // Seed constant: GoogleCH uses it as the initial value of its third hash word (c).
            private const UInt32 GOOGLE_MAGIC = 0xE6359A60;

    /// <summary>
    /// Mixes three 32-bit words in place. Runs nine rounds; each round subtracts the
    /// other two words from one word and then XORs in a shifted copy of a neighbour.
    /// </summary>
    /// <param name="a">First word; read and overwritten.</param>
    /// <param name="b">Second word; read and overwritten.</param>
    /// <param name="c">Third word; read and overwritten.</param>
    private static void _mix(ref UInt32 a, ref UInt32 b, ref UInt32 c)
    {
        // Every round relies on UInt32 subtraction wrapping modulo 2^32. Making that
        // explicit keeps the result identical when the project is built with overflow
        // checking turned on, where the bare operators would throw OverflowException.
        unchecked
        {
            // The statement order is significant: each round consumes the values
            // produced by the previous one.
            a -= b; a -= c; a ^= c >> 13;
            b -= c; b -= a; b ^= a << 8;
            c -= a; c -= b; c ^= b >> 13;

            a -= b; a -= c; a ^= c >> 12;
            b -= c; b -= a; b ^= a << 16;
            c -= a; c -= b; c ^= b >> 5;

            a -= b; a -= c; a ^= c >> 3;
            b -= c; b -= a; b ^= a << 10;
            c -= a; c -= b; c ^= b >> 15;
        }
    }

    /// <summary>
    /// Computes the checksum string for a URL. The text "info:" is prepended to the
    /// URL, the result is hashed twelve characters at a time with <c>_mix</c>, and the
    /// final third hash word is returned in decimal behind a leading '6'.
    /// </summary>
    /// <param name="url">
    /// The URL to hash. A null value is formatted as an empty string, so only the
    /// "info:" prefix is hashed in that case.
    /// </param>
    /// <returns>The character '6' followed by the decimal value of the hash word.</returns>
    public static string GoogleCH(string url)
    {
        url = string.Format("info:{0}", url);

        int length = url.Length;
        int k = 0;          // index of the first character of the current 12-character group
        int len = length;   // characters not yet consumed

        UInt32 a = 0x9E3779B9;
        UInt32 b = 0x9E3779B9;
        UInt32 c = GOOGLE_MAGIC;

        // The hash is defined by 32-bit wrap-around: the int sums below can overflow,
        // a character >= 0x80 shifted left by 24 is a negative int, and the += on the
        // UInt32 words is expected to wrap. Being explicit keeps the result the same
        // when the project is compiled with overflow checking enabled.
        // NOTE(review): each character is treated as one byte lane; a character above
        // 0xFF spills into the neighbouring lane when shifted. Confirm that callers
        // only pass ASCII / percent-encoded URLs.
        unchecked
        {
            // Full groups: pack four characters little-endian into each of a, b, c.
            while (len >= 12)
            {
                a += (UInt32)(url[k + 0] + (url[k + 1] << 8) + (url[k + 2] << 16) + (url[k + 3] << 24));
                b += (UInt32)(url[k + 4] + (url[k + 5] << 8) + (url[k + 6] << 16) + (url[k + 7] << 24));
                c += (UInt32)(url[k + 8] + (url[k + 9] << 8) + (url[k + 10] << 16) + (url[k + 11] << 24));
                _mix(ref a, ref b, ref c);
                k += 12;
                len -= 12;
            }

            // The total length goes into the low byte lane of c; the remaining
            // characters that land in c are therefore shifted up by one lane.
            c += (UInt32)length;

            // Tail of 0..11 characters. Each case adds one character and then jumps
            // to the next lower case, so a tail of n characters runs cases n..1.
            switch (len)
            {
                case 11:
                    c += (UInt32)(url[k + 10] << 24);
                    goto case 10;
                case 10:
                    c += (UInt32)(url[k + 9] << 16);
                    goto case 9;
                case 9:
                    c += (UInt32)(url[k + 8] << 8);
                    goto case 8;
                /* the first byte of c is reserved for the length */
                case 8:
                    b += (UInt32)(url[k + 7] << 24);
                    goto case 7;
                case 7:
                    b += (UInt32)(url[k + 6] << 16);
                    goto case 6;
                case 6:
                    b += (UInt32)(url[k + 5] << 8);
                    goto case 5;
                case 5:
                    b += (UInt32)(url[k + 4]);
                    goto case 4;
                case 4:
                    a += (UInt32)(url[k + 3] << 24);
                    goto case 3;
                case 3:
                    a += (UInt32)(url[k + 2] << 16);
                    goto case 2;
                case 2:
                    a += (UInt32)(url[k + 1] << 8);
                    goto case 1;
                case 1:
                    a += (UInt32)(url[k + 0]);
                    break;
                default:
                    /* case 0: nothing left to add */
                    break;
            }

            _mix(ref a, ref b, ref c);
        }

        return string.Format("6{0}", c);
    }

      

    再给出一个调用的用例供参考:

                try
                {
                    // Build the request URL. The checksum is computed over the URL exactly as
                    // typed; the copy placed in the query string is percent-encoded so that
                    // characters such as '&' or '#' in it cannot break the q parameter.
                    string checksum = GoogleCH(txtUrl.Text);
                    string query = string.Format(@"http://toolbarqueries.google.com/search?client=navclient-auto&ch={0}&features=Rank&q=info:{1}", checksum, System.Uri.EscapeDataString(txtUrl.Text));

                    // Send the request. The using blocks make sure the response and its
                    // stream are closed on every path, so the connection is released.
                    request = (HttpWebRequest)HttpWebRequest.Create(query);
                    using (response = (HttpWebResponse)request.GetResponse())
                    {
                        if (response == null)
                        {
                            txtResponse.Text = "response==NULL";
                            return;
                        }

                        txtResponse.Text = "";

                        // Read the whole body before matching, so a result that straddles
                        // two network reads is still found.
                        string body;
                        using (Stream stream = response.GetResponseStream())
                        using (StreamReader reader = new StreamReader(stream, Encoding.UTF8))
                        {
                            body = reader.ReadToEnd();
                        }

                        // Parse and display the PR value. When the body does not contain a
                        // rank line, txtPR is left untouched instead of int.Parse throwing
                        // on an empty capture.
                        Match match = Regex.Match(body, "Rank_1:[0-9]:([0-9]+)");
                        if (match.Success)
                        {
                            txtPR.Text = int.Parse(match.Groups[1].Value).ToString();
                        }
                    }
                }
                catch (System.UriFormatException)
                {
                    txtResponse.Text = "无效的URL";
                }

      

  • 相关阅读:
    第 6 章 Android SDK 版本与兼容
    第 5 章 第二个 activity
    第 4 章 Android 应用的调试
    第 3 章 Activity 的生命周期
    第 2 章 Android 与 MVC 设计模式
    第 1 章 Android 应用初体验
    ACM基础之线性结构:一刷 参考答案
    小马慢慢跑
    Ubuntu 利用 xinetd 限制 SSH 连接数
    C# 定制 Attribute 简单使用
  • 原文地址:https://www.cnblogs.com/oyjj/p/2132861.html
Copyright © 2011-2022 走看看