网上有很多 ASP 版本的百度抓取程序,现贴上用 ASP.NET 2.0(C#)实现的百度收录数抓取方法,希望各位指正。
1using System;
2using System.Collections.Generic;
3using System.Text;
4using System.Net;
5using System.Text.RegularExpressions;
namespace Tool
{
    /// <summary>
    /// Queries Baidu for the number of indexed pages of a site.
    /// </summary>
    public class SeoCount
    {
        // Literal text that precedes the result count on the Baidu result page.
        private const string CountPrefix = "百度一下,找到相关网页";

        // The prefix followed by the shortest run of characters that ends right
        // before "篇,用时" (the lookahead keeps that suffix out of the match).
        private const string CountPattern = @"百度一下,找到相关网页[\s\S]*?(?=篇,用时)";

        // Marker meaning "approximately" that Baidu puts in front of large counts.
        private const string ApproxMarker = "约";

        /// <summary>
        /// Returns the number of pages Baidu reports as indexed for a host
        /// within a given period.
        /// </summary>
        /// <param name="strTimespan">
        /// Value passed as the <c>lm</c> query parameter. According to the
        /// original author's notes: "1" = yesterday, "7" = last week,
        /// "30" = last month, "360" = last year, "0" = all dates.
        /// </param>
        /// <param name="strHostName">
        /// Host name searched for with a <c>site:</c> query. It is appended to
        /// the URL as-is, without URL encoding.
        /// </param>
        /// <returns>
        /// The text found between the count prefix and "篇,用时" in the result
        /// page, with the "约" marker removed, or "0" when nothing is found.
        /// </returns>
        /// <exception cref="System.Net.WebException">
        /// Thrown by <see cref="WebClient.DownloadString(string)"/> when the
        /// page cannot be downloaded.
        /// </exception>
        public string BaiduCount(string strTimespan, string strHostName)
        {
            string uri = "http://www.baidu.com/s?lm=" + strTimespan + "&wd=site:" + strHostName;

            string html;
            // WebClient is IDisposable; release it as soon as the page is fetched.
            // NOTE(review): the page is decoded with WebClient's default encoding;
            // confirm it matches the encoding Baidu actually serves.
            using (WebClient client = new WebClient())
            {
                html = client.DownloadString(uri);
            }

            return ExtractCount(html);
        }

        // Pulls the count text out of the downloaded result page; "0" if absent.
        private static string ExtractCount(string html)
        {
            string matched = Regex.Match(html, CountPattern, RegexOptions.IgnoreCase).Value;

            // Strip the leading label so only the count portion remains.
            string count = matched.Replace(CountPrefix, "");

            // Strings are immutable: the result of Replace must be assigned back,
            // otherwise the "约" marker stays in the returned value.
            count = count.Replace(ApproxMarker, "");

            // No match (or an empty count) means no indexed pages were reported.
            if (count == "")
            {
                return "0";
            }

            return count;
        }
    }
}
2using System.Collections.Generic;
3using System.Text;
4using System.Net;
5using System.Text.RegularExpressions;
namespace Tool
{
    /// <summary>
    /// Queries Baidu for the number of indexed pages of a site.
    /// </summary>
    public class SeoCount
    {
        // Literal text that precedes the result count on the Baidu result page.
        private const string CountPrefix = "百度一下,找到相关网页";

        // The prefix followed by the shortest run of characters that ends right
        // before "篇,用时" (the lookahead keeps that suffix out of the match).
        private const string CountPattern = @"百度一下,找到相关网页[\s\S]*?(?=篇,用时)";

        // Marker meaning "approximately" that Baidu puts in front of large counts.
        private const string ApproxMarker = "约";

        /// <summary>
        /// Returns the number of pages Baidu reports as indexed for a host
        /// within a given period.
        /// </summary>
        /// <param name="strTimespan">
        /// Value passed as the <c>lm</c> query parameter. According to the
        /// original author's notes: "1" = yesterday, "7" = last week,
        /// "30" = last month, "360" = last year, "0" = all dates.
        /// </param>
        /// <param name="strHostName">
        /// Host name searched for with a <c>site:</c> query. It is appended to
        /// the URL as-is, without URL encoding.
        /// </param>
        /// <returns>
        /// The text found between the count prefix and "篇,用时" in the result
        /// page, with the "约" marker removed, or "0" when nothing is found.
        /// </returns>
        /// <exception cref="System.Net.WebException">
        /// Thrown by <see cref="WebClient.DownloadString(string)"/> when the
        /// page cannot be downloaded.
        /// </exception>
        public string BaiduCount(string strTimespan, string strHostName)
        {
            string uri = "http://www.baidu.com/s?lm=" + strTimespan + "&wd=site:" + strHostName;

            string html;
            // WebClient is IDisposable; release it as soon as the page is fetched.
            // NOTE(review): the page is decoded with WebClient's default encoding;
            // confirm it matches the encoding Baidu actually serves.
            using (WebClient client = new WebClient())
            {
                html = client.DownloadString(uri);
            }

            return ExtractCount(html);
        }

        // Pulls the count text out of the downloaded result page; "0" if absent.
        private static string ExtractCount(string html)
        {
            string matched = Regex.Match(html, CountPattern, RegexOptions.IgnoreCase).Value;

            // Strip the leading label so only the count portion remains.
            string count = matched.Replace(CountPrefix, "");

            // Strings are immutable: the result of Replace must be assigned back,
            // otherwise the "约" marker stays in the returned value.
            count = count.Replace(ApproxMarker, "");

            // No match (or an empty count) means no indexed pages were reported.
            if (count == "")
            {
                return "0";
            }

            return count;
        }
    }
}