我看了一下，这个网页为了防止抓取，内容都是用 JS 生成的，不过我还是找到了数据的具体位置。所有物品有 3 万多条，还需要过滤，其中有用的只有 3 千多条，靠人工来做几乎不现实，于是写了一个 WinForm 程序来抓取这些信息并写入数据库。
抓取和写入数据库的动作在 WebBrowser 控件的 DocumentCompleted 事件中完成。
// Query that selects every column of every row in Table1.
private const string SQL_DATA = "select * from Table1";
// Opening fragment of an INSERT into Table1 (WebID, Name, NeedLevel, Content);
// it ends at the opening parenthesis of the values list, so a comma-separated
// value list and Sql_INSERT_2 must be appended to form a complete statement.
private const string SQL_INSERT_1 = "insert into Table1 (WebID,Name,NeedLevel,Content) values (";
// Closing parenthesis that terminates the values list started by SQL_INSERT_1.
private const string Sql_INSERT_2 = ")";
/// <summary>
/// Handles the browser's DocumentCompleted event: reads the text of the loaded page,
/// extracts the item level and item name from it and, when a name was found,
/// inserts one row (WebID, Name, NeedLevel, Content) into Table1.
/// </summary>
/// <param name="sender">Source of the event (not used).</param>
/// <param name="e">Event data (not used).</param>
/// <exception cref="Exception">
/// Thrown with the message "no good" when executing the INSERT fails; the
/// underlying error is attached as <see cref="Exception.InnerException"/>.
/// </exception>
private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
    string mydocument = webBrowser1.DocumentText;

    // Item level taken from the page.
    int mylevel = GetLevel(mydocument);
    // Item name taken from the page.
    string myname = GetName(mydocument);
    if (string.IsNullOrEmpty(myname))
    {
        // No item name was extracted, so nothing is stored for this page.
        return;
    }

    // The name and the page text come from a remote web page and routinely contain
    // quote characters, so they are passed as parameters instead of being spliced
    // into the SQL text (prevents both broken statements and SQL injection).
    const string insertSql =
        "insert into Table1 (WebID,Name,NeedLevel,Content) values (@WebID,@Name,@NeedLevel,@Content)";

    // 'using' guarantees the command and connection are released even when the insert throws.
    using (SqlConnection cn = new SqlConnection(SQL_CONNECTION))
    using (SqlCommand sqlcmd = new SqlCommand(insertSql, cn))
    {
        // NOTE(review): ID is declared outside this block; it was previously written
        // into the statement as an unquoted value, so it is passed through unchanged here.
        sqlcmd.Parameters.AddWithValue("@WebID", ID);
        sqlcmd.Parameters.AddWithValue("@Name", myname);
        sqlcmd.Parameters.AddWithValue("@NeedLevel", mylevel);
        sqlcmd.Parameters.AddWithValue("@Content", mydocument);

        // Opening the connection stays outside the try block so connection
        // failures surface with their original exception, as before.
        cn.Open();
        try
        {
            sqlcmd.ExecuteNonQuery();
        }
        catch (Exception ex)
        {
            // Same exception type and message as before, but the real cause is preserved.
            throw new Exception("no good", ex);
        }
    }
}
private const string SQL_INSERT_1 = "insert into Table1 (WebID,Name,NeedLevel,Content) values (";
private const string Sql_INSERT_2 = ")";

private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)







//取得页面中的物品等级

//取得页面中物品名称



























取得物品名称和等级:















































由于无法判断浏览器是否已经完成加载，所以改用 Timer 控件来完成：





















Timer 的 Interval 设为 1000 ms，40000 条数据用了 11 个多小时，从昨天晚上 10 点一直跑到今天早上。早上刚来的时候，看到数据全都乖乖地在数据库里呆着了。呵呵，搞定，交差……