zoukankan      html  css  js  c++  java
  • 网页内容抓取工具、利用多线程

    http://www.cnblogs.com/hanguoji/archive/2007/02/27/657902.html

    一共涉及三个类分别为:
    数据访问类DBObject.cs、对应功能针对数据库操作类IRMNewsInteDB.cs、数据抓取类SpiderDispose.cs

    数据访问类:DBObject.cs

    using System;
    using System.Data;
    using System.Data.SqlClient;

    namespace IRMSpiderTool.DBA
    {
        
    /// <summary>
    /// Base class for data-access classes that talk to SQL Server through a single
    /// <see cref="SqlConnection"/> created by <see cref="CreateConnection"/>.
    /// </summary>
    /// <remarks>
    /// The Run* helpers open the connection on demand and do not throw on failure.
    /// Instead they return <c>null</c> (or -2 for the integer helpers) and store a
    /// description of the failure in <see cref="StrError"/>. The stored message is
    /// only overwritten by the next failure; a later successful call does not clear it.
    /// </remarks>
    public abstract class DBObject
    {
        /// <summary>
        /// Connection used by every helper in this class. It is null until
        /// <see cref="CreateConnection"/> has been called.
        /// </summary>
        protected SqlConnection Connection;

        /// <summary>
        /// Connection string passed to the most recent <see cref="CreateConnection"/> call.
        /// </summary>
        private string _connectionString = string.Empty;

        /// <summary>
        /// Description of the most recent failure recorded by <see cref="SetErrorMsg"/>.
        /// </summary>
        private string _strError = string.Empty;

        /// <summary>
        /// Stores the connection string and creates the (still closed) connection object.
        /// </summary>
        /// <param name="newConnectionString">Connection string for the database.</param>
        protected void CreateConnection( string newConnectionString )
        {
            _connectionString = newConnectionString;
            Connection = new SqlConnection(_connectionString);
        }

        /// <summary>
        /// Gets the connection string of this object.
        /// </summary>
        protected string ConnectionString
        {
            get
            {
                return _connectionString;
            }
        }

        /// <summary>
        /// Gets the description of the most recent failure, or an empty string if no
        /// helper has failed so far.
        /// </summary>
        protected string StrError
        {
            get
            {
                return _strError;
            }
        }

        /// <summary>
        /// Records a failure so that derived classes can read it through <see cref="StrError"/>.
        /// </summary>
        /// <param name="strFunctionName">Name of the helper in which the failure occurred.</param>
        /// <param name="strProcName">Stored procedure name or SQL text that was being executed.</param>
        /// <param name="strErrMsg">Exception message describing the failure.</param>
        private void SetErrorMsg(string strFunctionName, string strProcName, string strErrMsg)
        {
            _strError = "错误归属函数:" + strFunctionName + "\n错误标识:" + strProcName + "\n异常信息:" + strErrMsg;
        }

        /// <summary>
        /// Makes sure the connection is open before a command is executed.
        /// </summary>
        private void OpenConnection()
        {
            if (Connection == null)
            {
                throw new InvalidOperationException("CreateConnection must be called before a command is executed.");
            }

            // A broken connection has to be closed before it can be opened again.
            if (Connection.State == ConnectionState.Broken)
            {
                Connection.Close();
            }

            if (Connection.State == ConnectionState.Closed)
            {
                Connection.Open();
            }
        }

        /// <summary>
        /// Closes the connection if one has been created. Safe to call from a finally block.
        /// </summary>
        private void CloseConnection()
        {
            if (Connection != null)
            {
                Connection.Close();
            }
        }

        /// <summary>
        /// Creates a command on <see cref="Connection"/> and attaches the given parameters.
        /// </summary>
        /// <param name="commandText">Stored procedure name or SQL text.</param>
        /// <param name="commandType">How <paramref name="commandText"/> is to be interpreted.</param>
        /// <param name="sqlParameters">Parameters to attach; null means "no parameters".</param>
        /// <returns>The new command.</returns>
        private SqlCommand BuildCommand( string commandText, CommandType commandType, IDataParameter[] sqlParameters )
        {
            SqlCommand sqlCommand = new SqlCommand( commandText, Connection );
            sqlCommand.CommandType = commandType;

            if (sqlParameters != null)
            {
                foreach (SqlParameter parameter in sqlParameters)
                {
                    sqlCommand.Parameters.Add( parameter );
                }
            }

            return sqlCommand;
        }

        /// <summary>
        /// Detaches the parameters from a finished command and disposes it.
        /// </summary>
        /// <param name="sqlCommand">Command to release; null is ignored.</param>
        private static void ReleaseCommand( SqlCommand sqlCommand )
        {
            if (sqlCommand != null)
            {
                // A SqlParameter can belong to only one collection at a time; detaching
                // lets the caller reuse its parameter objects for another command.
                sqlCommand.Parameters.Clear();
                sqlCommand.Dispose();
            }
        }

        /// <summary>
        /// Executes a command without returning rows and closes the connection afterwards.
        /// </summary>
        /// <param name="functionName">Name of the public helper, used in the error message.</param>
        /// <param name="commandText">Stored procedure name or SQL text.</param>
        /// <param name="commandType">How <paramref name="commandText"/> is to be interpreted.</param>
        /// <param name="sqlParameters">Parameters to attach; may be null.</param>
        /// <returns>The value returned by ExecuteNonQuery, or -2 on failure.</returns>
        private int ExecuteNonQuery( string functionName, string commandText, CommandType commandType, IDataParameter[] sqlParameters )
        {
            SqlCommand sqlCommand = null;
            try
            {
                OpenConnection();
                sqlCommand = BuildCommand( commandText, commandType, sqlParameters );
                return sqlCommand.ExecuteNonQuery();
            }
            catch (Exception ex)
            {
                SetErrorMsg( functionName, commandText, ex.Message );
                return -2;
            }
            finally
            {
                ReleaseCommand( sqlCommand );
                CloseConnection();
            }
        }

        /// <summary>
        /// Executes a command and returns an open reader over its result.
        /// </summary>
        /// <param name="functionName">Name of the public helper, used in the error message.</param>
        /// <param name="commandText">Stored procedure name or SQL text.</param>
        /// <param name="commandType">How <paramref name="commandText"/> is to be interpreted.</param>
        /// <param name="sqlParameters">Parameters to attach; may be null.</param>
        /// <returns>An open reader, or null on failure.</returns>
        private SqlDataReader ExecuteReader( string functionName, string commandText, CommandType commandType, IDataParameter[] sqlParameters )
        {
            try
            {
                OpenConnection();
                SqlCommand sqlCommand = BuildCommand( commandText, commandType, sqlParameters );

                // The reader must stay open to be of any use to the caller, so the
                // connection cannot be closed here. CloseConnection makes the connection
                // close when the caller closes the reader. The command is deliberately
                // left alone because the reader is still using it.
                return sqlCommand.ExecuteReader( CommandBehavior.CloseConnection );
            }
            catch (Exception ex)
            {
                SetErrorMsg( functionName, commandText, ex.Message );
                CloseConnection();
                return null;
            }
        }

        /// <summary>
        /// Executes a command and loads its result into a new table.
        /// </summary>
        /// <param name="functionName">Name of the public helper, used in the error message.</param>
        /// <param name="commandText">Stored procedure name or SQL text.</param>
        /// <param name="commandType">How <paramref name="commandText"/> is to be interpreted.</param>
        /// <param name="sqlParameters">Parameters to attach; may be null.</param>
        /// <returns>The filled table, or null on failure.</returns>
        private DataTable FillDataTable( string functionName, string commandText, CommandType commandType, IDataParameter[] sqlParameters )
        {
            SqlCommand sqlCommand = null;
            try
            {
                OpenConnection();
                sqlCommand = BuildCommand( commandText, commandType, sqlParameters );

                DataTable dataTable = new DataTable();
                using (SqlDataAdapter sqlDA = new SqlDataAdapter( sqlCommand ))
                {
                    sqlDA.Fill( dataTable );
                }
                return dataTable;
            }
            catch (Exception ex)
            {
                SetErrorMsg( functionName, commandText, ex.Message );
                return null;
            }
            finally
            {
                ReleaseCommand( sqlCommand );
                CloseConnection();
            }
        }

        /// <summary>
        /// Executes a command and loads its result into a named table of a new data set.
        /// </summary>
        /// <param name="functionName">Name of the public helper, used in the error message.</param>
        /// <param name="commandText">Stored procedure name or SQL text.</param>
        /// <param name="commandType">How <paramref name="commandText"/> is to be interpreted.</param>
        /// <param name="sqlParameters">Parameters to attach; may be null.</param>
        /// <param name="tableName">Name of the table inside the returned data set.</param>
        /// <returns>The filled data set, or null on failure.</returns>
        private DataSet FillDataSet( string functionName, string commandText, CommandType commandType, IDataParameter[] sqlParameters, string tableName )
        {
            SqlCommand sqlCommand = null;
            try
            {
                OpenConnection();
                sqlCommand = BuildCommand( commandText, commandType, sqlParameters );

                DataSet dataSet = new DataSet();
                using (SqlDataAdapter sqlDA = new SqlDataAdapter( sqlCommand ))
                {
                    sqlDA.Fill( dataSet, tableName );
                }
                return dataSet;
            }
            catch (Exception ex)
            {
                SetErrorMsg( functionName, commandText, ex.Message );
                return null;
            }
            finally
            {
                ReleaseCommand( sqlCommand );
                CloseConnection();
            }
        }

        /// <summary>
        /// Returns the first row of a table produced by <see cref="FillDataTable"/>.
        /// </summary>
        /// <param name="functionName">Name of the public helper, used in the error message.</param>
        /// <param name="commandText">Stored procedure name or SQL text that produced the table.</param>
        /// <param name="dataTable">Table to read from; null if the query already failed.</param>
        /// <returns>The first row, or null if the query failed or returned no rows.</returns>
        private DataRow FirstRow( string functionName, string commandText, DataTable dataTable )
        {
            if (dataTable == null)
            {
                // The failure has already been recorded by FillDataTable.
                return null;
            }

            if (dataTable.Rows.Count == 0)
            {
                // An empty result is reported as an error, as it always has been.
                SetErrorMsg( functionName, commandText, "The query returned no rows." );
                return null;
            }

            return dataTable.Rows[0];
        }

        /// <summary>
        /// Executes a stored procedure and returns the value of its last parameter.
        /// </summary>
        /// <param name="storedProcName">Name of the stored procedure.</param>
        /// <param name="sqlParameters">
        /// Parameters of the procedure. The last element must be the integer output
        /// parameter whose value is returned.
        /// </param>
        /// <returns>
        /// The value of the last parameter after execution, or -2 if execution failed
        /// (see <see cref="StrError"/>).
        /// </returns>
        protected int RunProcInt( string storedProcName, IDataParameter[] sqlParameters)
        {
            SqlCommand sqlCommand = null;
            try
            {
                // The result is read from the last parameter, so fail before touching
                // the database when there is none.
                if (sqlParameters == null || sqlParameters.Length == 0)
                {
                    throw new ArgumentException("The output parameter must be supplied as the last element.", "sqlParameters");
                }

                OpenConnection();
                sqlCommand = BuildCommand( storedProcName, CommandType.StoredProcedure, sqlParameters );
                sqlCommand.ExecuteNonQuery();
                return (int)sqlCommand.Parameters[sqlParameters.Length - 1].Value;
            }
            catch (Exception ex)
            {
                SetErrorMsg( "RunProcInt", storedProcName, ex.Message );
                return -2;
            }
            finally
            {
                ReleaseCommand( sqlCommand );
                CloseConnection();
            }
        }

        /// <summary>
        /// Executes a stored procedure so that its output parameters are populated.
        /// The caller reads the values from the parameter objects it passed in.
        /// </summary>
        /// <param name="storedProcName">Name of the stored procedure.</param>
        /// <param name="sqlParameters">Parameters of the procedure, including the output parameters.</param>
        protected void RunProcOutPara( string storedProcName, IDataParameter[] sqlParameters)
        {
            ExecuteNonQuery( "RunProcOutPara", storedProcName, CommandType.StoredProcedure, sqlParameters );
        }

        /// <summary>
        /// Executes a stored procedure and returns an open reader over its result.
        /// </summary>
        /// <param name="storedProcName">Name of the stored procedure.</param>
        /// <param name="sqlParameters">Parameters of the procedure.</param>
        /// <returns>
        /// An open reader, or null on failure. The caller must close the reader;
        /// closing it also closes the connection.
        /// </returns>
        protected SqlDataReader RunProcDataReader( string storedProcName, IDataParameter[] sqlParameters )
        {
            return ExecuteReader( "RunProcDataReader", storedProcName, CommandType.StoredProcedure, sqlParameters );
        }

        /// <summary>
        /// Executes a stored procedure and returns its result as a new data set.
        /// </summary>
        /// <param name="storedProcName">Name of the stored procedure.</param>
        /// <param name="sqlParameters">Parameters of the procedure.</param>
        /// <param name="tableName">Name of the table inside the returned data set.</param>
        /// <returns>The filled data set, or null on failure.</returns>
        protected DataSet RunProcDataSet( string storedProcName, IDataParameter[] sqlParameters, string tableName )
        {
            return FillDataSet( "RunProcDataSet", storedProcName, CommandType.StoredProcedure, sqlParameters, tableName );
        }

        /// <summary>
        /// Executes a stored procedure and returns its result as a new table.
        /// </summary>
        /// <param name="storedProcName">Name of the stored procedure.</param>
        /// <param name="sqlParameters">Parameters of the procedure.</param>
        /// <returns>The filled table, or null on failure.</returns>
        protected DataTable RunProcDataTable( string storedProcName, IDataParameter[] sqlParameters)
        {
            return FillDataTable( "RunProcDataTable", storedProcName, CommandType.StoredProcedure, sqlParameters );
        }

        /// <summary>
        /// Executes a parameterless stored procedure and returns its result as a new table.
        /// </summary>
        /// <param name="storedProcName">Name of the stored procedure.</param>
        /// <returns>The filled table, or null on failure.</returns>
        protected DataTable RunProcDataTable( string storedProcName)
        {
            return FillDataTable( "RunProcDataTable", storedProcName, CommandType.StoredProcedure, null );
        }

        /// <summary>
        /// Executes a stored procedure and returns the first row of its result.
        /// </summary>
        /// <param name="storedProcName">Name of the stored procedure.</param>
        /// <param name="sqlParameters">Parameters of the procedure.</param>
        /// <returns>The first row, or null if execution failed or no row was returned.</returns>
        protected DataRow RunProcDataRow( string storedProcName, IDataParameter[] sqlParameters)
        {
            DataTable dataTable = FillDataTable( "RunProcDataRow", storedProcName, CommandType.StoredProcedure, sqlParameters );
            return FirstRow( "RunProcDataRow", storedProcName, dataTable );
        }

        /// <summary>
        /// Executes a SQL statement that does not return rows.
        /// </summary>
        /// <param name="strSql">SQL text.</param>
        /// <param name="sqlParameters">Parameters referenced by the SQL text.</param>
        /// <returns>The number of rows affected, or -2 on failure.</returns>
        protected int RunSqlInt(string strSql,IDataParameter[] sqlParameters)
        {
            return ExecuteNonQuery( "RunSqlInt", strSql, CommandType.Text, sqlParameters );
        }

        /// <summary>
        /// Executes a SQL statement and returns an open reader over its result.
        /// </summary>
        /// <param name="strSql">SQL text.</param>
        /// <param name="sqlParameters">Parameters referenced by the SQL text.</param>
        /// <returns>
        /// An open reader, or null on failure. The caller must close the reader;
        /// closing it also closes the connection.
        /// </returns>
        protected SqlDataReader RunSqlDataReader(string strSql,IDataParameter[] sqlParameters)
        {
            return ExecuteReader( "RunSqlDataReader", strSql, CommandType.Text, sqlParameters );
        }

        /// <summary>
        /// Executes a SQL statement and returns its result as a new data set.
        /// </summary>
        /// <param name="strSql">SQL text.</param>
        /// <param name="sqlParameters">Parameters referenced by the SQL text.</param>
        /// <param name="tableName">Name of the table inside the returned data set.</param>
        /// <returns>The filled data set, or null on failure.</returns>
        protected DataSet RunSqlDataSet( string strSql, IDataParameter[] sqlParameters, string tableName )
        {
            return FillDataSet( "RunSqlDataSet", strSql, CommandType.Text, sqlParameters, tableName );
        }

        /// <summary>
        /// Executes a SQL statement and returns its result as a new table.
        /// </summary>
        /// <param name="strSql">SQL text.</param>
        /// <param name="sqlParameters">Parameters referenced by the SQL text.</param>
        /// <returns>The filled table, or null on failure.</returns>
        protected DataTable RunSqlDataTable( string strSql, IDataParameter[] sqlParameters)
        {
            return FillDataTable( "RunSqlDataTable", strSql, CommandType.Text, sqlParameters );
        }

        /// <summary>
        /// Executes a SQL statement and returns the first row of its result.
        /// </summary>
        /// <param name="strSql">SQL text.</param>
        /// <param name="sqlParameters">Parameters referenced by the SQL text.</param>
        /// <returns>The first row, or null if execution failed or no row was returned.</returns>
        protected DataRow RunSqlDataRow( string strSql, IDataParameter[] sqlParameters)
        {
            DataTable dataTable = FillDataTable( "RunSqlDataRow", strSql, CommandType.Text, sqlParameters );
            return FirstRow( "RunSqlDataRow", strSql, dataTable );
        }

        /// <summary>
        /// Executes a parameterless SQL statement and returns an open reader over its result.
        /// </summary>
        /// <param name="strSql">SQL text.</param>
        /// <returns>
        /// An open reader, or null on failure. The caller must close the reader;
        /// closing it also closes the connection.
        /// </returns>
        protected SqlDataReader RunSqlDataReader(string strSql)
        {
            return ExecuteReader( "RunSqlDataReader", strSql, CommandType.Text, null );
        }

        /// <summary>
        /// Executes a parameterless SQL statement and returns its result as a new data set.
        /// </summary>
        /// <param name="strSql">SQL text.</param>
        /// <param name="tableName">Name of the table inside the returned data set.</param>
        /// <returns>The filled data set, or null on failure.</returns>
        protected DataSet RunSqlDataSet( string strSql, string tableName )
        {
            return FillDataSet( "RunSqlDataSet", strSql, CommandType.Text, null, tableName );
        }

        /// <summary>
        /// Executes a parameterless SQL statement and returns its result as a new table.
        /// </summary>
        /// <param name="strSql">SQL text.</param>
        /// <returns>The filled table, or null on failure.</returns>
        protected DataTable RunSqlDataTable( string strSql )
        {
            return FillDataTable( "RunSqlDataTable", strSql, CommandType.Text, null );
        }
    }

    }


    对应功能针对数据库操作类:IRMNewsInteDB.cs
    对应功能的数据库访问类

    数据抓取类:SpiderDispose.cs

    using System;
    using System.Collections;
    using System.Threading;
    using System.Data;
    using System.Net;
    using System.IO;
    using System.Text.RegularExpressions;

    namespace IRMSpiderTool.SpiderDB
    {
        
    // Callback delegate through which SpiderDispose hands its collected error table back to the caller.
        public delegate void ExampleCallback(Hashtable htError);
        
    /// <summary>
    /// Downloads the page for one stock code, passes its content to
    /// <c>MainForm.FilterMethod</c> and reports any errors through an optional callback.
    /// </summary>
    public class SpiderDispose
    {
        // Stock code appended to the configured base URL.
        private string strStockCode = string.Empty;

        /// <summary>
        /// Errors collected by the most recent <see cref="ConvertDataRunThread"/> call,
        /// keyed by stock code. Null until that method has run.
        /// </summary>
        public Hashtable htError;

        // Invoked with htError at the end of ConvertDataRunThread; may be null.
        private ExampleCallback callback;

        /// <summary>
        /// Stores the arguments that <see cref="ConvertDataRunThread"/> needs, so that
        /// the parameterless method can be used as a thread entry point.
        /// </summary>
        /// <param name="strCode">Stock code of the listed company.</param>
        /// <param name="callbackDelegate">Callback that receives the error table; may be null.</param>
        public SpiderDispose(string strCode,ExampleCallback callbackDelegate)
        {
            strStockCode = strCode;
            callback = callbackDelegate;
        }

        /// <summary>
        /// Returns the current local time in the format used by the error messages.
        /// </summary>
        private static string FormatTimestamp()
        {
            DateTime now = DateTime.Now;
            return now.ToString("yy-MM-dd") + " " + now.ToShortTimeString();
        }

        /// <summary>
        /// Records an error for the current stock code without failing when an
        /// earlier error has already been recorded under the same key.
        /// </summary>
        /// <param name="message">Error text to record.</param>
        private void AddError(string message)
        {
            if (htError.ContainsKey(strStockCode))
            {
                htError[strStockCode] = htError[strStockCode] + message;
            }
            else
            {
                htError.Add(strStockCode, message);
            }
        }

        /// <summary>
        /// Fetches the page for the stock code, strips line breaks and tabs from it,
        /// passes it to <c>MainForm.FilterMethod</c> and finally invokes the callback
        /// with the collected errors.
        /// </summary>
        public void ConvertDataRunThread()
        {
            htError = new Hashtable();

            // Base address from the "SpiderURL" application setting, followed by the stock code.
            string strSpiderURL = System.Configuration.ConfigurationSettings.AppSettings["SpiderURL"].ToString().Trim();
            strSpiderURL = strSpiderURL + strStockCode;

            // Page content; stays empty when the download fails.
            string strPage = string.Empty;

            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strSpiderURL);
                request.Timeout = 300000;

                // using blocks release the response and the reader even when reading throws.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (System.IO.StreamReader srContent = new System.IO.StreamReader(response.GetResponseStream(),System.Text.Encoding.GetEncoding("gb2312")))
                {
                    strPage = srContent.ReadToEnd();
                }
            }
            catch(Exception ex)
            {
                AddError("错误信息:股票代码["+strStockCode+"]页面不存在!"+ ex.Message +"\r\n出错时间:" + FormatTimestamp() + "\r\n");
            }

            // >= 0 so that a marker at the very start of the page is detected as well.
            if(strPage.IndexOf("股票代码["+strStockCode+"]未找到") >= 0)
            {
                AddError("错误信息:股票代码["+strStockCode+"]预警信息未找到!\r\n出错时间:" + FormatTimestamp() + "\r\n");
            }

            // Collapse the page onto one line before it is filtered.
            strPage = strPage.Replace("\r\n","");
            strPage = strPage.Replace("\t","");

            // NOTE(review): a new MainForm is created only to call FilterMethod, and the
            // call is made even when the download failed (strPage is then empty).
            // Confirm both are intended.
            MainForm mainForm = new MainForm();
            mainForm.FilterMethod(strStockCode,strPage);

            if (callback != null)
            {
                callback(htError);
            }
        }
    }

    }



    主窗体:MainForm.cs

    using System;
    using System.Drawing;
    using System.Collections;
    using System.Configuration;
    using System.ComponentModel;
    using System.Windows.Forms;
    using System.Data;
    using System.Net;
    using System.IO;
    using System.Text.RegularExpressions;
    using IRMSpiderTool.SpiderDB;
    using IRMSpiderTool.DBA;
    using System.Threading;

    namespace IRMSpiderTool
    {
        
    /// <summary>
        
    /// MainForm 的摘要说明。
        
    /// </summary>

        public class MainForm : System.Windows.Forms.Form
        
    {
            
    private System.Windows.Forms.GroupBox gBPrevise;
            
    private System.Windows.Forms.GroupBox gBError;
            
    private System.Windows.Forms.TextBox txtMessage;
            
    private System.Windows.Forms.ProgressBar proBarDispose;
            
    private System.Windows.Forms.Label labProBar;
            
    private System.Windows.Forms.Button btnStartSpider;
            
    private System.Windows.Forms.Timer timerCyc;
            
    private System.ComponentModel.IContainer components;
            
    //错误信息
            private string strError = string.Empty;
            
    //预警提示类别信息
            private static DataTable dtPreviseType;
            
    //主程序运行线程
            private Thread MainThread;

            
    /// <summary>
    /// Creates the form and builds its controls.
    /// </summary>
    public MainForm()
    {
        // Required for Windows Forms designer support.
        InitializeComponent();

        // TODO: add any further constructor code after the InitializeComponent call.
    }


            
    /// <summary>
    /// Cleans up the resources used by the form.
    /// </summary>
    /// <param name="disposing">
    /// When true, the component container is disposed (if one exists) before the
    /// base class is asked to dispose.
    /// </param>
    protected override void Dispose( bool disposing )
    {
        if (disposing && components != null)
        {
            components.Dispose();
        }

        base.Dispose( disposing );
    }


            
    // [Windows Forms designer-generated code: this region was collapsed in the original post and is not reproduced here]

            
    /// <summary>
    /// Returns the current local time in the format used by the log messages.
    /// </summary>
    private static string FormatLogTimestamp()
    {
        DateTime now = DateTime.Now;
        return now.ToString("yy-MM-dd") + " " + now.ToShortTimeString();
    }

    /// <summary>
    /// Runs one pass: loads the warning types and the company list from the
    /// database and, if both loaded without error, hands them to FilterDisposeInfo.
    /// Progress and errors are appended to the message box.
    /// </summary>
    private void StartRun()
    {
        txtMessage.Text += "******************开始执行 "+ FormatLogTimestamp() +"******************\r\n";

        IRMNewsInteDB newsDB = new IRMNewsInteDB();
        strError = string.Empty;
        DataTable dtType = newsDB.GetPreviseType(ref strError);
        DataTable dtCorp = newsDB.GetCorpInfo(ref strError);

        // Either lookup may have reported a failure through strError.
        if(strError.Length>0)
        {
            txtMessage.Text += "错误信息:" + strError + "!\r\n出错时间:" + FormatLogTimestamp() + "\r\n";
            return ;
        }

        // A missing table is treated like an empty one instead of being dereferenced.
        if(dtCorp == null || dtCorp.Rows.Count == 0)
        {
            txtMessage.Text += "提示信息:上市公司信息为空!\r\n提示时间:" + FormatLogTimestamp() + "\r\n";
            return ;
        }

        // Fetch and filter the pages, then insert the results into the news database.
        FilterDisposeInfo(dtCorp,dtType);
        txtMessage.Text += "******************执行结束 "+ FormatLogTimestamp() +"******************\r\n";
    }

            
    /// <summary>
    /// Error callback: appends the string form of every value in the supplied table
    /// to the message box, one per line, then moves the caret to the end of the text
    /// and scrolls it into view.
    /// </summary>
    /// <param name="htErrorInfo">Table whose values are the error messages to show.</param>
    public void ErrorCallback(Hashtable htErrorInfo)
    {
        foreach (DictionaryEntry errorEntry in htErrorInfo)
        {
            string line = errorEntry.Value.ToString() + "\r\n";
            txtMessage.Text += line;
        }

        // Place an empty selection at the very end so the newest message is visible.
        txtMessage.Focus();
        int endOfText = txtMessage.TextLength;
        txtMessage.Select(endOfText, 0);
        txtMessage.ScrollToCaret();
    }

            
    /// <summary>
    /// Starts one SpiderDispose worker thread for every company row whose State
    /// column equals 1, advancing the progress bar and pausing two seconds after
    /// each launch. The early-warning type table is stored in the static
    /// dtPreviseType field first so that FilterMethod can read it.
    /// </summary>
    /// <param name="dtCorp">CorpInfo table; must contain State and StockCode columns.</param>
    /// <param name="dtType">PreviseType table.</param>
    /// <exception cref="ArgumentNullException"><paramref name="dtCorp"/> is null.</exception>
    /// <remarks>
    /// NOTE(review): StartRun calls this from MainThread, so the progress-bar updates
    /// below happen off the UI thread; consider marshalling them via Control.Invoke.
    /// </remarks>
    public void FilterDisposeInfo(DataTable dtCorp,DataTable dtType)
    {
        if (dtCorp == null)
        {
            throw new ArgumentNullException("dtCorp");
        }

        // Publish the type table through the static field.
        dtPreviseType = dtType;

        // Keep only rows with State = 1 (per the original comment: drops delisted companies).
        DataRow[] activeCorps = dtCorp.Select("State = 1");

        proBarDispose.Maximum = activeCorps.Length;
        proBarDispose.Minimum = 0;
        proBarDispose.Value = 0;

        // Iterate the selected rows directly. The previous version merged them into a
        // DataSet and read Tables[0], which throws when the selection is empty because
        // merging an empty row array creates no table.
        foreach (DataRow corpRow in activeCorps)
        {
            string stockCode = corpRow["StockCode"].ToString();
            SpiderDispose spider = new SpiderDispose(stockCode, new ExampleCallback(ErrorCallback));
            Thread threadSpider = new Thread(new ThreadStart(spider.ConvertDataRunThread));
            threadSpider.Start();
            proBarDispose.Value++;

            // Wait two seconds before launching the next worker.
            Thread.Sleep(2000);
        }
    }


            
    /// <summary>
    /// Extracts the early-warning entries from a fetched page and inserts them into
    /// the database. The page is cut into one section per row of dtPreviseType: a
    /// section starts at the first occurrence of its PreviseTypeName and ends where
    /// the next row's PreviseTypeName first occurs (the last section runs to the end
    /// of the page). Each date/content pair matched inside a section is stripped of
    /// HTML tags and passed to Previse_Add. Sections that cannot be located, and
    /// inserts that fail, are reported in the message box.
    /// </summary>
    /// <param name="strCode">Stock code the page belongs to.</param>
    /// <param name="strPage">Fetched page content; null is treated as an empty page.</param>
    public void FilterMethod(string strCode,string strPage)
    {
        // A missing page behaves like an empty one: every type is reported as having no data.
        if (strPage == null)
        {
            strPage = string.Empty;
        }

        // Loop-invariant patterns.
        // (\s)* matches any run of whitespace including line breaks; (.*?) lazily matches
        // anything except a line break.
        string strRegex = "<td width=\"15%\" class=\"common_text\"><li>(?<Date>.*?)</td>(\\s)*<td width=\"85%\" class=\"common_text\">(?<Content>(\\s)*(.*?)(\\s)*(.*?)(\\s)*(.*?)(\\s)*(.*?)(\\s)*)</td>";
        // Matches any HTML tag so it can be removed from the extracted content.
        string strRegexHtml = "<.+?>";

        IRMNewsInteDB newsDB = new IRMNewsInteDB();
        int iTypeCount = dtPreviseType.Rows.Count;

        for (int i = 0; i < iTypeCount; i++)
        {
            string typeName = dtPreviseType.Rows[i]["PreviseTypeName"].ToString();
            int iTypeStart = strPage.IndexOf(typeName);

            // The section ends at the next type's heading, or at the end of the page for the last type.
            int iTypeEnd = strPage.Length;
            if (i < iTypeCount - 1)
            {
                iTypeEnd = strPage.IndexOf(dtPreviseType.Rows[i + 1]["PreviseTypeName"].ToString());
            }

            // One check covers every "no data" case: heading not found, next heading not
            // found (-1), empty section (end == start), and next heading located before
            // this one, which previously made Substring throw on a negative length.
            if (iTypeStart == -1 || iTypeEnd <= iTypeStart)
            {
                DateTime missingAt = DateTime.Now;
                txtMessage.Text += "错误信息:股票代码[" + strCode + "]预警信息<" + typeName + ">没有数据,请核实内容!\r\n出错时间:" + missingAt.ToString("yy-MM-dd") + " " + missingAt.ToShortTimeString() + "\r\n";
                continue;
            }

            string strTmpContent = strPage.Substring(iTypeStart, iTypeEnd - iTypeStart);

            // Collect every date/content pair in this section.
            MatchCollection TitleMatchs = Regex.Matches(strTmpContent, strRegex, RegexOptions.IgnoreCase | RegexOptions.Multiline);

            strError = string.Empty;
            foreach (Match NextMatch in TitleMatchs)
            {
                // Strip all HTML markup from the extracted content.
                string strTmpReContent = Regex.Replace(NextMatch.Groups["Content"].Value, strRegexHtml, "", RegexOptions.IgnoreCase | RegexOptions.Multiline);

                // Insert the entry into the early-warning table of the news database.
                int iSuccess = newsDB.Previse_Add(strCode, Convert.ToInt32(dtPreviseType.Rows[i]["PreviseTypeID"]), NextMatch.Groups["Date"].Value.Trim(), strTmpReContent.Trim(), ref strError);
                if (iSuccess < 0)
                {
                    DateTime failedAt = DateTime.Now;
                    txtMessage.Text += "错误信息:" + strError + "!\r\n出错时间:" + failedAt.ToString("yy-MM-dd") + " " + failedAt.ToShortTimeString() + "\r\n";
                }
            }
        }
    }

            
    /// <summary>
    /// Click handler of the start button: disables the button, launches the first
    /// run of StartRun on a new thread kept in MainThread, and starts the cycle timer.
    /// </summary>
    private void btnStartSpider_Click(object sender, System.EventArgs e)
    {
        // Once started, the button must stay disabled.
        btnStartSpider.Enabled = false;

        // First run of the main program.
        ThreadStart firstRun = new ThreadStart(StartRun);
        MainThread = new Thread(firstRun);
        MainThread.Start();

        // Begin the periodic re-runs driven by timerCyc_Tick.
        timerCyc.Start();
    }


            
    /// <summary>
    /// Timer tick handler: when the previous run has finished, clears the message box
    /// if it holds more than 30000 characters, starts a new StartRun thread, and
    /// re-arms the timer with the interval read from the "iClockInterval" app setting.
    /// Does nothing while the previous run is still alive.
    /// </summary>
    private void timerCyc_Tick(object sender, System.EventArgs e)
    {
        // Skip this tick if the timer is off or the last run has not ended yet.
        if (!timerCyc.Enabled || MainThread.IsAlive)
        {
            return;
        }

        if (txtMessage.Text.Length > 30000)
        {
            txtMessage.Text = "";
        }

        // Pause the timer while the new run is being set up.
        timerCyc.Enabled = false;

        // Run the main program again.
        ThreadStart nextRun = new ThreadStart(StartRun);
        MainThread = new Thread(nextRun);
        MainThread.Start();

        // Re-read the configured interval and put the timer back into service.
        string configuredInterval = ConfigurationSettings.AppSettings["iClockInterval"].ToString();
        timerCyc.Interval = int.Parse(configuredInterval);
        timerCyc.Enabled = true;
    }


            
    /// <summary>
    /// Form load handler: sets the cycle timer's interval from the "iClockInterval"
    /// app setting.
    /// </summary>
    private void MainForm_Load(object sender, System.EventArgs e)
    {
        // Interval between repeated runs of the program, as configured.
        string intervalSetting = ConfigurationSettings.AppSettings["iClockInterval"].ToString();
        timerCyc.Interval = int.Parse(intervalSetting);
    }


            
    /// <summary>
    /// Form closing handler: aborts the main worker thread if it is still running.
    /// </summary>
    private void MainForm_Closing(object sender, System.ComponentModel.CancelEventArgs e)
    {
        // MainThread is only created by btnStartSpider_Click / timerCyc_Tick, so it is
        // still null when the form is closed before the crawl was ever started.
        if (MainThread != null && MainThread.IsAlive)
        {
            MainThread.Abort();
        }
    }

        }

    }


  • 相关阅读:
    linux安装redis 完整步骤
    java获取音频文件播放时长
    jar包部署在linux上后浏览器访问不到的问题
    FileRead方法
    FileWrite方法
    用Calendar方法知道月份的天数
    Calendar的用法
    两个时间相减(java简单用法)
    单列体现(Runtime)
    Random方法
  • 原文地址:https://www.cnblogs.com/smallfa/p/892685.html
Copyright © 2011-2022 走看看