zoukankan      html  css  js  c++  java
  • C# 网页信息采集(Form.cs)

    using System;
    using System.Collections.Generic;
    using System.ComponentModel;
    using System.Data;
    using System.Drawing;
    using System.Text;
    using System.Windows.Forms;

    using System.Web;
    using System.Net;
    using System.IO;

    using System.Data.SqlClient;
    //using Microsoft.Office.Interop.Excel;
    using System.Threading;
    using WebBee;
    using System.Text.RegularExpressions;


    namespace WebBee
    {
        public partial class Form1 : Form
        {
            public Form1()
            {
                InitializeComponent();
            }
            /// <summary>
            /// kijiji 网
            /// </summary>
            /// <param ></param>
            /// <param ></param>
            private void button1_Click(object sender, EventArgs e)
            {

                //try
                //{
                //    Microsoft.Office.Interop.Excel.Application objApp = new Microsoft.Office.Interop.Excel.Application();
                //    Microsoft.Office.Interop.Excel.Workbooks workbooks = objApp.Workbooks;


                //    //判断目标文件是否存在,若不存在则先创建再打开,若存在则打开;

                //    string strDestFileFullName = "D:\\datadb.xls";
                //    FileInfo destFile = new FileInfo(strDestFileFullName);
                //    destFile = new FileInfo(strDestFileFullName);
                //    object missing = System.Type.Missing;
                //    objApp.DisplayAlerts = false;
                //    workbooks.Open(strDestFileFullName, missing, missing, missing, missing, missing, missing, missing, missing, missing, missing, missing, missing, missing, missing);


                //    Microsoft.Office.Interop.Excel.Sheets objSheets = objApp.Worksheets;
                //    Microsoft.Office.Interop.Excel._Worksheet objSheet;
                //    objSheet = (Microsoft.Office.Interop.Excel._Worksheet)objSheets.get_Item(1);


                //    int fromId = Convert.ToInt32(this.fromID.Text);
                //    int toId = Convert.ToInt32(this.toId.Text);

                //    for (int j = fromId; j <= toId; j++)
                //    {
                //        string Url = fixTextBox.Text;
                //        Url = Url + j;

                //        int i = j - fromId;
                //        this.listBox1.Items.Add(Url);

                //        //得到指定Url的源码
                //        Encoding encoding = Encoding.GetEncoding("utf-8");
                //        string strWebContent = GetWebContent(Url, encoding);
                //        if (strWebContent == "") continue;

                //        //只取数据部分的Html代码
                //        int titleStart = strWebContent.IndexOf("<title>", 0);
                //        int titleEnd = strWebContent.IndexOf("</title>", 0);
                //        string temp_title = strWebContent.Substring(titleStart, titleEnd - titleStart + 8);
                //        int bodyStart = strWebContent.IndexOf("<div main_view\">", 0);

                //        //没有想要的信息,继续下一个
                //        if (bodyStart == -1) continue;
                //        int bodyEnd = strWebContent.IndexOf("</body>", 0);
                //        string needString = strWebContent.Substring(bodyStart, bodyEnd - bodyStart);
                //        needString = temp_title + needString;
                //        try
                //        {
                //            //生成HtmlDocument
                //            WebBrowser webb = new WebBrowser();
                //            webb.Navigate("about:blank");

                //            HtmlDocument htmldoc = webb.Document.OpenNew(false);
                //            htmldoc.Write(needString);

                //            string title = htmldoc.Title;
                //            string content = htmldoc.GetElementById("view_content").InnerText;


                //            string[] firstlinestrs = title.Split('|');

                //            //如果数据不完整,放弃这个文件
                //            if (firstlinestrs.Length != 3) continue;
                //            string[] areas = firstlinestrs[1].Split(',');
                //            string[] sorts ={ "", "", "" };

                //            if (firstlinestrs[2] != null)
                //            {
                //                sorts = firstlinestrs[2].Split(',');
                //            }
                //            else
                //            {

                //            }

                //            objSheet.Cells[i + 1, 1] = firstlinestrs[0];

                //            //区域--------------------------------------------
                //            objSheet.Cells[i + 1, 2] = areas[0];
                //            if (areas.Length >= 2)
                //            {
                //                objSheet.Cells[i + 1, 3] = areas[1];
                //            }
                //            else
                //            {
                //                objSheet.Cells[i + 1, 3] = "";
                //            }
                //            if (areas.Length >= 3)
                //            {
                //                objSheet.Cells[i + 1, 4] = areas[2];
                //            }
                //            else
                //            {
                //                objSheet.Cells[i + 1, 4] = "";
                //            }

                //            //分类-----------------------------------------
                //            objSheet.Cells[i + 1, 5] = sorts[0];
                //            if (sorts.Length >= 2)
                //            {
                //                objSheet.Cells[i + 1, 6] = sorts[1];
                //            }
                //            else
                //            {
                //                objSheet.Cells[i + 1, 6] = "";
                //            }
                //            if (sorts.Length >= 3)
                //            {
                //                objSheet.Cells[i + 1, 7] = sorts[2];
                //            }
                //            else
                //            {
                //                objSheet.Cells[i + 1, 7] = "";
                //            }

                //            //内容
                //            objSheet.Cells[i + 1, 8] = content;

                //        }
                //        catch (Exception ex)
                //        {

                //        }

                //    }

                //    string filename = "d:\\dd.xls";
                //    objSheet.SaveAs(filename, missing, missing, missing, missing, missing, missing, missing, missing, missing);

                //    workbooks.Close();
                //    objApp.Quit();


                //}
                //catch (Exception ex)
                //{

                //}
            }

            //根据Url地址得到网页的html源码
            private string GetWebContent(string Url, Encoding encoding)
            {
                string strResult = "";
                try
                {
                    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
                    //声明一个HttpWebRequest请求
                    request.Timeout = 30000;
                    //设置连接超时时间
                    request.Headers.Set("Pragma", "no-cache");
                    // request.Headers.Set("KeepAlive", "true");
                    request.CookieContainer = new CookieContainer();
                    request.Credentials = CredentialCache.DefaultCredentials;
                    request.Referer = Url;

                    request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";

                    HttpWebResponse response = (HttpWebResponse)request.GetResponse();
                    Stream streamReceive = response.GetResponseStream();

                    StreamReader streamReader = new StreamReader(streamReceive, encoding);
                    strResult = streamReader.ReadToEnd();
                    streamReceive.Close();
                    streamReader.Close();
                    streamReceive = null;
                    streamReader = null;
                }
                catch
                {
                    return "";
                }
                return strResult;
            }

            /// <summary>
            /// 点评网
            /// </summary>
            /// <param ></param>
            /// <param ></param>
            private void button2_Click(object sender, EventArgs e)
            {

                int fromID = Convert.ToInt32(this.fromID.Text);
                int toId = Convert.ToInt32(this.toId.Text);
                for (int i = fromID; i < toId; i++)
                {

                    string Url = this.fixTextBox.Text + i;

                    //得到指定Url的源码
                    Encoding encoding = Encoding.GetEncoding("utf-8");
                    string strWebContent = GetWebContent(Url, encoding);
                    if (strWebContent.IndexOf("该商户不存在</title>") != -1) continue;
                    string needstr = "";
                    int infostart = strWebContent.IndexOf("<div Main\">", 0);
                    int infoend = strWebContent.IndexOf("<div Votes\">", 0);


                    needstr = strWebContent.Substring(infostart, infoend - infostart);


                    //

                    try
                    {
                        //生成HtmlDocument
                        WebBrowser webb = new WebBrowser();
                        webb.Navigate("about:blank");

                        HtmlDocument htmldoc = webb.Document.OpenNew(false);
                        htmldoc.Write(needstr);
                        ShopInfo shopInfo = GetShopInfo(htmldoc);
                        shopInfo.Shopurl = Url;


                        this.insertData(shopInfo);


                    }
                    catch (Exception ex)
                    {

                    }
                }
            }

            /// <summary>
            /// 取出列表页面的店铺URL
            /// </summary>
            /// <param ></param>
            /// <returns></returns>

            private string[] getNeedpages(HtmlDocument htmldoc)
            {
                string[] s = new string[20];
                for (int i = 0; i < 20; i++)
                {
                    string idname = "_ctl0__ctl" + i + "_HShopName";
                    string url = htmldoc.GetElementById(idname).GetAttribute("href");
                    url = url.Substring(11, url.Length - 11);
                    url = "http://www.dianping.com/" + url;
                    s[i] = url;
                }

                return s;
            }

            /// <summary>
            /// 取出shopInfo对象
            /// </summary>
            /// <param ></param>
            /// <returns></returns>
            private ShopInfo GetShopInfo(HtmlDocument htmldoc)
            {
                string bodyHtml = htmldoc.Body.InnerHtml;

                ShopInfo Si = new ShopInfo();

                HtmlElementCollection areas_hc = htmldoc.GetElementsByTagName("div");

                if (areas_hc != null)
                {

                    Si.Areas = areas_hc[2].InnerText;
                    Si.Areas = Si.Areas.Replace(">", "|");

                }


                HtmlElementCollection h1_hc = htmldoc.GetElementsByTagName("h1");


                if (h1_hc.Count != 0)
                {
                    Si.Name = h1_hc[0].InnerText;
                    Si.Name = Si.Name.Replace("&nbsp;", " ").Trim(); ;

                }
                else
                {
                    Si.Name = "";
                }

                HtmlElementCollection span_hc = htmldoc.GetElementsByTagName("span");
                if (span_hc.Count != 0)
                {
                    Si.ChildName = span_hc[0].InnerText;
                    Si.ChildName = Si.ChildName.Replace("&nbsp;", " ");
                }
                else
                {
                    Si.ChildName = "";
                }
                int startAdress = bodyHtml.IndexOf("<B>地址:</B>", 0);
                int endAddress = 0;

                if (startAdress != -1)
                {
                    endAddress = bodyHtml.IndexOf("<BR>", startAdress, 200);
                    Si.Address = bodyHtml.Substring(startAdress + 10, endAddress - startAdress - 10);
                    Si.Address = Si.Address.Replace("&nbsp;", " ").Trim();
                    if (Si.Address.IndexOf("<A class") != -1)
                    {
                        Si.Address = Si.Address.Substring(0, Si.Address.IndexOf("<A class"));

                    }
                }
                else
                {
                    Si.Address = "";
                }
                //------------------------------
                startAdress = bodyHtml.IndexOf("<B>电话:</B>", 0);

                if (startAdress != -1)
                {
                    endAddress = bodyHtml.IndexOf("<BR>", startAdress, 100);
                    Si.Phone = bodyHtml.Substring(startAdress + 10, endAddress - startAdress - 10);
                    Si.Phone = Si.Phone.Replace("&nbsp;", " ").Trim();
                }
                else
                {
                    Si.Phone = "";
                }

                //------------------------------
                startAdress = bodyHtml.IndexOf("<B>别名:</B>", 0);


                if (startAdress != -1)
                {
                    endAddress = bodyHtml.IndexOf("<BR>", startAdress, 100);
                    Si.Bm = bodyHtml.Substring(startAdress + 10, endAddress - startAdress - 10);
                    Si.Bm = Si.Bm.Replace("&nbsp;", " ").Trim();
                }
                else
                {
                    Si.Bm = "";
                }

                //-----------------------------
                if (htmldoc.GetElementById("ShopTag") != null)
                {


                    Si.Tag = htmldoc.GetElementById("ShopTag").InnerText;
                    Si.Tag = parseTag(Si.Tag);
                }
                else
                {

                    Si.Tag = "";

                }
                //-----------------------------ShopDishs
                if (htmldoc.GetElementById("ShopComment") != null)
                {


                    Si.Content = htmldoc.GetElementById("ShopComment").InnerText;
                }
                else
                {

                    Si.Content = "";

                }
                //-----------------------------
                if (htmldoc.GetElementById("ShopDish") != null)
                {


                    Si.Dish = htmldoc.GetElementById("ShopDish").InnerText;
                    Si.Dish = parseTag(Si.Dish);
                }
                else
                {

                    Si.Dish = "";

                }
                if (htmldoc.GetElementById("ShopDish") != null)
                {
                    Si.Imagesurl = htmldoc.GetElementById("ShopPhoto").GetElementsByTagName("IMG")[0].GetAttribute("src");


                }

                htmldoc = null;
                bodyHtml = null;
                areas_hc = null;
                span_hc = null;
                return Si;
            }

            /// <summary>
            /// 解析字符串
            /// </summary>
            /// <param ></param>
            /// <returns></returns>

            private string parseTag(string tagStr)
            {
                string[] temp = tagStr.Split(' ');
                string returnStr = "";
                for (int i = 0; i < temp.Length; i++)
                {
                    if (temp[i] != "")
                    {
                        returnStr = returnStr + temp[i].Substring(0, temp[i].IndexOf("(")) + "|";
                    }

                }
                if (returnStr.EndsWith("|")) returnStr = returnStr.Substring(0, returnStr.Length - 1);

                return returnStr;

            }


            private void insertData(ShopInfo si)
            {


                StringBuilder strSql = new StringBuilder();
                strSql.Append("insert into getTempData(");
                strSql.Append("name,bm,childName,phone,address,tag,dish,content,areas,imagesurl,url");
                strSql.Append(") values (");
                strSql.Append("@name,@bm,@childName,@phone,@address,@tag,@dish,@content,@areas,@imagesurl,@url)");

                SqlParameter[] parameters = {
         new SqlParameter("@name", SqlDbType.VarChar,128),
         new SqlParameter("@bm",SqlDbType.VarChar,50),
         new SqlParameter("@childName", SqlDbType.VarChar,50),
         new SqlParameter("@phone", SqlDbType.VarChar,100),
         new SqlParameter("@address", SqlDbType.VarChar,200),
         new SqlParameter("@tag", SqlDbType.VarChar,256),
         new SqlParameter("@dish", SqlDbType.VarChar,256),
         new SqlParameter("@content", SqlDbType.VarChar,4096),
         new SqlParameter("@areas", SqlDbType.VarChar,256),
         new SqlParameter("@imagesurl", SqlDbType.VarChar,128),
                        new SqlParameter("@url", SqlDbType.VarChar,64)
                                    };
                parameters[0].Value = si.Name;
                parameters[1].Value = si.Bm;
                parameters[2].Value = si.ChildName;
                parameters[3].Value = si.Phone;
                parameters[4].Value = si.Address;
                parameters[5].Value = si.Tag;
                parameters[6].Value = si.Dish;
                parameters[7].Value = si.Content;
                parameters[8].Value = si.Areas;
                parameters[9].Value = si.Imagesurl;
                parameters[10].Value = si.Shopurl;

                DBHelper.ExecuteSql(strSql.ToString(), parameters);
            }
            private bool mIsRunCtrip = false;

            public bool IsRunCtrip
            {
                get { return mIsRunCtrip = false; }
                set { mIsRunCtrip = value; }
            }
            delegate void dFrist(int pIndext);
            delegate void d8JMain(int i);
            dFrist dfrist = null;
            d8JMain d8jmain = null;
            private void button3_Click(object sender, EventArgs e)
            {
                progressBar1.Maximum = Convert.ToInt32(this.toId.Text) - Convert.ToInt32(this.fromID.Text);
                this.Text = "正在采集携程网,请等待。。。";
                dfrist = new dFrist(showmsg);
                Thread tCtrip = new Thread(StartCtrip);
                tCtrip.IsBackground = true;
                tCtrip.Start();


                // this.fixTextBox.Text = "已完成携程网采集,请根目录查看日志";
             
            }
            void showmsg(int i)
            {

                progressBar1.Value++;
                label4.Text = "现在ID:" + i.ToString() + ",已完成:" + (progressBar1.Value / (float)progressBar1.Maximum).ToString("p");
                if (progressBar1.Value == progressBar1.Maximum)
                    MessageBox.Show("finish");
            }
            // Thread entry point used by button3_Click: walks the ID range from the fromID and
            // toId text boxes (inclusive), loads the CtripInfo for each ID and stores its
            // prices. A failure on one ID is appended to CtripErrorlog.log in the application
            // start-up folder and the loop moves on to the next ID.
            // NOTE(review): this method reads UI controls (fromID/toId) although button3_Click
            // runs it on a worker thread - confirm that is acceptable or pass the values in.
            void StartCtrip()
            {
                CtripInfo ci = null;
                int fromID = Convert.ToInt32(this.fromID.Text);
                int toId = Convert.ToInt32(this.toId.Text);
                for (int i = fromID; i <= toId; i++)
                {
                    // Queue a progress update for this ID on the UI thread (dfrist -> showmsg).
                    this.BeginInvoke(dfrist, new object[] { i });
                    try
                    {
                        ci = new CtripInfo().GetCtripInfoByHotelID(i);
                        if (ci != null)
                        {
                            // NOTE(review): the next line is garbled and does not compile; text
                            // appears to be missing between "int" and the DoEvents call
                            // (presumably the declaration of "id" used below) - restore it
                            // from the original source.
                            int System.Windows.Forms.Application.DoEvents();
                            if (null != ci.HotelPrice)
                            {
                                // NOTE(review): the loop starts at index 1, so element 0 of
                                // HotelPrice is never stored - confirm this is intentional.
                                for (int ii = 1; ii < ci.HotelPrice.Count; ii++)
                                {
                                    ci.AddHotelPrice(ci.HotelPrice[ii], id);
                                }
                            }
                            else
                            {
                                continue;
                            }
                        }
                        else
                        {
                            continue;
                        }
                        ci = null;

                    }
                    catch (Exception ex)
                    {

                        // Append (second argument true) the failing ID and message to the log.
                        TextWriter tw = new StreamWriter(System.Windows.Forms.Application.StartupPath + "\\CtripErrorlog.log", true);
                        tw.Flush();
                        tw.WriteLine("错误ID: " + i.ToString() + "\r\n 错误原因:" + ex.Message);
                        tw.WriteLine("-------------------------------------------");
                        tw.Flush();
                        tw.Close();
                        tw = null;
                        ci = null;
                        continue;
                    }
                }

            }
            void GetCtripHotel(int pStart, int pEnd)
            {
                IsRunCtrip = false;
            }

            private void button4_Click(object sender, EventArgs e)
            {
                //System.Data.DataSet dt = DBHelper.Query("select * from hotel price");
                //string sql = "update hotelprice2 set RoomNewPrice='{0}',BroadBand='{1}',BedType='{2}' where ";
                //if (dt != null)
                //{
                //    for (int i = 0; i < dt.Tables[0].Rows.Count; i++)
                //    {
                //        string id"].ToString();
                //        string Oldprice = dt.Tables[0].Rows[i]["RoomNewPrice"].ToString();
                //        string Oldband = dt.Tables[0].Rows[i]["BroadBand"].ToString();
                //        string OldBedType = dt.Tables[0].Rows[i]["BroadBand"].ToString();
                //        if (Oldprice.IndexOf("&nbsp") > 1)
                //        {
                //            int start = Oldprice.IndexOf("&nbsp");
                //            string newprice = Oldprice.Substring(0, start);
                //            string newband = Oldprice.Substring(0, start + 5);

                //            sql = string.Format(sql, newprice, newband, Oldband);
                //            if (DBHelper.ExecuteSql(sql) > 0)
                //            {
                //                newband = newprice = null;
                //                continue;
                //            }
                //        }
                //        else
                //        {
                //            continue;
                //        }

                //    }


                //}

                //System.Data.DataSet dt = DBHelper.Query("select * from hotelinfo");
                //string sql = "update hotelinfo set subshop='{0}',Name='{1}' where ;
                //if (dt != null)
                //{
                //    for (int i = 0; i < dt.Tables[0].Rows.Count; i++)
                //    {
                //        sql = "update hotelinfo set subshop='{0}',Name='{1}' where ;
                //        string id"].ToString();
                //        string OldName = dt.Tables[0].Rows[i]["Name"].ToString();
                //        if (OldName.IndexOf("(") > 1)
                //        {
                //            int start = OldName.IndexOf("(");
                //            string newName = OldName.Substring(0, start);
                //            string subshop = OldName.Substring(start + 1, OldName.Length - start - 2);
                //            sql = string.Format(sql, subshop, newName, id);

                //            if (DBHelper.ExecuteSql(sql) > 0)
                //            {
                //                = newName = subshop = null;
                //                continue;
                //            }
                //        }
                //        else
                //        {
                //            continue;
                //        }

                //    }


                //}
                //          北京    >                          朝阳区    >                          燕莎/酒仙桥/丽都         
                System.Data.DataSet dt = DBHelper.Query("select * from hotelinfo");
                string sql = "update hotelinfo set position='{0}' where ;
                if (dt != null)
                {
                    for (int i = 37; i < dt.Tables[0].Rows.Count; i++)
                    {
                        sql = "update hotelinfo set position='{0}' where ;
                        string id"].ToString();
                        string OldName = dt.Tables[0].Rows[i]["area"].ToString();
                        if (OldName.IndexOf(">") > 1)
                        {
                            OldName = OldName.Replace(" ", "");
                            int start = OldName.IndexOf(">");
                            start++;
                            int end = OldName.IndexOf(">", start);
                            if (end > -1)
                                OldName = OldName.Substring(start, end - start);
                            else
                                OldName = OldName.Substring(start);
                            sql = string.Format(sql, OldName, id);

                            if (DBHelper.ExecuteSql(sql) > 0)
                            {
                                = null;
                                continue;
                            }
                        }
                        else
                        {
                            continue;
                        }
                    }
                }
                MessageBox.Show("i am ok");

            }
            /// <summary>
            /// 删除HTML标识
            /// </summary>
            /// <param ></param>
            /// <returns></returns>
            public string DropHTMLTag(string htmlString)
            {
                htmlString = Regex.Replace(htmlString, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
                htmlString = Regex.Replace(htmlString, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
                htmlString = Regex.Replace(htmlString, @"-->", "", RegexOptions.IgnoreCase);
                htmlString = Regex.Replace(htmlString, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
                htmlString = Regex.Replace(htmlString, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
                htmlString = Regex.Replace(htmlString, @"<!--.*", "", RegexOptions.IgnoreCase);
                //htmlString = Regex.Replace(htmlString, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
                //htmlString = Regex.Replace(htmlString, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
                //htmlString = Regex.Replace(htmlString, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
                //htmlString = Regex.Replace(htmlString, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
                //htmlString = Regex.Replace(htmlString, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
                //htmlString = Regex.Replace(htmlString, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
                //htmlString = Regex.Replace(htmlString, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);
                //htmlString = Regex.Replace(htmlString, @"&#(\d+);", "", RegexOptions.IgnoreCase);

                //htmlString.Replace("<", "");
                //htmlString.Replace(">", "");
                //htmlString.Replace("\r\n", "");


                return htmlString;
            }

            #region 悟能啊悟能


            private void btn8j_Click(object sender, EventArgs e)
            {
                progressBar1.Maximum = 18;
                this.Text = "正在采集悟能,请等待。。。";
                d8jmain = new d8JMain(showmsg8j);
                Thread tCtrip = new Thread(Start8j);
                tCtrip.IsBackground = true;
                tCtrip.Start();
                Start8j();

            }
            void showmsg8j(int i)
            {

                progressBar1.Value++;
                label4.Text = string.Format("现是ID:{0},已完成:{1}" + i, (progressBar1.Value / (float)progressBar1.Maximum).ToString("p"));
                if (progressBar1.Value == progressBar1.Maximum)
                    MessageBox.Show("finish");
            }
            void Start8j()
            {


                string url = "http://bj.8j.com/biz/restaurants/BJS0{0}";
                List<string> HotelLink = new List<string>();
                for (int i = 1; i <= 18; i++)
                {

                    url = string.Format("http://bj.8j.com/biz/restaurants/BJS0{0:d2}", i);
                    url = GetWebContent(url, Encoding.UTF8);
                    if (null == url && string.Empty == url && url.IndexOf("对不起") < 1)
                    {
                        continue;
                    }
                    int count = GetAreaHotelCount(url);
                    url = GetShortHTMLContent(url);

                    HotelLink.AddRange(GetHtml(url));
                    for (int ss = 0; ss < HotelLink.Count; ss++)
                    {
                        insert8jAllHotelLink(HotelLink[ss], i);
                    }
                    HotelLink.Clear();
                    for (int j = 2; j <= count; j++)
                    {
                        try
                        {

                            // Thread.Sleep(10);

                            url = null;
                            url = string.Format("http://bj.8j.com/biz/restaurants/BJS0{0:d2}/{1}", i, j);
                            url = GetWebContent(url, Encoding.UTF8);
                            if (null == url && string.Empty == url && url.IndexOf("对不起") < 1)
                            {
                                continue;
                            }
                            url = GetShortHTMLContent(url);
                            HotelLink.AddRange(GetHtml(url));
                            for (int ss = 0; ss < HotelLink.Count; ss++)
                            {
                                insert8jAllHotelLink(HotelLink[ss], i);
                            }
                            HotelLink.Clear();
                            // this.BeginInvoke(d8jmain, new object[] { i, j, count });
                        }
                        catch (Exception ex)
                        {

                            TextWriter tw = new StreamWriter(System.Windows.Forms.Application.StartupPath + "\\CtripErrorlog.log", true);
                            tw.Flush();
                            tw.WriteLine("错误ID: " + j.ToString() + "\r\n 错误原因:" + ex.Message);
                            tw.WriteLine("-------------------------------------------");
                            tw.Flush();
                            tw.Close();
                            tw = null;
                            continue;
                        }
                    }


                }
                url = null;

            }
            /// <summary>
            /// 把所有抓到的HTMlLink存取
            /// </summary>
            /// <param ></param>
            /// <returns></returns>
            bool insert8jAllHotelLink(string kk, int id)
            {
                string sql = "INSERT INTO [a]([ssss],area) VALUES('{0}','{1}')";
                sql = string.Format(sql, kk, GetAreaInfo(id));
                return DBHelper.ExecuteSql(sql) > 0;
            }
            string GetAreaInfo(int id)
            {
                switch (id)
                {
                    case 1:
                        return "朝阳区";

                    case 2:
                        return "海淀区";

                    case 3:

                        return "东城区";

                    case 4:
                        return "西城区";
                    case 5:
                        return "宣武区";
                    case 6:

                        return "崇文区";
                    case 7:
                        return "丰台区";
                    case 8:
                        return "石景山区";
                    case 9:
                        return "房山区";
                    case 10:
                        return "通州区";
                    case 11:
                        return "昌平区";
                    case 12:
                        return "顺义区";
                    case 13:
                        return "大兴区";
                    case 14:
                        return "怀柔区";
                    case 15:
                        return "门头沟区";
                    case 16:
                        return "平谷区";
                    case 17:
                        return "延庆县";
                    case 18:
                        return "密云县";
                    default:
                        return "其它地区";

                }


            }
            /// <summary>
            /// 去掉干扰字符
            /// </summary>
            /// <param ></param>
            /// <returns></returns>
            string GetShortHTMLContent(string line)
            {
                int start = 0, end = 0;

                start = line.IndexOf("<!-- bizlist 列表1开始  -->");
                end = line.IndexOf("<!-- bizlist 列表1结束  -->", start);
                line = line.Substring(start, end - start);
                line = line.Replace("\r", "");
                line = line.Replace("\n", "");
                line = line.Replace("\t", "");
                line = line.Replace("<a", "ウ");
                line = line.Replace("/a>", "ウ");
                return DropHTMLTag(line);


            }
            /// <summary>
            /// 页的所有酒店
            /// </summary>
            /// <param ></param>
            /// <returns>string[]</returns>
            string[] GetHtml(string line)
            {
                try
                {

                    string tmp = null;
                    int start = 0, end = 0;
                    List<string> sss = new List<string>();
                    while (line.IndexOf("href=\"", start) > 0)
                    {
                        line = line.Replace(" ", "");
                        start = line.IndexOf("href=\"", start);
                        if (start < 1)
                            continue;

                        start = start + 6;
                        end = line.IndexOf("\"target", start);
                        if (end < 1)
                            continue;
                        tmp = line.Substring(start, end - start);
                        if (sss.Count == 0)
                        {

                            sss.Add(tmp);
                        }
                        else
                        {
                            if (sss[sss.Count - 1] == tmp)
                            {
                                continue;
                            }
                            else
                            {
                                sss.Add(tmp);
                            }
                        }

                        start = end;
                    }
                    return sss.ToArray();
                }
                catch (Exception ex)
                {
                    return null;

                }

            }
            /// <summary>
            /// 获得酒店总页数
            /// </summary>
            /// <param ></param>
            /// <returns></returns>
            int GetAreaHotelCount(string line)
            {
                int start = 0, end = 0;
                start = line.IndexOf("<!-- pgdn start  -->");
                if (start < 1) return 0;

                end = line.IndexOf("<!-- pgdn end  -->", start);
                if (end < 1) return 0;
                line = line.Substring(start, end - start);
                line = DropHTMLTag(line);
                start = line.IndexOf("共");
                if (start < 1) return 0;
                start++;
                end = line.IndexOf("页", start);
                if (end < 1) return 0;
                return int.Parse(line.Substring(start, end - start));

            }
            #endregion


            #region 悟能详细页


            private void button5_Click(object sender, EventArgs e)
            {
                progressBar1.Maximum = int.Parse(DBHelper.GetSingle("select count(*) from a").ToString());
                this.Text = "正在采集悟能,请等待。。。";
                dfrist = new dFrist(showmsg);
                Thread tCtrip = new Thread(beginGetHotel);
                tCtrip.IsBackground = true;
                tCtrip.Start();
                //  beginGetHotel();
            }
            void beginGetHotel()
            {
                System.Data.DataSet dt = DBHelper.Query("select * from a");
                string url = null, tmp = null;
                string tel = null, jAdd = null, onlyfoot = null, Address = null, postcode = null, area = null, tag = null, siteurl = null, remark = null;
                for (int i = 0; i < dt.Tables[0].Rows.Count; i++)
                {

                    try
                    {
                        this.BeginInvoke(dfrist, new object[] { i });
                        jAdd = url = dt.Tables[0].Rows[i][1].ToString();
                        url = GetWebContent(url, Encoding.UTF8);
                        url = url.Replace("\r", "");
                        url = url.Replace("\n", "");
                        url = url.Replace("\t", "");
                        if (null != url && string.Empty != url)
                        {
                            int start = 0, end = 0;
                            start = url.IndexOf("<!-- qyxx  开始  -->");
                            if (start < 1)
                            {
                                continue;
                            }
                            end = url.IndexOf("<!-- info nav end -->", start);
                            tmp = url.Substring(start, end - start);
                            start = tmp.IndexOf("<strong>");
                            if (start < 1)
                            {
                                ;
                            }
                            else
                            {
                                start += 8;
                                end = tmp.IndexOf("</strong>", start);
                                end - start);
                                }

                            start = tmp.IndexOf("<strong>", end);
                            start += 8;
                            end = tmp.IndexOf("</strong>", start);
                            tel = tmp.Substring(start, end - start);
                            start = tmp.IndexOf("<p>", end);
                            start += 3;
                            end = tmp.IndexOf("<br/>", start);
                            Address = tmp.Substring(start, end - start);
                            Address = ReplaceSingleQuotes(Address);
                            start = end;
                            start += 5;
                            end = tmp.IndexOf("</p>", end);
                            postcode = tmp.Substring(start, end - start);
                            //area=dt.Tables[0].Rows[i][2].ToString();
                            start = tmp.IndexOf("区域:", end);
                            if (start < 1)
                            {
                                area = "未知";
                            }
                            else
                            {
                                start += 3;
                                end = tmp.IndexOf("</p>", start);
                                area = DropHTMLTag(tmp.Substring(start, end - start));
                                area = ReplaceSingleQuotes(area);
                            }


                            start = tmp.IndexOf("标签:", end);
                            if (start < 1)
                            {
                                tag = "未知";
                            }
                            else
                            {
                                start += 3;
                                end = tmp.IndexOf("</p>", start);
                                tag = DropHTMLTag(tmp.Substring(start, end - start));
                                tag = ReplaceSingleQuotes(tag);
                            }


                            start = tmp.IndexOf("网址:", end);
                            if (start < 1)
                            {
                                siteurl = "未知";
                            }
                            else
                            {
                                start += 3;
                                end = tmp.IndexOf("</p>", start);
                                siteurl = DropHTMLTag(tmp.Substring(start, end - start));
                            }
                            start = url.IndexOf("特色推荐:");
                            if (start < 1)
                            {
                                onlyfoot = "未知";
                            }
                            else
                            {
                                start += 5;
                                end = url.IndexOf("</p>", start);
                                onlyfoot = url.Substring(start, end - start);
                                onlyfoot = ReplaceSingleQuotes(onlyfoot);
                            }

                            start = url.IndexOf("<!-- jj开始  -->");
                            if (start < 1)
                            {
                                remark = "未知";
                            }
                            else
                            {

                                end = url.IndexOf("<!-- jj结束  -->", start);
                                tmp = url.Substring(start, end - start);
                                if (tmp.IndexOf("简介") < 1)
                                {
                                    remark = "未知";

                                }
                                else
                                {
                                    start = tmp.IndexOf("简介");
                                    end = tmp.IndexOf("</h4>", start);
                                    remark = DropHTMLTag(tmp.Substring(start, end - start));
                                    remark = ReplaceSingleQuotes(remark);
                                }

                            }

                            inser8jHotlInfo(name, tel, onlyfoot, Address, postcode, area, tag, siteurl, remark, jAdd);
                            = onlyfoot = Address = postcode = area = tag = siteurl = remark = null;
                        }
                    }
                    catch (Exception ex)
                    {

                        TextWriter tw = new StreamWriter(System.Windows.Forms.Application.StartupPath + "\\CtripErrorlog.log", true);
                        tw.Flush();
                        tw.WriteLine("错误ID: " + i.ToString() + "\r\n 错误原因:" + ex.Message);
                        tw.WriteLine("-------------------------------------------");
                        tw.Flush();
                        tw.Close();
                        tw = null;
                        continue;
                    }
                }

            }
            /// <summary>
            /// 替换单引号成中文的单引号
            /// </summary>
            /// <param ></param>
            /// <returns></returns>
            public string ReplaceSingleQuotes(string hTML)
            {
                return hTML.Replace("\'", "`");
            }
            void inser8jHotlInfo(string name, string tel, string onlyfoot, string Address, string postcode, string area, string tag, string siteurl, string remark, string jAdd)
            {
                string sql = "INSERT INTO [HotelInfo]([name], [Address], [Tel], [Postcode], [area], [tag], [siteurl], [onlyfoot], [remark],[8jAddress]) VALUES('{0}','{1}','{2}','{3}','{4}','{5}','{6}','{7}','{8}','{9}')";
                sql = string.Format(sql, name, Address, tel, postcode, area, tag, siteurl, onlyfoot, remark, jAdd);
                DBHelper.ExecuteSql(sql);
            }

            #endregion

            /// <summary>
            /// Handler named for the form's Load event; the body is empty, so it performs no work.
            /// </summary>
            private void Form1_Load(object sender, EventArgs e)
            {

            }

            private void button6_Click(object sender, EventArgs e)
            {
                System.Data.DataSet dt1 = DBHelper.Query("select star5,city from [00]");
                System.Data.DataSet dt2 = DBHelper.Query("select id,city from ctriptwo");
                string sql = null;
                for (int i = 0; i < dt2.Tables[0].Rows.Count; i++)
                {
                    for (int ii = 0; ii < dt1.Tables[0].Rows.Count; ii++)
                    {
                        if (dt2.Tables[0].Rows[i]["city"].ToString() == dt1.Tables[0].Rows[ii]["city"].ToString())
                        {
                            sql = "UPDATE ctriptwo set startLevelcount0={0} where ;
                            sql = string.Format(sql, dt1.Tables[0].Rows[ii]["star5"], dt2.Tables[0].Rows[i]["id"]);
                            DBHelper.ExecuteSql(sql);
                        }
                        else
                            continue;

                    }

                    /*
             INSERT INTO [LocalTest].[dbo].[ctripTwo]
               ([startLevelcount0]
               ,[startLevel5count4]
               ,[startLevel5count5]
               ,[startLevel5count3]
               ,[startLevel5count2]
               ,[startLevel5count1]
               ,[City])
         VALUES
                     */
                   
                  
                }
                MessageBox.Show("ok");
            }


           

            private void button7_Click(object sender, EventArgs e)
            {
               
                this.Text = "正在采集点评,请等待...";
                label5.Text=System.DateTime.Now.ToString("hh时mm分ss秒");          
                Thread tCtrip = new Thread(Shop);
                tCtrip.IsBackground = true;
                tCtrip.Start();                      
                                     
            }
            public void Shop()
            {
                int shopid;
                int log = 0; int iDivStart = 0;
                string StrSql="";
                string ShopTag = "";
                string DianpingInfo="";
               
                string ShopCity = ""; string ShopArea = ""; string ShopID = ""; string ShopName = ""; string ShopSort = ""; string ShopAddress = ""; string ShopPhone = "";
                try
                {
                    for (shopid = 1612436; shopid < 2700000; shopid++)
                    {
                        string Url = "http://www.dianping.com/shop/" + shopid;
                        string strResult = GetHtmlCode(Url);
                        if (ReturnMsg(strResult))
                        {
                            try
                            {
                                string divStart = @"<div ShopGuide"">";
                                string divEnd = @"<div Reviews"">";
                                iDivStart = strResult.IndexOf(divStart);
                                int iDivEnd = strResult.IndexOf(divEnd);
                                DianpingInfo = strResult.Substring(iDivStart, iDivEnd - iDivStart);
                            }
                            catch (Exception ex)
                            {

                                TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                                tw.Flush();
                                tw.WriteLine("错误原因:" + ex.Message + Url);
                                tw.WriteLine("-------------------------------------------");
                                tw.Flush();
                                tw.Close();
                                tw = null;

                            }
                            //提取导航条中信息
                            try
                            {
                                string NavigationInfo = @"<div Shop"">";
                                int NavigationEnd = strResult.IndexOf(NavigationInfo);
                                string Navigation = strResult.Substring(iDivStart, NavigationEnd - iDivStart);
                                string str = DropHTMLTag(Navigation).Replace("&nbsp;", "ml").Replace(">", "ml");
                                string ml = "ml";
                                string[] resultString = Regex.Split(str, ml, RegexOptions.IgnoreCase);
                                string str1 = (resultString.Length).ToString();
                                string str2 = "5";
                                string str3 = "7";
                                string str4 = "6";

                                if (str1 == str2)
                                {
                                    try
                                    {
                                        //提取地址ShopAddress
                                        int AddressEnd = DianpingInfo.IndexOf("地图");
                                        int AddressStart = DianpingInfo.IndexOf("<b>地址:");
                                        if (AddressEnd > 0)
                                        {
                                            ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");
                                        }
                                        else if ((AddressEnd = DianpingInfo.IndexOf("电话:")) > 0)
                                        {
                                            ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");
                                        }
                                        else
                                        {
                                            AddressEnd = DianpingInfo.IndexOf(@"<div Update"">");
                                            ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");
                                        }
                                    }
                                    catch (Exception ex)
                                    {

                                        TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                                        tw.Flush();
                                        tw.WriteLine("错误原因:" + ex.Message + Url);
                                        tw.WriteLine("-------------------------------------------");
                                        tw.Flush();
                                        tw.Close();
                                        tw = null;

                                    }

                                    //商店ID名ShopID
                                    ShopID = shopid.ToString();
                                    //提取所在城市ShopCity
                                    ShopCity = resultString[0].ToString();
                                    //商店名ShopName
                                    ShopName = resultString[4].ToString().Replace("'", ".");
                                    //商店所在区
                                    ShopArea = resultString[2].ToString();
                                    //商店属于购物类别
                                    ShopSort = resultString[3].ToString();

                                    try
                                    {   //商店联系电话
                                        int PhoneStart = strResult.IndexOf("电话:");
                                        //有电话走这边,没电话不添加
                                        if (PhoneStart.ToString() != "-1")
                                        {
                                            int PhoneEnd = strResult.IndexOf(@"<div Update"">");
                                            ShopPhone = DropHTMLTag(strResult.Substring(PhoneStart, PhoneEnd - PhoneStart)).Replace("电话:", "").Replace("&nbsp;", "");

                                        }
                                        else
                                        {
                                            ShopPhone = "无联系电话";
                                        }
                                    }
                                    catch (Exception ex)
                                    {

                                        TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                                        tw.Flush();
                                        tw.WriteLine("错误原因:" + ex.Message + Url);
                                        tw.WriteLine("-------------------------------------------");
                                        tw.Flush();
                                        tw.Close();
                                        tw = null;

                                    }


                                    try
                                    {
                                        //分类标签ShopTag
                                        int TagStart = strResult.IndexOf(@"<div ShopTag"">");
                                        int TagEnd = strResult.IndexOf(@"<div Reviews"">");

                                        ShopTag = DropHTMLTag(strResult.Substring(TagStart, TagEnd - TagStart)).Replace("&nbsp;", "");
                                    }

                                    catch (Exception ex)
                                    {

                                        TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                                        tw.Flush();
                                        tw.WriteLine("错误原因:" + ex.Message + Url);
                                        tw.WriteLine("-------------------------------------------");
                                        tw.Flush();
                                        tw.Close();
                                        tw = null;

                                    }

                                }
                                else if (str1 == str3)
                                {

                                    try
                                    {
                                        //提取地址ShopAddress
                                        int AddressEnd = DianpingInfo.IndexOf("地图");
                                        int AddressStart = DianpingInfo.IndexOf("<b>地址:");
                                        if (AddressEnd > 0)
                                        {
                                            ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");
                                        }
                                        else if ((AddressEnd = DianpingInfo.IndexOf("电话:")) > 0)
                                        {
                                            ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");
                                        }
                                        else
                                        {
                                            AddressEnd = DianpingInfo.IndexOf(@"<div Update"">");
                                            ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");
                                        }
                                    }
                                    catch (Exception ex)
                                    {

                                        TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                                        tw.Flush();
                                        tw.WriteLine("错误原因:" + ex.Message + Url);
                                        tw.WriteLine("-------------------------------------------");
                                        tw.Flush();
                                        tw.Close();
                                        tw = null;

                                    }

                                    //商店ID名ShopID
                                    ShopID = shopid.ToString();
                                    //提取所在城市ShopCity
                                    ShopCity = resultString[0].ToString();
                                    //商店名ShopName
                                    ShopName = resultString[5].ToString().Replace("'", ".");
                                    //商店所在区
                                    ShopArea = resultString[2].ToString();
                                    //商店属于购物类别
                                    ShopSort = resultString[4].ToString();

                                    try
                                    {   //商店联系电话
                                        int PhoneStart = strResult.IndexOf("电话:");
                                        //有电话走这边,没电话不添加
                                        if (PhoneStart.ToString() != "-1")
                                        {
                                            int PhoneEnd = strResult.IndexOf(@"<div Update"">");
                                            ShopPhone = DropHTMLTag(strResult.Substring(PhoneStart, PhoneEnd - PhoneStart)).Replace("电话:", "").Replace("&nbsp;", "");

                                        }
                                        else
                                        {
                                            ShopPhone = "无联系电话";
                                        }

                                    }
                                    catch (Exception ex)
                                    {

                                        TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                                        tw.Flush();
                                        tw.WriteLine("错误原因:" + ex.Message + Url);
                                        tw.WriteLine("-------------------------------------------");
                                        tw.Flush();
                                        tw.Close();
                                        tw = null;

                                    }


                                    try
                                    {
                                        //分类标签ShopTag
                                        int TagStart = strResult.IndexOf(@"<div ShopTag"">");
                                        int TagEnd = strResult.IndexOf(@"<div Reviews"">");

                                        ShopTag = DropHTMLTag(strResult.Substring(TagStart, TagEnd - TagStart)).Replace("&nbsp;", "");
                                    }

                                    catch (Exception ex)
                                    {

                                        TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                                        tw.Flush();
                                        tw.WriteLine("错误原因:" + ex.Message + Url);
                                        tw.WriteLine("-------------------------------------------");
                                        tw.Flush();
                                        tw.Close();
                                        tw = null;

                                    }

                                }
                                else if (str1 == str4)
                                {

                                    try
                                    {
                                        //提取地址ShopAddress
                                        int AddressEnd = DianpingInfo.IndexOf("地图");
                                        int AddressStart = DianpingInfo.IndexOf("<b>地址:");
                                        if (AddressEnd > 0)
                                        {
                                            ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");
                                        }
                                        else if ((AddressEnd = DianpingInfo.IndexOf("电话:")) > 0)
                                        {
                                            ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");
                                        }
                                        else
                                        {
                                            AddressEnd = DianpingInfo.IndexOf(@"<div Update"">");
                                            ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");
                                        }
                                    }
                                    catch (Exception ex)
                                    {

                                        TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                                        tw.Flush();
                                        tw.WriteLine("错误原因:" + ex.Message + Url);
                                        tw.WriteLine("-------------------------------------------");
                                        tw.Flush();
                                        tw.Close();
                                        tw = null;

                                    }

                                    //商店ID名ShopID
                                    ShopID = shopid.ToString();
                                    //提取所在城市ShopCity
                                    ShopCity = resultString[0].ToString();
                                    //商店名ShopName
                                    ShopName = resultString[5].ToString().Replace("'", ".");
                                    //商店所在区
                                    ShopArea = resultString[3].ToString();
                                    //商店属于购物类别
                                    ShopSort = resultString[4].ToString();

                                    try
                                    {   //商店联系电话
                                        int PhoneStart = strResult.IndexOf("电话:");
                                        //有电话走这边,没电话不添加
                                        if (PhoneStart.ToString() != "-1")
                                        {
                                            int PhoneEnd = strResult.IndexOf(@"<div Update"">");
                                            ShopPhone = DropHTMLTag(strResult.Substring(PhoneStart, PhoneEnd - PhoneStart)).Replace("电话:", "").Replace("&nbsp;", "");

                                        }
                                        else
                                        {
                                            ShopPhone = "无联系电话";
                                        }

                                    }
                                    catch (Exception ex)
                                    {

                                        TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                                        tw.Flush();
                                        tw.WriteLine("错误原因:" + ex.Message + Url);
                                        tw.WriteLine("-------------------------------------------");
                                        tw.Flush();
                                        tw.Close();
                                        tw = null;

                                    }


                                    try
                                    {
                                        //分类标签ShopTag
                                        int TagStart = strResult.IndexOf(@"<div ShopTag"">");
                                        int TagEnd = strResult.IndexOf(@"<div Reviews"">");

                                        ShopTag = DropHTMLTag(strResult.Substring(TagStart, TagEnd - TagStart)).Replace("&nbsp;", "");
                                    }

                                    catch (Exception ex)
                                    {

                                        TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                                        tw.Flush();
                                        tw.WriteLine("错误原因:" + ex.Message + Url);
                                        tw.WriteLine("-------------------------------------------");
                                        tw.Flush();
                                        tw.Close();
                                        tw = null;

                                    }

                                }
                                else
                                {
                                    try
                                    {
                                        //提取地址ShopAddress
                                        int AddressEnd = DianpingInfo.IndexOf("地图");
                                        int AddressStart = DianpingInfo.IndexOf("<b>地址:");
                                        if (AddressEnd > 0)
                                        {
                                            ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");
                                        }
                                        else if ((AddressEnd = DianpingInfo.IndexOf("电话:")) > 0)
                                        {
                                            ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");
                                        }
                                        else
                                        {
                                            AddressEnd = DianpingInfo.IndexOf(@"<div Update"">");
                                            ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");
                                        }
                                    }
                                    catch (Exception ex)
                                    {

                                        TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                                        tw.Flush();
                                        tw.WriteLine("错误原因:" + ex.Message + Url);
                                        tw.WriteLine("-------------------------------------------");
                                        tw.Flush();
                                        tw.Close();
                                        tw = null;

                                    }

                                    //商店ID名ShopID
                                    ShopID = shopid.ToString();
                                    //提取所在城市ShopCity
                                    ShopCity = resultString[0].ToString();
                                    //商店名ShopName
                                    ShopName = resultString[3].ToString().Replace("'", ".");
                                    //商店没有所在区
                                    ShopArea = "无";
                                    //商店属于购物类别
                                    ShopSort = resultString[2].ToString();

                                    try
                                    {   //商店联系电话
                                        int PhoneStart = strResult.IndexOf("电话:");
                                        //有电话走这边,没电话不添加
                                        if (PhoneStart.ToString() != "-1")
                                        {
                                            int PhoneEnd = strResult.IndexOf(@"<div Update"">");
                                            ShopPhone = DropHTMLTag(strResult.Substring(PhoneStart, PhoneEnd - PhoneStart)).Replace("电话:", "").Replace("&nbsp;", "");

                                        }
                                        else
                                        {
                                            ShopPhone = "无联系电话";
                                        }
                                    }
                                    catch (Exception ex)
                                    {

                                        TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                                        tw.Flush();
                                        tw.WriteLine("错误原因:" + ex.Message + Url);
                                        tw.WriteLine("-------------------------------------------");
                                        tw.Flush();
                                        tw.Close();
                                        tw = null;

                                    }


                                    try
                                    {
                                        //分类标签ShopTag
                                        int TagStart = strResult.IndexOf(@"<div ShopTag"">");
                                        int TagEnd = strResult.IndexOf(@"<div Reviews"">");

                                        ShopTag = DropHTMLTag(strResult.Substring(TagStart, TagEnd - TagStart)).Replace("&nbsp;", "");
                                    }

                                    catch (Exception ex)
                                    {

                                        TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                                        tw.Flush();
                                        tw.WriteLine("错误原因:" + ex.Message + Url);
                                        tw.WriteLine("-------------------------------------------");
                                        tw.Flush();
                                        tw.Close();
                                        tw = null;

                                    }


                                }


                            }
                            catch (Exception ex)
                            {

                                TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                                tw.Flush();
                                tw.WriteLine("错误原因:" + ex.Message + Url);
                                tw.WriteLine("-------------------------------------------");
                                tw.Flush();
                                tw.Close();
                                tw = null;

                            }

                            //插入数据库
                            StrSql = "insert into ShopInfo values(" + ShopID + ",'" + ShopName + "','" + ShopAddress + "','" + ShopPhone + "','" + ShopCity + "','" + ShopArea + "','" + ShopSort + "','" + ShopTag + "')";
                            log = DBHelper.ExecuteSql(StrSql) + log;
                           

                        }
                        else
                        {

                        }
                        continue;
                    }
                }
                catch (Exception ex)
                {

                    TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                    tw.Flush();
                    tw.WriteLine("错误原因:" + ex.Message);
                    tw.WriteLine("-------------------------------------------");
                    tw.Flush();
                    tw.Close();
                    tw = null;
                   
                }       
                finally
                {

                    MessageBox.Show("完成" + System.DateTime.Now.ToString("hh时mm分ss秒"));
                }
               
              
               
               
            }
            //获取网源码方法
            public string GetHtmlCode(string Url)
            {
                Encoding encoding = Encoding.GetEncoding("utf-8");
                string strResult = "";
                try
                {
                    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
                    request.Timeout = 30000;
                    request.Headers.Set("Pragma", "no-cache");
                    request.CookieContainer = new CookieContainer();
                    request.Credentials = CredentialCache.DefaultCredentials;
                    request.Referer = Url;
                    request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";
                    HttpWebResponse response = (HttpWebResponse)request.GetResponse();
                    Stream streamReceive = response.GetResponseStream();
                    StreamReader streamReader = new StreamReader(streamReceive, encoding);
                    strResult = streamReader.ReadToEnd();
                    streamReceive.Close();
                    streamReader.Close();
                    streamReceive = null;
                    streamReader = null;
                }
                catch (Exception ex)
                {
                   
                }
                return strResult;
            }
            //提供BooL判断是否继续
            public bool ReturnMsg(string strResult)
            {   //获取ID
                int iTitleStart = strResult.IndexOf("<title>");
                int iTitleEnd = strResult.IndexOf("</title>");
                string StrWeb = strResult.Substring(iTitleStart, iTitleEnd - iTitleStart);
                string StrTitle = DropHTMLTag(StrWeb);
                //获取时候是购物类型网StrSort
                string StrSort="";
               
                try
                {
                    string divStart = @"<div ShopGuide"">";
                    string divEnd = @"<div Reviews"">";
                    int iDivStart = strResult.IndexOf(divStart);
                    int iDivEnd = strResult.IndexOf(divEnd);
                    string NavigationInfo = @"<div Shop"">";
                    int NavigationEnd = strResult.IndexOf(NavigationInfo);
                    string Navigation = strResult.Substring(iDivStart, NavigationEnd - iDivStart);
                    string str = DropHTMLTag(Navigation).Replace("&nbsp;", "ml").Replace(">", "ml");
                    string ml = "ml";
                    string[] resultString= Regex.Split(str, ml, RegexOptions.IgnoreCase);
                    StrSort = resultString[1].ToString();
                }
                catch
                {
                    StrSort = "出错";
     
                }
                finally
                {
                   
                }
                if (StrSort!="购物"||StrTitle == "该商户不存在或已被删除" )
                {
                    return false;
                }
                else
                {
                    return true;
                }

            }
         
        }     

    }

  • 相关阅读:
    标准输入/输出通道
    不要在纠结负数的表示了
    Coursera公开课-Machine_learing:编程作业7
    Heap堆
    广义表的实现
    二叉树的实现
    模拟实现strstr和strrstr
    栈和队列常考面试题(二)
    栈和队列常考面试题(一)
    vector迭代器失效的几种情况
  • 原文地址:https://www.cnblogs.com/hfzsjz/p/1656722.html
Copyright © 2011-2022 走看看