zoukankan      html  css  js  c++  java
  • c# winform webBrowser爬取数据

    // Stage of the scrape that the DocumentCompleted handler is currently in:
    // 0 = the search (home) page is expected, 1 = the result page is expected.
    int index = 0;

    /// <summary>
    /// Drives the two-step scrape. On the first completed page it fills the
    /// "keywords" input with the text of textBox6 and clicks the search button;
    /// on the second it reads DatasheetsTable1 / SpecificationTable1 and copies
    /// the extracted values into textBox3, textBox4 and textBox5.
    /// </summary>
    /// <param name="sender">The control raising the event (unused).</param>
    /// <param name="e">Event data; <c>e.Url</c> is the URL of the document that finished loading.</param>
    private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
    {
        // Only act once the browser reports the page as completely loaded.
        if (webBrowser1.ReadyState != WebBrowserReadyState.Complete)
        {
            return;
        }

        textBox2.AppendText("就绪.....\r\n");

        if (index == 1)
        {
            textBox2.AppendText("进入" + e.Url.ToString() + "\r\n");
        }
        else if (index != 0)
        {
            return;
        }

        // Ignore completions whose URL differs from the browser's current URL
        // (the event can fire for documents other than the top-level page).
        if (webBrowser1.Url == null || e.Url.ToString() != webBrowser1.Url.ToString())
        {
            return;
        }

        HtmlDocument doc = webBrowser1.Document;
        if (doc == null)
        {
            return;
        }

        this.cobURL.Text = doc.Url.ToString();
        this.TabText.Text = this.webBrowser1.DocumentTitle;

        if (index == 0)
        {
            HtmlElementCollection keywordInputs = doc.GetElementsByTagName("input").GetElementsByName("keywords");
            HtmlElement searchButton = doc.All["header-search-button"];
            if (keywordInputs.Count == 0 || searchButton == null)
            {
                // The page does not have the expected search form; stop instead of throwing.
                textBox2.AppendText("该地址未能找到所需数据.....\r\n");
                FinishScrape();
                return;
            }

            // The text to search for.
            keywordInputs[0].InnerText = this.textBox6.Text.Trim();
            // Home page: trigger the click so the browser navigates to the result page.
            searchButton.InvokeMember("click");
            textBox2.AppendText("正在进入子页面.....\r\n");
            index++;
            return;
        }

        HtmlElement datasheetTable = doc.GetElementById("DatasheetsTable1");
        if (datasheetTable == null)
        {
            textBox2.AppendText("该地址未能找到所需数据.....\r\n");
            FinishScrape();
            return;
        }

        // Value "c": second non-empty line of the datasheet table (adjust to the real page layout).
        string[] datasheetLines = SplitFirstChildText(datasheetTable);
        textBox3.Text = ExtractField(datasheetLines, 1, "c数据表头");

        // Values "a" and "b": eighth and ninth non-empty lines of the specification table.
        string[] specificationLines = SplitFirstChildText(doc.GetElementById("SpecificationTable1"));
        textBox4.Text = ExtractField(specificationLines, 7, "a数据表头");
        textBox5.Text = ExtractField(specificationLines, 8, "b数据表头");

        FinishScrape();
    }

    // Resets the stage counter, logs the end of the run and detaches this handler.
    private void FinishScrape()
    {
        index = 0;
        textBox2.AppendText("获取结束.....\r\n");
        webBrowser1.DocumentCompleted -= new System.Windows.Forms.WebBrowserDocumentCompletedEventHandler(this.webBrowser1_DocumentCompleted);
    }

    // Returns the non-empty text lines of the element's first child, or an empty array when unavailable.
    private static string[] SplitFirstChildText(HtmlElement table)
    {
        if (table == null || table.FirstChild == null || table.FirstChild.OuterText == null)
        {
            return new string[0];
        }

        return table.FirstChild.OuterText.Split(new string[] { "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries);
    }

    // Returns lines[position] with the header text removed and trimmed, or "" when that line does not exist.
    private static string ExtractField(string[] lines, int position, string header)
    {
        if (lines == null || position < 0 || position >= lines.Length)
        {
            return "";
        }

        return lines[position].Replace(header, "").Trim();
    }
    /// <summary>
    /// Starts a new scrape: clears the log and result boxes, attaches the
    /// DocumentCompleted handler and navigates the hidden browser to the start page.
    /// Does nothing when textBox6 contains no search text.
    /// </summary>
    /// <param name="sender">The button raising the event (unused).</param>
    /// <param name="e">Event data (unused).</param>
    private void button7_Click(object sender, EventArgs e)
    {
        // The handler trims the search text, so whitespace-only input is treated as empty.
        if (textBox6.Text.Trim().Length == 0)
        {
            return;
        }

        textBox2.Clear();
        textBox3.Text = "";
        textBox4.Text = "";
        textBox5.Text = "";
        textBox2.AppendText("正在获取中.....\r\n");

        // Restart from the first stage even if a previous run never finished.
        index = 0;

        // Detach before attaching so repeated clicks never leave the handler subscribed twice,
        // and attach before navigating so the first completion cannot be missed.
        this.webBrowser1.DocumentCompleted -= new System.Windows.Forms.WebBrowserDocumentCompletedEventHandler(this.webBrowser1_DocumentCompleted);
        this.webBrowser1.DocumentCompleted += new System.Windows.Forms.WebBrowserDocumentCompletedEventHandler(this.webBrowser1_DocumentCompleted);

        // NOTE: "地址" is a placeholder; replace it with the absolute URL of the site to scrape.
        this.webBrowser1.Url = new Uri("地址");
    }

    界面大致如此,webBrowser隐藏了。要把webBrowser的ScriptErrorsSuppressed设置为True,否则会弹出script错误

  • 相关阅读:
    Ubuntu设置文件默认打开方式
    车险与费用计算(仅做参考)
    房贷计算
    PHP敏感词处理
    记一次,接口pending
    layer confirm确认框,多个按钮
    crontab vim 模式
    git指定迁出目录
    mysql树形结构
    Kubeflow实战: 入门介绍与部署实践
  • 原文地址:https://www.cnblogs.com/ykgbk/p/13406001.html
Copyright © 2011-2022 走看看