zoukankan      html  css  js  c++  java
  • 实现百度搜索页面网页遍历

    using System;
    using System.Collections.Generic;
    using System.ComponentModel;
    using System.Data;
    using System.Drawing;
    using System.Text;
    using System.Windows.Forms;
    using mshtml;
    using System.Threading;
    using mshtml;
    
    namespace WindowsFormsApplication3
    {
        /// <summary>
        /// Form that runs a Baidu search inside the embedded <c>webBrowser1</c> control,
        /// shows the text extracted from each result entry in a message box, and then
        /// clicks the "next page" link so that the following result page is processed too.
        /// </summary>
        public partial class Form1 : Form
        {
            /// <summary>Base address of the Baidu search request; the escaped query is appended.</summary>
            private const string SearchUrlPrefix = "http://www.baidu.com/s?wd=";

            /// <summary>Exact inner text an element must have to be treated as the "next page" link.</summary>
            private const string NextPageLinkText = "下一页>";

            /// <summary>Marker that ends the address portion inside a result element's inner HTML.</summary>
            private const string UrlTerminator = "&nbsp";

            public Form1()
            {
                InitializeComponent();
            }

            /// <summary>
            /// Opens the Baidu result page for the text typed into <c>textBox1</c>
            /// inside the embedded browser control.
            /// </summary>
            public void baidu()
            {
                // Trim the user's input (not the finished URL) and escape it so that
                // characters such as '&', '#', '+', spaces or non-ASCII text cannot
                // break or truncate the query string.
                string query = (textBox1.Text ?? string.Empty).Trim();
                webBrowser1.Navigate(SearchUrlPrefix + Uri.EscapeDataString(query));
            }

            /// <summary>
            /// Click handler of <c>button1</c>: starts the search.
            /// </summary>
            /// <param name="sender">Control that raised the event.</param>
            /// <param name="e">Event data (unused).</param>
            private void button1_Click(object sender, EventArgs e)
            {
                baidu();
            }

            /// <summary>
            /// Keeps links that request a new window inside this control instead of
            /// letting them open in an external browser window.
            /// </summary>
            /// <param name="sender">Control that raised the event.</param>
            /// <param name="e">Set to cancelled so that no new window is created.</param>
            private void webBrowser1_NewWindow(object sender, CancelEventArgs e)
            {
                e.Cancel = true;

                // The status text is used as the link target. When it is empty there
                // is nothing meaningful to open, so the current page is left alone.
                string target = webBrowser1.StatusText;
                if (!string.IsNullOrEmpty(target))
                {
                    webBrowser1.Navigate(target);
                }
            }

            /// <summary>
            /// Walks every element of the loaded page and simulates a click on the first
            /// one whose inner text equals the "next page" caption. Does nothing when no
            /// page is loaded or no such element exists.
            /// </summary>
            public void bianli()
            {
                IHTMLDocument2 doc = GetDomDocument();
                if (doc == null)
                {
                    return;
                }

                foreach (IHTMLElement ele in doc.all)
                {
                    if (ele.innerText == NextPageLinkText)
                    {
                        ele.click();
                        break;
                    }
                }
            }

            /// <summary>
            /// Runs the result enumeration once the document reported by the event is the
            /// one the control currently exposes.
            /// </summary>
            /// <param name="sender">Control that raised the event.</param>
            /// <param name="e">Carries the URL of the document that finished loading.</param>
            private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
            {
                HtmlDocument page = webBrowser1.Document;
                if (page != null && e.Url == page.Url)
                {
                    bianliwangye();
                }
            }

            /// <summary>
            /// Click handler of <c>button2</c>: manually triggers the result enumeration.
            /// </summary>
            /// <param name="sender">Control that raised the event.</param>
            /// <param name="e">Event data (unused).</param>
            private void button2_Click(object sender, EventArgs e)
            {
                bianliwangye();
            }

            /// <summary>
            /// Enumerates the elements of the loaded page whose class name is <c>g</c> or
            /// <c>c-showurl</c>, shows the part of their inner HTML that precedes the first
            /// <c>&amp;nbsp</c> marker in a message box, and finally moves on to the next
            /// result page. Does nothing when no page is loaded.
            /// </summary>
            public void bianliwangye()
            {
                IHTMLDocument2 document = GetDomDocument();
                if (document == null)
                {
                    return;
                }

                IHTMLElementCollection elements = (IHTMLElementCollection)document.all;
                foreach (IHTMLElement element in elements)
                {
                    if (element.className != "g" && element.className != "c-showurl")
                    {
                        continue;
                    }

                    // innerHTML may be null for a matching element; skip it rather than crash.
                    string html = element.innerHTML;
                    if (string.IsNullOrEmpty(html))
                    {
                        continue;
                    }

                    // A single ordinal search both detects the marker and yields the cut position.
                    int markerIndex = html.IndexOf(UrlTerminator, StringComparison.Ordinal);
                    if (markerIndex >= 0)
                    {
                        MessageBox.Show(html.Substring(0, markerIndex));
                    }
                }

                // All entries of this page have been reported; advance to the next page.
                bianli();
            }

            /// <summary>
            /// Returns the COM document of the page shown in <c>webBrowser1</c>, or
            /// <c>null</c> when the control has no document or it is not an
            /// <see cref="IHTMLDocument2"/>.
            /// </summary>
            private IHTMLDocument2 GetDomDocument()
            {
                HtmlDocument page = webBrowser1.Document;
                if (page == null)
                {
                    return null;
                }

                return page.DomDocument as IHTMLDocument2;
            }
        }
    }
  • 相关阅读:
    运算符
    数据运算
    login_code
    headless&unittest
    dict-test
    list_test
    string_test
    python1113
    堆排序
    java线程同步问题
  • 原文地址:https://www.cnblogs.com/happinesshappy/p/4596297.html
Copyright © 2011-2022 走看看