zoukankan      html  css  js  c++  java
  • c#语言输入关键字,抓取你想要的所有网址

          
          
    using System;
    using System.Collections.Generic;
    using System.ComponentModel;
    using System.Data;
    using System.Drawing;
    using System.Linq;
    using System.Text;
    using System.Windows.Forms;
    using mshtml;
    using System.Collections;
    using System.Threading;
    
    namespace 遍历百度网页
    {
        /// <summary>
        /// Parameterless, void-returning callback type. Form1 wraps its own
        /// <c>baidu111</c> method in this delegate when it needs to re-run that
        /// method through <c>Invoke</c>.
        /// </summary>
        public delegate void baidu111();
        /// <summary>
        /// Main window of the scraper. It loads a Baidu search for the keyword typed
        /// into <c>textBox1</c> inside <c>webBrowser1</c>, harvests the display URLs
        /// from up to <see cref="MaxPages"/> result pages, and lists them in
        /// <c>listView1</c> on request.
        /// </summary>
        public partial class Form1 : Form
        {
            /// <summary>Maximum number of result pages scraped for one search.</summary>
            private const int MaxPages = 3;

            /// <summary>Exact inner text of the "next page" link on the result page.</summary>
            private const string NextPageText = "下一页>";

            /// <summary>Marker that terminates the display URL inside a result element's HTML.</summary>
            private const string UrlTerminator = "&nbsp";

            /// <summary>URLs collected so far, in the order they were found.</summary>
            private readonly List<string> collectedUrls = new List<string>();

            /// <summary>Number of result pages already scraped for the current search.</summary>
            private int pagesScraped;

            /// <summary>Set by the stop button; suppresses further scraping and paging.</summary>
            private bool stopRequested;

            public Form1()
            {
                InitializeComponent();
            }

            /// <summary>Turns on grid lines in the result list when the form loads.</summary>
            private void Form1_Load(object sender, EventArgs e)
            {
                listView1.GridLines = true;
            }

            /// <summary>Starts a search; thin wrapper around <see cref="baidu111"/>.</summary>
            void baidu()
            {
                baidu111();
            }

            /// <summary>
            /// Navigates the embedded browser to the Baidu result page for the keyword
            /// in <c>textBox1</c>. When called from a thread other than the one that
            /// owns the browser control, the call re-dispatches itself through
            /// <c>Invoke</c>. A blank keyword is ignored.
            /// </summary>
            void baidu111()
            {
                if (webBrowser1.InvokeRequired)
                {
                    this.Invoke(new baidu111(baidu111));
                    return;
                }

                string keyword = textBox1.Text.Trim();
                if (keyword.Length == 0)
                {
                    return;
                }

                // Every search starts with a fresh page budget; otherwise a counter
                // left at the limit by a previous search would block all scraping.
                pagesScraped = 0;
                stopRequested = false;

                // Escape the keyword so spaces, '&', '#' and non-ASCII text survive
                // as a single query-string value.
                webBrowser1.Navigate("http://www.baidu.com/s?wd=" + Uri.EscapeDataString(keyword));
            }

            /// <summary>Stop button: cancels the pending page load and halts further paging.</summary>
            private void button2_Click(object sender, EventArgs e)
            {
                stopRequested = true;
                webBrowser1.Stop();
            }

            /// <summary>Search button: starts one navigation for the current keyword.</summary>
            private void button1_Click(object sender, EventArgs e)
            {
                // Navigate exactly once, directly from this handler.
                baidu();
            }

            /// <summary>
            /// Clicks the first element of the loaded document whose inner text is the
            /// "next page" label. Does nothing when no document is loaded or no such
            /// element exists.
            /// </summary>
            public void bianli()
            {
                if (webBrowser1.Document == null)
                {
                    return;
                }

                IHTMLDocument2 doc = webBrowser1.Document.DomDocument as IHTMLDocument2;
                if (doc == null)
                {
                    return;
                }

                foreach (IHTMLElement ele in doc.all)
                {
                    if (ele.innerText == NextPageText)
                    {
                        // Stop after the first match: enclosing or duplicated elements
                        // can expose the same text, and each extra click would request
                        // another page.
                        ele.click();
                        return;
                    }
                }
            }

            /// <summary>
            /// Fires when the browser finishes loading a document. Scrapes the page
            /// unless scraping was stopped, the page budget is used up, or the
            /// completed URL is not the browser's current document URL.
            /// </summary>
            /// <param name="sender">The browser control raising the event.</param>
            /// <param name="e">Carries the URL of the document that finished loading.</param>
            private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
            {
                if (stopRequested || pagesScraped >= MaxPages)
                {
                    return;
                }

                HtmlDocument page = webBrowser1.Document;
                if (page == null || e.Url != page.Url)
                {
                    return;
                }

                bianliwangye();
            }

            /// <summary>
            /// Collects the URLs shown on the currently loaded result page. For every
            /// element whose class name is "g" or "c-showurl", the inner HTML up to the
            /// first "&amp;nbsp" marker is stored. Afterwards the page is counted and,
            /// while the page budget allows, the "next page" link is clicked.
            /// </summary>
            public void bianliwangye()
            {
                if (webBrowser1.Document == null)
                {
                    return;
                }

                IHTMLDocument2 document = webBrowser1.Document.DomDocument as IHTMLDocument2;
                if (document == null)
                {
                    return;
                }

                foreach (IHTMLElement h in document.all)
                {
                    string cssClass = h.className;
                    if (cssClass != "g" && cssClass != "c-showurl")
                    {
                        continue;
                    }

                    string html = h.innerHTML;
                    if (html == null)
                    {
                        continue;
                    }

                    int end = html.IndexOf(UrlTerminator, StringComparison.Ordinal);
                    if (end > 0)
                    {
                        collectedUrls.Add(html.Substring(0, end));
                    }
                }

                pagesScraped++;

                // Only request another page when it will actually be scraped.
                if (pagesScraped < MaxPages)
                {
                    bianli();
                }
            }

            /// <summary>
            /// Show-results button: rebuilds <c>listView1</c> with one numbered row per
            /// collected URL. The list is cleared first so repeated clicks do not
            /// duplicate columns or rows.
            /// </summary>
            private void button3_Click(object sender, EventArgs e)
            {
                listView1.BeginUpdate();
                try
                {
                    listView1.Clear();
                    listView1.Columns.Add("编号");
                    listView1.Columns.Add("获取到的网址", 400);

                    for (int row = 0; row < collectedUrls.Count; row++)
                    {
                        ListViewItem item = listView1.Items.Add((row + 1).ToString());
                        item.SubItems.Add(collectedUrls[row]);
                    }
                }
                finally
                {
                    listView1.EndUpdate();
                }
            }
        }
    }
  • 相关阅读:
    angular学习之通俗易懂篇-----constructor()与ngOnInit()
    vscode----快捷键
    angular学习之通俗易懂篇-----路由
    angular学习之通俗易懂篇-----双向数据绑定MVVM
    angular学习之通俗易懂篇-----数据绑定
    angular学习之通俗易懂篇-----新建组件并调用
    angular学习之-----常用命令行
    windows环境下node更新最新版本----简单粗暴
    首例爬虫禁令:法院裁定立即停止擅自爬取微信公众号相关数据行为
    Pycharm两种快速激活方式(附最新激活码和插件)
  • 原文地址:https://www.cnblogs.com/275147378abc/p/4620581.html
Copyright © 2011-2022 走看看