zoukankan      html  css  js  c++  java
  • C# 按地址获取网页数据并解析

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Windows.Forms;
    using System.Net;
    using System.IO;
    
    namespace OneHand
    {
        /// <summary>
        /// Helper for downloading the raw HTML source of a web page.
        /// </summary>
        class googleMap
        {
            /// <summary>
            /// Downloads the page at <paramref name="Url"/> and returns its body as text.
            /// </summary>
            /// <param name="Url">Address of the page to request.</param>
            /// <returns>
            /// The response body decoded as GB2312, or an empty string if the request
            /// or the read fails (a message box is shown in that case).
            /// </returns>
            public static string GetWebContent(string Url)
            {
                string strResult = "";
                try
                {
                    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
                    // Timeout is expressed in milliseconds.
                    request.Timeout = 30000;
                    request.Headers.Set("Pragma", "no-cache");

                    // Dispose the response, its stream and the reader even when reading
                    // throws; otherwise the underlying connection is never released.
                    // NOTE(review): the body is always decoded as GB2312, regardless of the
                    // charset the server declares - confirm this matches the target pages.
                    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                    using (Stream streamReceive = response.GetResponseStream())
                    using (StreamReader streamReader = new StreamReader(streamReceive, Encoding.GetEncoding("GB2312")))
                    {
                        strResult = streamReader.ReadToEnd();
                    }
                }
                catch (Exception ex)
                {
                    // Keep the best-effort contract (callers get ""), but leave a trace
                    // of the real failure instead of discarding it.
                    System.Diagnostics.Debug.WriteLine(ex);
                    MessageBox.Show("出错");
                }
                return strResult;
            }
        }
    }

    调用 GetWebContent 并解析返回的 HTML:

      /// <summary>
      /// Queries ditu.google.cn for <paramref name="argAddress"/> and extracts an address
      /// line from the returned page.
      /// </summary>
      /// <param name="argAddress">
      /// Search text. It is URL-escaped before being placed in the query string;
      /// <c>null</c> is treated as an empty query.
      /// </param>
      /// <returns>
      /// The second "\r\n"-separated line of the inner text of the element with id
      /// "panel_A_2", or an empty string when the page has no TR element, when the
      /// "resultspanel" or "panel_A_2" element is missing, or when the text has fewer
      /// than two lines.
      /// </returns>
      private string Search(string argAddress)
      {
          // Build the URL to fetch.
          StringBuilder sb = new StringBuilder();
          sb.Append("http://ditu.google.cn/maps");
          sb.Append("?");
          sb.Append("hl=zh-CN&newwindow=1&safe=strict");
          sb.Append("&q=");
          // Escape the value so characters such as '&', '#', '+' or spaces cannot
          // break or truncate the query string.
          sb.Append(Uri.EscapeDataString(argAddress ?? string.Empty));
          sb.Append("&bav=on.2,or.&bvm=bv.44158598,d.dGI&biw=1440&bih=775&um=1&ie=UTF-8&sa=N&tab=wl");

          // Download the HTML source of that URL.
          string strWebContent = googleMap.GetWebContent(sb.ToString());

          // Load the HTML into a throw-away browser control so it can be queried by
          // element id; the control is disposed as soon as the lookup is done.
          using (WebBrowser webb = new WebBrowser())
          {
              webb.Navigate("about:blank");
              HtmlDocument htmldoc = webb.Document.OpenNew(true);
              htmldoc.Write(strWebContent);

              // The lookup below is document-wide, so it is performed once rather than
              // once per TR element. The result is still only reported when the page
              // contains at least one TR, matching the previous behavior.
              if (htmldoc.GetElementsByTagName("TR").Count == 0)
              {
                  return "";
              }

              // Both elements must exist for a result to be reported.
              if (htmldoc.GetElementById("resultspanel") == null)
              {
                  return "";
              }

              HtmlElement panel = htmldoc.GetElementById("panel_A_2");
              if (panel == null)
              {
                  return "";
              }

              string panelText = panel.InnerText;
              if (panelText == null)
              {
                  return "";
              }

              // Empty lines are kept so that index 1 is always the second physical line.
              string[] contentLines = panelText.Split(new string[] { "\r\n" }, StringSplitOptions.None);
              if (contentLines.Length < 2)
              {
                  return "";
              }

              return contentLines[1];
          }
      }
  • 相关阅读:
    五一拆装机学习
    msgbox函数和inputbox函数应该注意的几点
    西游记(3)
    刚刚开通csdn
    c# 快捷键
    JavaBean的属性(Simple,Indexed,Bound,Constrained)【收藏】
    SQL查询语句使用【收藏】
    .NET 对实现IPersistStream接口的对象进行保存和读取
    创建控件数组
    常用数据库JDBC连接写法【收藏】
  • 原文地址:https://www.cnblogs.com/Kakasi/p/2998259.html
Copyright © 2011-2022 走看看