zoukankan      html  css  js  c++  java
  • C# 获取网页信息

    • 获取网页源码
     ///通过HttpWebResponse 
    /// <summary>
    /// Downloads the page at <paramref name="url"/> with <see cref="HttpWebRequest"/> and
    /// returns the response body decoded as text.
    /// </summary>
    /// <param name="url">URL of the page to fetch; passed unchanged to <c>WebRequest.Create</c>.</param>
    /// <returns>
    /// The response body. It is decoded with the "gb2312" encoding when the response reports the
    /// character set "gbk" (in any letter case), and as UTF-8 in every other case.
    /// </returns>
    public string GetUrlHtml(string url)
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

                // Dispose the response so its connection is released even if reading the body throws.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                {
                    // string.Equals is null-safe, so a response without a usable charset falls through
                    // to UTF-8 instead of throwing; the ordinal comparison is also culture-independent.
                    bool isGbk = string.Equals(response.CharacterSet, "gbk", StringComparison.OrdinalIgnoreCase);
                    Encoding encoding = isGbk ? Encoding.GetEncoding("gb2312") : Encoding.UTF8;

                    using (Stream respStream = response.GetResponseStream())
                    using (StreamReader reader = new StreamReader(respStream, encoding))
                    {
                        return reader.ReadToEnd();
                    }
                }
            }
    ///通过WebClient 
      /// <summary>
      /// Downloads the resource at <paramref name="url"/> with <see cref="System.Net.WebClient"/>
      /// and returns its bytes decoded with <see cref="System.Text.Encoding.Default"/>.
      /// </summary>
      /// <param name="url">Address of the resource to download.</param>
      /// <returns>The downloaded bytes decoded as a string.</returns>
      private static string htmlcontent(string url)
            {
                // WebClient is IDisposable; dispose it so its resources are released deterministically.
                using (System.Net.WebClient wc = new System.Net.WebClient())
                {
                    byte[] pageData = wc.DownloadData(url);

                    // Encoding.Default is environment-dependent. If Chinese text comes back garbled,
                    // decode with System.Text.Encoding.UTF8 instead (the original author's noted workaround).
                    return System.Text.Encoding.Default.GetString(pageData);
                }
            }
    • 获取网页元素(HtmlAgilityPack)
    using HtmlAgilityPack;
    //HtmlNode node = doc.DocumentNode.SelectSingleNode(Xpath);  //单个元素(无匹配时返回 null)
    //HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes(Xpath); //元素集合(无匹配时返回 null)
    //
    
    /// <summary>
    /// For every child of <paramref name="menu1"/>, downloads the child's page, selects the nodes
    /// matching <paramref name="Xpath"/>, and appends one level-3 <c>Menu</c> per matched node to
    /// that child's <c>ChildMenus</c>.
    /// </summary>
    /// <param name="menu1">Menu whose <c>ChildMenus</c> are expanded in place.</param>
    /// <param name="Xpath">XPath expression evaluated against each child's downloaded page.</param>
    /// <returns>The same <paramref name="menu1"/> instance, after its children were extended.</returns>
    private static Menu Level_chidren_Menu(Menu menu1,string Xpath)
            {
                foreach (var item in menu1.ChildMenus)
                {
                    // Parse the HTML source of the child's page.
                    HtmlDocument doc1 = new HtmlDocument();
                    doc1.LoadHtml(HttpHelper.GetUrlHtml(item.MenuUrl));

                    // SelectNodes returns null (not an empty collection) when nothing matches,
                    // so skip pages without matches instead of failing in the foreach below.
                    HtmlNodeCollection nodes = doc1.DocumentNode.SelectNodes(Xpath);
                    if (nodes == null)
                    {
                        continue;
                    }

                    foreach (var page in nodes)
                    {
                        item.ChildMenus.Add(new Menu()
                        {
                            LevelMenu = 3,
                            MenuName = page.InnerText,
                            // NOTE(review): presumably GetHtmlAttribute returns the first <a> node found in
                            // the fragment; a node without an href would still throw here - confirm.
                            MenuUrl = HttpHelper.baseUrl + HttpHelper.GetHtmlAttribute(page.InnerHtml, "a").Attributes["href"].Value
                        });
                    }
                }
                return menu1;
            }
    

      

  • 相关阅读:
    linux IO调度算法
    Programming Languages: Application and Interpretation
    zz 跟风小结一下孕期~
    UML和模式应用学习笔记-1(面向对象分析和设计)
    ASP.NET交互Rest服务接口(Jquery的Get与Post方式)
    Linq To Xml操作XML增删改查
    MSSQL数据库迁移到Oracle(二)
    MSSQL数据库迁移到Oracle
    学习EF之CodeFirst二(数据库对应映射)
    学习EF之CodeFirst一
  • 原文地址:https://www.cnblogs.com/Zingu/p/14541846.html
Copyright © 2011-2022 走看看