zoukankan      html  css  js  c++  java
  • C# 获取网页信息

    • 获取网页源码
     ///通过HttpWebResponse 
    /// <summary>
    /// Downloads the resource at <paramref name="url"/> through
    /// <see cref="HttpWebRequest"/> and returns the response body as text.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    /// <returns>
    /// The response body, decoded with the "gb2312" encoding when the response
    /// declares charset "gbk" or "gb2312" (case-insensitive), and as UTF-8 in
    /// every other case, including when no charset is reported.
    /// </returns>
    public  string GetUrlHtml(string url)
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

                // The response owns the network connection; dispose it (together with
                // the stream and reader) so the connection is released even when
                // reading throws.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                {
                    // CharacterSet may be null/empty when the server sends no usable
                    // Content-Type, so compare null-safely and without culture rules.
                    string charset = response.CharacterSet;
                    bool isChineseLegacyCharset =
                        string.Equals(charset, "gbk", StringComparison.OrdinalIgnoreCase) ||
                        string.Equals(charset, "gb2312", StringComparison.OrdinalIgnoreCase);

                    Encoding encoding = isChineseLegacyCharset
                        ? Encoding.GetEncoding("gb2312")
                        : Encoding.UTF8;

                    using (Stream respStream = response.GetResponseStream())
                    using (StreamReader reader = new StreamReader(respStream, encoding))
                    {
                        return reader.ReadToEnd();
                    }
                }
            }
    ///通过WebClient 
      /// <summary>
      /// Downloads the resource at <paramref name="url"/> with
      /// <see cref="System.Net.WebClient"/> and returns it decoded as text.
      /// </summary>
      /// <param name="url">Address of the page to download.</param>
      /// <returns>
      /// The downloaded bytes decoded with the charset named in the response's
      /// Content-Type header when that charset is present and recognised;
      /// otherwise decoded with <see cref="System.Text.Encoding.Default"/>.
      /// </returns>
      private static string htmlcontent(string url)
            {
                // WebClient is IDisposable; release it as soon as the download is done.
                using (System.Net.WebClient wc = new System.Net.WebClient())
                {
                    Byte[] pageData = wc.DownloadData(url);

                    // Encoding.Default differs between machines, which is what produced
                    // garbled Chinese text. Prefer the charset the server declared and
                    // keep Encoding.Default only as the fallback.
                    System.Text.Encoding encoding = System.Text.Encoding.Default;
                    string contentType = wc.ResponseHeaders == null
                        ? null
                        : wc.ResponseHeaders["Content-Type"];

                    if (!string.IsNullOrEmpty(contentType))
                    {
                        try
                        {
                            string charset = new System.Net.Mime.ContentType(contentType).CharSet;
                            if (!string.IsNullOrEmpty(charset))
                            {
                                encoding = System.Text.Encoding.GetEncoding(charset);
                            }
                        }
                        catch (FormatException)
                        {
                            // Malformed Content-Type header: keep the fallback encoding.
                        }
                        catch (ArgumentException)
                        {
                            // Unknown or unsupported charset name: keep the fallback encoding.
                        }
                    }

                    return encoding.GetString(pageData);
                }
            }
    • 获取网页元素(HtmlAgilityPack)
    using HtmlAgilityPack;
    //HtmlNode node = doc.DocumentNode.SelectSingleNode(Xpath);  //单个元素(无匹配时返回 null)
    //HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes(Xpath); //元素集合(无匹配时返回 null)
    //
    
    /// <summary>
    /// For every child of <paramref name="menu1"/>, downloads the child's page and
    /// appends one level-3 menu entry per node matched by <paramref name="Xpath"/>.
    /// </summary>
    /// <param name="menu1">Menu whose <c>ChildMenus</c> are visited; entries are added to each child's own <c>ChildMenus</c>.</param>
    /// <param name="Xpath">XPath expression evaluated against each child's downloaded HTML.</param>
    /// <returns>The same <paramref name="menu1"/> instance that was passed in.</returns>
    private static Menu Level_chidren_Menu(Menu menu1,string Xpath)
            {
                foreach (var item in menu1.ChildMenus)
                {
                    // Parse the HTML source of this child's page.
                    HtmlDocument doc1 = new HtmlDocument();
                    doc1.LoadHtml(HttpHelper.GetUrlHtml(item.MenuUrl));

                    // SelectNodes returns null (not an empty collection) when nothing
                    // matches, so skip pages without matches instead of crashing.
                    HtmlNodeCollection nodes = doc1.DocumentNode.SelectNodes(Xpath);
                    if (nodes == null)
                    {
                        continue;
                    }

                    foreach (var page in nodes)
                    {
                        // NOTE(review): assumes every matched node contains an <a> with an
                        // href attribute and that href is relative to HttpHelper.baseUrl - confirm.
                        item.ChildMenus.Add(new Menu() {
                            LevelMenu = 3,
                            MenuName = page.InnerText,
                            MenuUrl = HttpHelper.baseUrl + HttpHelper.GetHtmlAttribute(page.InnerHtml, "a").Attributes["href"].Value
                        });
                    }
                }
                return menu1;
            }
    

      

  • 相关阅读:
    CAN总线学习资料
    VMware虚拟机 硬盘空间不足 磁盘大小调整方案
    郭天祥-S3C2440开发板Linux2.6.31移植教程
    MFC串口编程——使用标准SerialCom类
    Luogu P2602 [ZJOI2010]数字计数 //数位DP
    luogu P1896 [SCOI2005] 互不侵犯 //状压DP
    中北大学ACM 5/12 T6 CSY的幸福
    P2473 || SCOI2008 奖励关 //状压&&期望DP
    请让蝴蝶爬满全身
    【图论】二分图 // 未完成 =、=
  • 原文地址:https://www.cnblogs.com/Zingu/p/14541846.html
Copyright © 2011-2022 走看看