- 获取网页源码
/// <summary>
/// Downloads the page at <paramref name="url"/> through
/// <see cref="HttpWebRequest"/>/<see cref="HttpWebResponse"/> and returns its body as text.
/// </summary>
/// <param name="url">Address of the page to fetch.</param>
/// <returns>
/// The response body, decoded with the "gb2312" encoding when the response declares a
/// "gbk" or "gb2312" character set, and with UTF-8 otherwise.
/// </returns>
public string GetUrlHtml(string url)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

    // Dispose the response as well as its stream so the connection is released
    // even when reading the body throws.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    {
        // CharacterSet can be null or empty when the server sends no charset, so compare
        // with string.Equals instead of calling ToLower() on it.
        string charset = response.CharacterSet;
        bool isChineseLegacyCharset =
            string.Equals(charset, "gbk", StringComparison.OrdinalIgnoreCase) ||
            string.Equals(charset, "gb2312", StringComparison.OrdinalIgnoreCase);

        Encoding encoding = isChineseLegacyCharset
            ? Encoding.GetEncoding("gb2312")
            : Encoding.UTF8;

        using (Stream respStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(respStream, encoding))
        {
            return reader.ReadToEnd();
        }
    }
}

/// <summary>
/// Downloads the page at <paramref name="url"/> through <see cref="System.Net.WebClient"/>
/// and returns its body decoded with <see cref="System.Text.Encoding.Default"/>.
/// </summary>
/// <param name="url">Address of the page to fetch.</param>
/// <returns>The downloaded bytes converted to a string.</returns>
private static string htmlcontent(string url)
{
    // WebClient is IDisposable; release it as soon as the download finishes.
    using (System.Net.WebClient wc = new System.Net.WebClient())
    {
        Byte[] pageData = wc.DownloadData(url);

        // Encoding.Default depends on the runtime/platform. If Chinese text comes back
        // garbled, decode with System.Text.Encoding.UTF8 instead.
        return System.Text.Encoding.Default.GetString(pageData);
    }
}
- 获取网页元素(HtmlAgilityPack)
using HtmlAgilityPack;

// Single element:      HtmlNode node = doc.DocumentNode.SelectSingleNode("Xpath");
// Element collection:  HtmlNodeCollection nodes = doc1.DocumentNode.SelectNodes(Xpath);

/// <summary>
/// For every child of <paramref name="menu1"/>, downloads the page at the child's
/// <c>MenuUrl</c>, selects the nodes matching <paramref name="Xpath"/>, and appends one
/// new <c>Menu</c> (with <c>LevelMenu = 3</c>) per matched node to that child's
/// <c>ChildMenus</c>.
/// </summary>
/// <param name="menu1">Menu whose children are expanded in place.</param>
/// <param name="Xpath">XPath expression evaluated against each child's page.</param>
/// <returns>The same <paramref name="menu1"/> instance, after its children were populated.</returns>
private static Menu Level_chidren_Menu(Menu menu1, string Xpath)
{
    foreach (var item in menu1.ChildMenus)
    {
        // Step 1: create the document and load the downloaded HTML source into it.
        HtmlDocument doc1 = new HtmlDocument();
        doc1.LoadHtml(HttpHelper.GetUrlHtml(item.MenuUrl));

        // Step 2: select the nodes of interest.
        HtmlNodeCollection nodes = doc1.DocumentNode.SelectNodes(Xpath);

        // SelectNodes returns null (not an empty collection) when nothing matches;
        // skip this child instead of failing with a NullReferenceException.
        if (nodes == null)
        {
            continue;
        }

        foreach (var page in nodes)
        {
            // New entries go into item.ChildMenus, not the collection being enumerated.
            item.ChildMenus.Add(new Menu()
            {
                LevelMenu = 3,
                MenuName = page.InnerText,
                // The link target is taken from the "href" of the node that
                // HttpHelper.GetHtmlAttribute returns for the "a" tag in this node's inner HTML.
                MenuUrl = HttpHelper.baseUrl + HttpHelper.GetHtmlAttribute(page.InnerHtml, "a").Attributes["href"].Value
            });
        }
    }

    return menu1;
}