C# 中 HtmlAgilityPack.HtmlDocument 和 HtmlAgilityPack.HtmlNode 的使用
// Example use of HtmlAgilityPack.HtmlDocument and HtmlAgilityPack.HtmlNode:
// download a page, parse it, and read the link text and URL of every <li>
// found under a fixed path below <body>.
HtmlAgilityPack.HtmlDocument response = null;
HtmlAgilityPack.HtmlNode responseNew = null;
HtmlDocument doc = new HtmlDocument();
wc.Encoding = Encoding.UTF8;
string html = wc.DownloadString(url);
doc.LoadHtml(html);

// Look the node up by XPath; this works much like XmlNode.
responseNew = doc.DocumentNode.SelectSingleNode("/html/body");

// SelectSingleNode and SelectNodes yield null (not an empty result) when
// nothing matches, so both results are checked before being dereferenced.
HtmlNodeCollection categoryNodeList = null;
if (responseNew != null)
{
    categoryNodeList = responseNew.SelectNodes("div[3]/div[1]/div[1]/div[1]/ul[1]/li");
}

if (categoryNodeList != null)
{
    foreach (HtmlNode item in categoryNodeList)
    {
        var xpath = item.XPath;

        // NOTE(review): this rewrite of "/#text[n]" into "/li" looks like a leftover
        // from iterating ChildNodes (which also yields text nodes) - confirm.
        // It is kept because k and number are defined outside this snippet.
        if (k % 2 != 0)
        {
            number = number + 1;
            xpath = xpath.Replace("/#text[" + number + "]", "/li"); // replace the path segment
        }
        k = k + 1;

        // Resolve the anchor once and reuse it for both the text and the href.
        HtmlNode link = item.SelectSingleNode(xpath + "/div/div/span/a");
        if (link == null)
        {
            // This item has no matching anchor; skip it instead of throwing.
            continue;
        }

        string titleName = link.InnerText;
        // GetAttributeValue falls back to the default when href is absent.
        string infourl = link.GetAttributeValue("href", string.Empty); // url
    }
}
XML 节点的模糊查询(XPath):contains(@属性, '模糊查询的值')
// Partial match: select the div children of the element with id 'resultList'
// whose class attribute contains the substring 'el'.
var containsQueryRoot = response.DocumentNode;
ulS = containsQueryRoot.SelectNodes("//*[@id='resultList']/div[contains(@class,'el')]");
XML 节点的等值查询(XPath):@属性='要匹配的值'
// Exact match: select the div children of the element with id 'resultList'
// whose class attribute is exactly 'el'.
ulS = response.DocumentNode.SelectNodes("//*[@id='resultList']/div[@class='el']");

// SelectNodes yields null when nothing matches, so guard before reading Count.
if (ulS != null)
{
    // NOTE(review): iteration starts at index 2, presumably to skip leading
    // non-data rows - confirm against the page being scraped.
    for (int i = 2; i < ulS.Count; i++)
    {
        var item = ulS[i];
        var xpath = item.XPath;

        HtmlNode link = item.SelectSingleNode(xpath + "/p/span/a");
        if (link == null)
        {
            // No anchor under this row; skip it instead of throwing.
            continue;
        }

        string titleName = link.InnerText;
    }
}