zoukankan      html  css  js  c++  java
  • 利用HtmlAgilityPack抓取园子里面的数据

       public void GetStr()
            {
                string url = "http://kb.cnblogs.com/list/1001/";
                HtmlWeb web = new HtmlWeb();
                //htmlWeb.DefaultEncoding = System.Text.Encoding.GetEncoding("");

                HtmlAgilityPack.HtmlDocument doc = web.Load(url);
                HtmlNode rootnode = doc.DocumentNode;
                string xpathstring = "//div[@class='list_title']";
                string newpath = "//div[@class='list_title']/a";
                HtmlNodeCollection aa = rootnode.SelectNodes(xpathstring);    //所有找到的节点都是一个集合
                HtmlNodeCollection tt = rootnode.SelectNodes(newpath);
                foreach(HtmlNode ht in tt)
                {
                     string innertext = ht.InnerText;
                    string page = ht.GetAttributeValue("href", "");

                    string newurl = "http://kb.cnblogs.com"+page;
                    string content = GetContent(newurl);
                }
           
            }
            public string GetContent(string url)
            {
                  HtmlWeb web = new HtmlWeb();
                //htmlWeb.DefaultEncoding = System.Text.Encoding.GetEncoding("");
                HtmlAgilityPack.HtmlDocument doc = web.Load(url);
                HtmlNode rootnode = doc.DocumentNode;
                string newpath = "//div[@class='contents_main']";
                HtmlNodeCollection tt = rootnode.SelectNodes(newpath);
                return tt[0].InnerText;
            }

  • 相关阅读:
    STL源代码剖析(二)
    局域网部署docker--从无到有创建自己的docker私有仓库
    Leetcode Add two numbers
    GDIPlus绘制桌面歌词
    Android中apk动态载入技术研究(2)android插件化及实现
    jq 地区(省市县区)联动菜单
    System.Diagnostics.Process.Start的妙用
    aaaa
    RESTful Web 服务:教程
    芒果TV 视频真实的地址获取
  • 原文地址:https://www.cnblogs.com/cxlings/p/3110858.html
Copyright © 2011-2022 走看看