zoukankan      html  css  js  c++  java
  • C#使用xpath简单爬取网站的内容

           /// <summary>
           /// Demonstrates scraping qidian.com with HtmlAgilityPack and XPath.
           /// The live code downloads one chapter page and extracts the plain text of
           /// its content container; the two commented-out regions are kept as further
           /// examples (home-page categories and per-category book listings).
           /// </summary>
           /// <exception cref="System.InvalidOperationException">
           /// Thrown when the chapter-content node cannot be found in the downloaded page.
           /// </exception>
           public static void Get()
            {
                // string xpathtrI = "//*[@id='classify-list']/dl/dd/a/cite/span/i";
                #region Example: fetch the categories from the home page

                //// URL of the home page
                //string urlHome = "http://www.qidian.com/";
                //HtmlWeb web = new HtmlWeb();
                //HtmlAgilityPack.HtmlDocument htmlDoc = web.Load(urlHome);
                //// Root node of the parsed HTML document
                //HtmlNode rootNode1 = htmlDoc.DocumentNode;
                //string xpathtrA = "//*[@id='classify-list']/dl/dd";
                //HtmlNodeCollection classList = rootNode1.SelectNodes(xpathtrA);
                //List<string> listINode = new List<string>();
                //foreach (HtmlNode item in classList)
                //{
                //    // Read the category name. The leading "." keeps the query relative
                //    // to the current <dd>; a bare "//" would search the whole document
                //    // and return the same first match on every iteration.
                //    string inode = item.SelectSingleNode(".//a/cite/span/i").InnerText;
                //    listINode.Add(inode);
                //}

                #endregion

                #region Example: categories and their book details

                //string urlDetail = "http://xuanhuan.qidian.com/";
                //List<string> urlList = new List<string>();
                //urlList.Add("http://xuanhuan.qidian.com/");
                //urlList.Add("http://qihuan.qidian.com/");
                //urlList.Add("http://wuxia.qidian.com/");
                //urlList.Add("http://xianxia.qidian.com/");
                //urlList.Add("http://dushi.qidian.com/");
                //urlList.Add("http://zhichang.qidian.com/");
                //urlList.Add("http://junshi.qidian.com/");
                //urlList.Add("http://lishi.qidian.com/");
                //urlList.Add("http://youxi.qidian.com/");
                //urlList.Add("http://tiyu.qidian.com/");
                //urlList.Add("http://kehuan.qidian.com/");
                //urlList.Add("http://lingyi.qidian.com/");
                //foreach (string url in urlList)
                //{
                //    HtmlAgilityPack.HtmlDocument htmlDetail = web.Load(url);
                //    HtmlNode rootNode2 = htmlDetail.DocumentNode;
                //    string a = "//*[@class='book-info']";
                //    HtmlNodeCollection classList2 = rootNode2.SelectNodes(a);
                //    List<string> listINode2 = new List<string>();
                //    foreach (HtmlNode item in classList2)
                //    {
                //        // Collect the inner HTML of each book-info element
                //        string inode = item.InnerHtml;
                //        listINode2.Add(inode);
                //    }
                //}

                #endregion

                #region Chapter content

                HtmlWeb web = new HtmlWeb();
                string u = "http://read.qidian.com/chapter/zOGI9RYmNdFhO--gcH8iFg2/h3iHSEH1cSpMs5iq0oQwLQ2";
                HtmlAgilityPack.HtmlDocument htmlDocment = web.Load(u);
                // Root node of the parsed HTML document
                HtmlNode htmlNode = htmlDocment.DocumentNode;
                // Exact match on the full class attribute value of the content container
                string x = "//*[@class='read-content j_readContent']";
                HtmlNode htmlNodeP = htmlNode.SelectSingleNode(x);

                // SelectSingleNode returns null when nothing matches (layout change,
                // error page, ...). Fail with a descriptive error instead of an
                // anonymous NullReferenceException on the next line.
                if (htmlNodeP == null)
                {
                    throw new System.InvalidOperationException(
                        "No node matched XPath \"" + x + "\" in the page loaded from " + u);
                }

                // Plain text of the chapter; this sample stops here and does not
                // store or print the result.
                string htmlD = htmlNodeP.InnerText;

                #endregion

            }

                     以上只是一个简单的示例：用 HtmlAgilityPack 加载页面，再通过 XPath 取出章节正文。

  • 相关阅读:
    VUE @hook浅析(监听子组件的生命周期钩子)
    JS生成uuid的四种方法
    [ES7] Private, Static class Members
    [Javascript] Constructor Functions and prototype
    [React Performance] Use CSS Variables instead of React Context?
    [Kotlin Spring boot] A simple demo app
    [Kotlin Spring boot] Dependency injection
    [Javascript] Broadcaster + Operator + Listener pattern -- 15. Create a Sequence of Broadcasters
    [Kotlin Spring boot] Connect template with a backing bean
    [Kotlin Spring boot] Enable automatically recompile for Spring boot project with devtool
  • 原文地址:https://www.cnblogs.com/dyxd/p/6902924.html
Copyright © 2011-2022 走看看