zoukankan      html  css  js  c++  java
  • C#使用xpath简单爬取网站的内容

           /// <summary>
           /// Demo of scraping a page with HtmlAgilityPack and XPath: loads one
           /// hard-coded chapter URL, selects the first element whose class attribute
           /// is exactly "read-content j_readContent", and reads its inner text into
           /// a local variable. Nothing is returned or stored; the earlier regions are
           /// kept as commented-out examples.
           /// </summary>
           public static void Get()
            {
                // string xpathtrI = "//*[@id='classify-list']/dl/dd/a/cite/span/i";
                #region Fetch the categories from the home page

                //// URL of the home page
                //string urlHome = "http://www.qidian.com/";
                //HtmlWeb web = new HtmlWeb();
                //HtmlAgilityPack.HtmlDocument htmlDoc = web.Load(urlHome);
                //// Get the root node of the parsed HTML
                //HtmlNode rootNode1 = htmlDoc.DocumentNode;
                //string xpathtrA = "//*[@id='classify-list']/dl/dd";
                //HtmlNodeCollection classList = rootNode1.SelectNodes(xpathtrA);
                //List<string> listINode = new List<string>();
                //foreach (HtmlNode item in classList)
                //{
                //    // Get the category name
                //    // NOTE(review): an XPath starting with "//" searches from the document
                //    // root, not from `item`; ".//a/cite/span/i" is probably what was meant.
                //    string inode = item.SelectSingleNode("//a/cite/span/i").InnerText;
                //    listINode.Add(inode);
                //}

                #endregion

                #region Categories and their details

                //string urlDetail = "http://xuanhuan.qidian.com/";
                //List<string> urlList = new List<string>();
                //urlList.Add("http://xuanhuan.qidian.com/");
                //urlList.Add("http://qihuan.qidian.com/");
                //urlList.Add("http://wuxia.qidian.com/");
                //urlList.Add("http://xianxia.qidian.com/");
                //urlList.Add("http://dushi.qidian.com/");
                //urlList.Add("http://zhichang.qidian.com/");
                //urlList.Add("http://junshi.qidian.com/");
                //urlList.Add("http://lishi.qidian.com/");
                //urlList.Add("http://youxi.qidian.com/");
                //urlList.Add("http://tiyu.qidian.com/");
                //urlList.Add("http://kehuan.qidian.com/");
                //urlList.Add("http://lingyi.qidian.com/");
                //foreach (string url in urlList)
                //{
                //    HtmlAgilityPack.HtmlDocument htmlDetail = web.Load(url);
                //    HtmlNode rootNode2 = htmlDetail.DocumentNode;
                //    string a = "//*[@class='book-info']";
                //    HtmlNodeCollection classList2 = rootNode2.SelectNodes(a);
                //    List<string> listINode2 = new List<string>();
                //    foreach (HtmlNode item in classList2)
                //    {
                //        // Get the book-info markup
                //        string inode = item.InnerHtml;
                //        listINode2.Add(inode);
                //    }
                //}

                #endregion

                #region Chapter content

                HtmlWeb web = new HtmlWeb();
                string u = "http://read.qidian.com/chapter/zOGI9RYmNdFhO--gcH8iFg2/h3iHSEH1cSpMs5iq0oQwLQ2";
                HtmlAgilityPack.HtmlDocument htmlDocument = web.Load(u);
                // Get the root node of the parsed HTML
                HtmlNode htmlNode = htmlDocument.DocumentNode;
                // Exact match on the whole class attribute value, not a single class token.
                string x = "//*[@class='read-content j_readContent']";
                HtmlNode htmlNodeP = htmlNode.SelectSingleNode(x);
                // SelectSingleNode returns null when nothing matches (e.g. the page layout
                // changed or an error page was served), so guard before dereferencing.
                string htmlD = htmlNodeP == null ? string.Empty : htmlNodeP.InnerText;

                #endregion

            }

                     以上只是一个简单的示例,演示如何用 XPath 提取页面内容。

  • 相关阅读:
    Git笔记
    排序学习LTR(1):排序算法的评价指标
    C++指针
    C++基础知识笔记
    Shell脚本--菜鸟教程笔记
    torch学习01-入门文档学习
    torch学习02-tensor学习
    torch学习0: 学习概览
    linux基础-用户创建及管理相关
    python-getattr() 函数 dir() 函数
  • 原文地址:https://www.cnblogs.com/dyxd/p/6902924.html
Copyright © 2011-2022 走看看