zoukankan      html  css  js  c++  java
  • 读取chinanews新闻列表及内容

    // RSS feed addresses on chinanews.com, one entry per news category.
    string[] urilist ={ "http://www.chinanews.com/rss/scroll-news.xml",// headlines
                                "http://www.chinanews.com/rss/finance.xml",// finance
                                "http://www.chinanews.com/rss/sports.xml",// sports
                                "http://www.chinanews.com/rss/ent.xml",// entertainment
                                "http://www.chinanews.com/rss/health.xml",// health
                                "http://www.chinanews.com/rss/auto.xml",// autos
                                "http://www.chinanews.com/rss/society.xml",// society
                         };
     private void ReadNews(string uri, ushort type)
            {
                try
                {
                    string xml = NetHelper.ReadHtml(uri, Encoding.GetEncoding("gb2312"));
                    XmlDocument doc = new XmlDocument();
                    doc.LoadXml(xml);
                    Dictionary<ushort, NewsEntity> newslist = new Dictionary<ushort, NewsEntity>();
                    XmlNodeList list = doc.SelectNodes("rss/channel/item");
                    for (int i = 0; i < list.Count; i++)
                    {
                        string title = System.Helpers.XmlHelper.GetChileNode(list[i], "title").InnerText.Replace("(图)""").Replace("(组图)""").Replace("(图)""");
                        string link = System.Helpers.XmlHelper.GetChileNode(list[i], "link").InnerText;
                        string result = "";
                        int end = 0;
                        string html = NetHelper.ReadHtml(link, Encoding.Default);
                        int start = html.IndexOf("<div class=left_zw>");
                        if (start > 0)
                            end = html.IndexOf("<!--正文-->", start);
                        result = html.Substring(start, end - start);

                        int _end = 0;
                        int _start = result.IndexOf(@"<div id=""function_code_page"">");
                        if (_start > 0)
                            _end = result.IndexOf("</div>", _start);
                        string pageStr = result.Substring(_start, _end - _start);

                        result = result.Replace(pageStr, "");
                        result = Regex.Replace(result, "\r""", RegexOptions.IgnoreCase);
                        result = Regex.Replace(result, "\n""", RegexOptions.IgnoreCase);
                        result = Regex.Replace(result, "<.*?>""", RegexOptions.IgnoreCase);                    
                        result = Regex.Replace(result, @"&(.{2,6});""", RegexOptions.IgnoreCase);  
                        result = Regex.Replace(result, "\r{2,}""\r", RegexOptions.IgnoreCase);
                        result = Regex.Replace(result, "\t{2,}""\t", RegexOptions.IgnoreCase);
                        result = Regex.Replace(result, @"\s{2,}""", RegexOptions.IgnoreCase);
                        Console.WriteLine(result);
                        result = result.Trim('\r''\n').TrimEnd();
                        if (!string.IsNullOrEmpty(title) && !string.IsNullOrEmpty(result))
                        {
                            
                        }
                    }
                             
            }
                catch
                {
                    
                }

            }
  • 相关阅读:
    AVWS安装
    Windows 组策略运用
    Windows系统盘清理
    windows计划任务遇到的坑
    mstsc 复制粘贴,遇到的坑~以及输入法无法切换问题
    pyinstaller打包后运行报错-No module named 'pymssql._mssql'
    Word英文如何优雅对齐显示
    键盘除了fn键都失效了,键盘失灵怎么办?
    Ignatius's puzzle
    Train Problem II
  • 原文地址:https://www.cnblogs.com/94cool/p/2218576.html
Copyright © 2011-2022 走看看