zoukankan      html  css  js  c++  java
  • asp.net 新闻采集 简单示例

    在网上看了点资料,自己整理了一下。我感觉如果要实际使用,新闻地址应该用 RSS 来配置,这样会更好用些。

    o(∩_∩)o 哈哈

    /// <summary>
    /// Downloads the page at <paramref name="url"/> (decoded as gb2312), cuts out the
    /// span that starts at <paramref name="begin"/> and ends with <paramref name="end"/>,
    /// strips its HTML with DelHTML and puts the result into txtContent.
    /// If either marker is missing, an alert script is written to the response instead.
    /// </summary>
    /// <param name="url">Address of the page to fetch.</param>
    /// <param name="begin">Marker text where the wanted span starts (included in the span).</param>
    /// <param name="end">Marker text where the wanted span ends (included in the span).</param>
    private void init2(string url,string begin,string end)
        {
            HttpWebRequest webRequest = (HttpWebRequest)WebRequest.Create(url);

            // using-blocks release the response, stream and reader even when the
            // download or decoding throws part-way through.
            string content;
            using (HttpWebResponse webResponse = (HttpWebResponse)webRequest.GetResponse())
            using (Stream stream = webResponse.GetResponseStream())
            using (StreamReader streamReader = new StreamReader(stream, Encoding.GetEncoding("gb2312")))
            {
                content = streamReader.ReadToEnd();
            }

            // Markers are raw markup, so compare ordinally rather than with the
            // culture-sensitive default of IndexOf(string).
            int beginIndex = string.IsNullOrEmpty(begin)
                ? -1
                : content.IndexOf(begin, System.StringComparison.Ordinal);

            // Look for the end marker only after the begin marker, so it cannot match
            // inside the begin marker itself (e.g. when both markers are the same text).
            int endIndex = (beginIndex < 0 || string.IsNullOrEmpty(end))
                ? -1
                : content.IndexOf(end, beginIndex + begin.Length, System.StringComparison.Ordinal);

            if (beginIndex < 0 || endIndex < 0)
            {
                // A null/empty marker or a marker that is not on the page is a rule error.
                Response.Write("<script>alert('规则定义错误!');</script>");
                return;
            }

            // Keep both markers in the extracted span, as the original did.
            content = content.Substring(beginIndex, endIndex + end.Length - beginIndex);
            txtContent.Text = DelHTML(content);
        }
    
        /// <summary>
        /// Strips HTML markup from a fragment and decodes a small set of character
        /// entities, returning plain text.
        /// </summary>
        /// <param name="Htmlstring">HTML fragment to clean. A null or empty value is returned unchanged.</param>
        /// <returns>
        /// The text with script elements, tags, comment remnants, line breaks followed by
        /// whitespace and stray angle brackets removed, and the entities listed in the
        /// method decoded. Any other numeric entity (&amp;#NNN;) is dropped.
        /// </returns>
        public static string DelHTML(string Htmlstring)
        {
            if (string.IsNullOrEmpty(Htmlstring))
            {
                return Htmlstring;
            }

            const System.Text.RegularExpressions.RegexOptions ignoreCase =
                System.Text.RegularExpressions.RegexOptions.IgnoreCase;

            // Remove script elements. Singleline lets ".*?" span line breaks, so scripts
            // written over several lines are removed together with their bodies.
            Htmlstring = System.Text.RegularExpressions.Regex.Replace(
                Htmlstring,
                @"<script[^>]*?>.*?</script>",
                "",
                ignoreCase | System.Text.RegularExpressions.RegexOptions.Singleline);

            // Remove tags, line breaks followed by whitespace, comment remnants and the
            // literal blank pattern, in the same order as before.
            string[] markupPatterns =
            {
                @"<(.[^>]*)>",
                @"([\r\n])[\s]+",
                @"-->",
                @"<!--.*",
                " "
            };
            for (int i = 0; i < markupPatterns.Length; i++)
            {
                Htmlstring = System.Text.RegularExpressions.Regex.Replace(
                    Htmlstring, markupPatterns[i], "", ignoreCase);
            }

            // Drop angle brackets left over from broken markup. string.Replace returns a
            // new string, so the result has to be assigned. This runs before entity
            // decoding so that text written as &lt; / &gt; is not thrown away.
            Htmlstring = Htmlstring.Replace("<", "");
            Htmlstring = Htmlstring.Replace(">", "");
            Htmlstring = Htmlstring.Replace("\r\n", "");

            // Entity pattern -> replacement text. "&amp;" is decoded last so that input
            // such as "&amp;lt;" yields the text "&lt;" instead of being decoded twice.
            string[,] entityReplacements =
            {
                { @"&(quot|#34);", "\"" },
                { @"&(lt|#60);", "<" },
                { @"&(gt|#62);", ">" },
                { @"&(nbsp|#160);", " " },
                { @"&(iexcl|#161);", "\u00A1" },
                { @"&(cent|#162);", "\u00A2" },
                { @"&(pound|#163);", "\u00A3" },
                { @"&(copy|#169);", "\u00A9" },
                { @"&#(\d+);", "" },
                { @"&(amp|#38);", "&" }
            };
            for (int i = 0; i < entityReplacements.GetLength(0); i++)
            {
                Htmlstring = System.Text.RegularExpressions.Regex.Replace(
                    Htmlstring, entityReplacements[i, 0], entityReplacements[i, 1], ignoreCase);
            }

            return Htmlstring;
        }
    
  • 相关阅读:
    VMware Workstation网卡不启动
    解决IE10以下对象不支持“bind”属性或方法
    二分法查找
    选择排序与冒泡排序
    方法内部开启线程的方法
    重写Collections实现自定义排序
    根据反射生成SQL语句
    vue插件安装备忘
    vue cli4.x 新建项目 过程提醒
    php setcooike()失败的原因之一,希望能帮到你
  • 原文地址:https://www.cnblogs.com/sh_yao/p/1891704.html
Copyright © 2011-2022 走看看