zoukankan      html  css  js  c++  java
  • .NET 去除一段文本中的HTML标记

     1  /// <summary>
     2     /// 去除一段文字中的HTML标记
     3     /// </summary>
     4     /// <param name="Htmlstring"></param>
     5     /// <returns></returns>
     6     public static string NoHTML(string Htmlstring)
     7     {
     8         //删除脚本 
     9         Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase);
    10         //删除HTML 
    11         Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
    12         Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
    13         Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    14         Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);
    15 
    16         Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
    17         Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
    18         Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    19         Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    20         Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", "   ", RegexOptions.IgnoreCase);
    21         Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
    22         Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
    23         Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
    24         Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);
    25         Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);
    26 
    27         Htmlstring.Replace("<", "");
    28         Htmlstring.Replace(">", "");
    29         Htmlstring.Replace("\r\n", "");
    30         Htmlstring = HttpContext.Current.Server.HtmlEncode(Htmlstring).Trim();
    31 
    32         return Htmlstring;
    33     }

     方法2:

     #region Function NoHTML
            public static string NoHTML(string html)
            {
                System.Text.RegularExpressions.Regex regex1 = new System.Text.RegularExpressions.Regex(@"<script[\s\S]+</script *>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
                System.Text.RegularExpressions.Regex regex2 = new System.Text.RegularExpressions.Regex(@" href *= *[\s\S]*script *:", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
                System.Text.RegularExpressions.Regex regex3 = new System.Text.RegularExpressions.Regex(@" no[\s\S]*=", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
                System.Text.RegularExpressions.Regex regex4 = new System.Text.RegularExpressions.Regex(@"<iframe[\s\S]+</iframe *>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
                System.Text.RegularExpressions.Regex regex5 = new System.Text.RegularExpressions.Regex(@"<frameset[\s\S]+</frameset *>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
                System.Text.RegularExpressions.Regex regex6 = new System.Text.RegularExpressions.Regex(@"\<img[^\>]+\>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
                System.Text.RegularExpressions.Regex regex7 = new System.Text.RegularExpressions.Regex(@"</p>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
                System.Text.RegularExpressions.Regex regex8 = new System.Text.RegularExpressions.Regex(@"<p>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
                System.Text.RegularExpressions.Regex regex9 = new System.Text.RegularExpressions.Regex(@"<[^>]*>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
                html = regex1.Replace(html, ""); //过滤<script></script>标记 
                html = regex2.Replace(html, ""); //过滤href=javascript: (<A>) 属性 
                html = regex3.Replace(html, " _disibledevent="); //过滤其它控件的on...事件 
                html = regex4.Replace(html, ""); //过滤iframe 
                html = regex5.Replace(html, ""); //过滤frameset 
                html = regex6.Replace(html, ""); //过滤frameset 
                html = regex7.Replace(html, ""); //过滤frameset 
                html = regex8.Replace(html, ""); //过滤frameset 
                html = regex9.Replace(html, "");
                html = html.Replace(" ", "");
                html = html.Replace("</strong>", "");
                html = html.Replace("<strong>", "");
                return html;
            }
            #endregion
  • 相关阅读:
    C# 控制反转(IOC: Inverse Of Control) & 依赖注入(DI: Independence Inject)
    英语常见短语汇总001
    ASP.Net Web.config 中引用外部config文件
    CSS样式汇总
    RSA非对称加密算法
    排序算法【2】——快速排序
    cmake引入boost
    boost之algorithm
    tar命令
    欧拉定理
  • 原文地址:https://www.cnblogs.com/xdoudou/p/3029996.html
Copyright © 2011-2022 走看看