zoukankan      html  css  js  c++  java
  • 正则过滤html标记

        class Class1
        {
            /// <summary>
            /// 去除HTML标记
            /// </summary>
            /// <param name="strHtml">包括HTML的源码 </param>
            /// <returns>已经去除后的文字</returns>
           public static string StripHTML(string strHtml)
            {
                string[] aryReg ={
             @"<script[^>]*?>.*?</script>",
    
              @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(file://[""'tbnr]|[^/7])*?/7|/w+)|.{0})|/s)*?(///s*)?>",
              @"([\r\n])[\s]+",
              @"&(quot|#34);",
              @"&(amp|#38);",
    
              @"&(lt|#60);",
              @"&(gt|#62);", 
              @"&(nbsp|#160);", 
              @"&(iexcl|#161);",
              @"&(cent|#162);",
              @"&(pound|#163);",
              @"&(copy|#169);",
              @"&#(\d+);",
              @"-->",
              @"<!--.*\n"
            
             };
    
               string[] aryRep = {
               "",
               "",
               "",
               "\"",
               "&",
               "<",
               ">",
               " ",
               "\xa1",//chr(161),
               "\xa2",//chr(162),
              "\xa3",//chr(163),
               "\xa9",//chr(169),
               "",
               "\r\n",
               ""
              };
    
                string newReg = aryReg[0];
                string strOutput = strHtml;
                for (int i = 0; i < aryReg.Length; i++)
                {
                    Regex regex = new Regex(aryReg[i], RegexOptions.IgnoreCase);
                    strOutput = regex.Replace(strOutput, aryRep[i]);
                }
    
                strOutput.Replace("<", "");
                strOutput.Replace(">", "");
                strOutput.Replace("\r\n", "");
    
    
                return strOutput;
            }
        }
    

    源:http://www.cnblogs.com/mishy/archive/2008/01/03/1024953.html  



    返回导读目录,阅读更多随笔



    分割线,以下为博客签名:

    软件臭虫情未了
    • 编码一分钟
    • 测试十年功


    随笔如有错误或不恰当之处、为希望不误导他人,望大侠们给予批评指正。

  • 相关阅读:
    CodeForces 514B
    CodeForces 514A
    UVa 818
    HDU 1003
    UVa百题总结
    UVa 11526
    UVa 12412
    UVa 211
    UVa 1587
    UVa 225 – Golygons [DFS+剪枝]
  • 原文地址:https://www.cnblogs.com/08shiyan/p/2155880.html
Copyright © 2011-2022 走看看