zoukankan      html  css  js  c++  java
  • 利用正则表达式去掉html代码

    using System.Text.RegularExpressions;//需要引用

      
    /// <summary>
    /// Removes every span that starts with "&lt;" and ends with the next "&gt;"
    /// (both inclusive) from the given text using a regular expression.
    /// </summary>
    /// <param name="strHtml">Text that may contain HTML markup. May be null or empty.</param>
    /// <returns>
    /// The input with all tag-like spans removed, or an empty string when the
    /// input is null or empty.
    /// </returns>
    private string StripHT(string strHtml)
    {
        if (string.IsNullOrEmpty(strHtml))
        {
            return string.Empty;
        }

        // Singleline makes "." match "\n" too, so a tag whose attributes are
        // wrapped across several lines is still removed. IgnoreCase was dropped
        // because the pattern contains no letters.
        return Regex.Replace(strHtml, "<.+?>", string.Empty, RegexOptions.Singleline);
    }



    /// <summary>
    /// Method two: converts an HTML fragment to plain text by removing script
    /// blocks, comments and tags, decoding a small set of character entities,
    /// and finally deleting any remaining angle brackets and carriage returns.
    /// </summary>
    /// <remarks>
    /// The original tag pattern nested <c>\w+</c> inside a lazily repeated group,
    /// which let the regex engine split a run of word characters in exponentially
    /// many ways on unterminated tags (the cause of the 100% CPU usage). The tag
    /// pattern used here has exactly one way to consume each character.
    /// Note that literal "&lt;" and "&gt;" characters, including ones decoded
    /// from <c>&amp;lt;</c> / <c>&amp;gt;</c>, are removed from the result.
    /// </remarks>
    /// <param name="strHtml">HTML text to clean. May be null or empty.</param>
    /// <returns>
    /// The plain-text rendering of <paramref name="strHtml"/>, or an empty string
    /// when the input is null or empty.
    /// </returns>
    public static string DropHTML(string strHtml)
    {
        if (string.IsNullOrEmpty(strHtml))
        {
            return string.Empty;
        }

        // Singleline lets "." cross line breaks so multi-line <script> blocks and
        // comments are removed as a whole; it has no effect on the other patterns.
        const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Singleline;

        // { pattern, replacement } pairs, applied in order. Keeping each pair on
        // one row prevents the two columns from drifting out of sync.
        string[,] rules =
        {
            // Whole script elements, including their content.
            { @"<script[^>]*?>.*?</script>", "" },
            // Comments; an unterminated comment runs to the end of the input.
            { @"<!--.*?(?:-->|$)", "" },
            // Opening, closing, self-closing and "<!" tags. Quoted attribute
            // values may contain ">"; every alternative starts with a different
            // character, so there is no ambiguous backtracking.
            { @"<(\/\s*)?!?(\w+:)?\w+(?:[\s/](?:""[^""]*""|'[^']*'|[^""'<>])*)?>", "" },
            // A carriage return together with the whitespace that follows it.
            { @"\r\s+", "" },
            { @"&(quot|#34);", "\"" },
            { @"&(amp|#38);", "&" },
            { @"&(lt|#60);", "<" },
            { @"&(gt|#62);", ">" },
            { @"&(nbsp|#160);", " " },
            { @"&(iexcl|#161);", "\u00A1" },
            { @"&(cent|#162);", "\u00A2" },
            { @"&(pound|#163);", "\u00A3" },
            { @"&(copy|#169);", "\u00A9" },
            // Any other numeric entity is dropped.
            { @"&#(\d+);", "" },
            // Stray comment terminators left without an opener.
            { @"-->", "" }
        };

        string strOutput = strHtml;
        for (int i = 0; i < rules.GetLength(0); i++)
        {
            // The static overload uses the framework's regex cache, so the
            // patterns are not re-parsed on every call.
            strOutput = Regex.Replace(strOutput, rules[i, 0], rules[i, 1], options);
        }

        // string.Replace returns a new string; the results must be assigned
        // (the original discarded them, so these removals never happened).
        strOutput = strOutput.Replace("<", "");
        strOutput = strOutput.Replace(">", "");
        strOutput = strOutput.Replace("\r", "");

        return strOutput;
    }
     

  • 相关阅读:
    lines-HDU5124(区间处理 +离散化)
    Reorder the Books-HDU5500
    Bad Hair Day-POJ3250(简单的入栈出栈)
    Count the Colors-ZOJ1610(线段树区间求)
    Just a Hook-HDU1698(线段树求区间)
    Mayor's posters-POJ2528(线段树+离散化)
    A Simple Problem with Integers-POJ3468
    Strongly connected-HDU4635
    Caocao's Bridges-HDU4738(Tarjin+求桥)
    Warm up-HUD4612(树的直径+Tarjin缩点)
  • 原文地址:https://www.cnblogs.com/wang123/p/505758.html
Copyright © 2011-2022 走看看