zoukankan      html  css  js  c++  java
  • 用Regex去除HTML标记

    /// <summary>
    /// Strips HTML markup from a string and decodes a small set of common character entities.
    /// </summary>
    /// <param name="strHtml">HTML source text. May be <c>null</c> or empty.</param>
    /// <returns>
    /// The input with script blocks, comments and tags removed and the entities
    /// <c>&amp;quot; &amp;amp; &amp;lt; &amp;gt; &amp;nbsp; &amp;iexcl; &amp;cent; &amp;pound; &amp;copy;</c>
    /// (and their numeric forms) decoded. Returns an empty string when
    /// <paramref name="strHtml"/> is <c>null</c> or empty.
    /// </returns>
    /// <remarks>
    /// The rules in <c>HtmlStripRules</c> are applied in order, each one to the output of the
    /// previous one. After that, every remaining <c>&lt;</c>, <c>&gt;</c>, CR/LF pair and literal
    /// <c>&amp;nbsp;</c> is deleted. This final pass also removes angle brackets that were produced
    /// by decoding <c>&amp;lt;</c>/<c>&amp;gt;</c>, so entity-encoded markup in the input cannot
    /// come back as live markup in the result.
    /// </remarks>
    public static string StripHTML(string strHtml)
    {
        if (string.IsNullOrEmpty(strHtml))
        {
            return string.Empty;
        }

        string strOutput = strHtml;
        foreach (HtmlStripRule rule in HtmlStripRules)
        {
            strOutput = rule.Pattern.Replace(strOutput, rule.Replacement);
        }

        // string.Replace returns a new string; the result must be kept for the clean-up to apply.
        return strOutput
            .Replace("<", "")
            .Replace(">", "")
            .Replace("\r\n", "")
            .Replace("&nbsp;", "");
    }

    /// <summary>
    /// Ordered rewrite rules used by <see cref="StripHTML"/>. Order matters: whole script blocks
    /// and comments must go before generic tag stripping, and entities are decoded only after
    /// all real markup has been removed.
    /// </summary>
    private static readonly HtmlStripRule[] HtmlStripRules =
    {
        // Singleline lets '.' cross line breaks so multi-line scripts and comments are removed whole.
        new HtmlStripRule(@"<script[^>]*?>.*?</script>", "", RegexOptions.Singleline),
        new HtmlStripRule(@"<!--.*?-->", "", RegexOptions.Singleline),

        // Any opening, closing or self-closing tag, with optional namespace prefix and attributes.
        new HtmlStripRule(@"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>", "", RegexOptions.None),

        // A line break followed by further whitespace.
        new HtmlStripRule(@"([\r\n])[\s]+", "", RegexOptions.None),

        // Named and numeric forms of the supported entities.
        new HtmlStripRule(@"&(quot|#34);", "\"", RegexOptions.None),
        new HtmlStripRule(@"&(amp|#38);", "&", RegexOptions.None),
        new HtmlStripRule(@"&(lt|#60);", "<", RegexOptions.None),
        new HtmlStripRule(@"&(gt|#62);", ">", RegexOptions.None),
        new HtmlStripRule(@"&(nbsp|#160);", " ", RegexOptions.None),
        new HtmlStripRule(@"&(iexcl|#161);", "\u00A1", RegexOptions.None),
        new HtmlStripRule(@"&(cent|#162);", "\u00A2", RegexOptions.None),
        new HtmlStripRule(@"&(pound|#163);", "\u00A3", RegexOptions.None),
        new HtmlStripRule(@"&(copy|#169);", "\u00A9", RegexOptions.None),

        // Every other numeric entity is removed, not decoded.
        new HtmlStripRule(@"&#(\d+);", "", RegexOptions.None),

        // Left-overs of unbalanced comments: a stray terminator becomes a line break,
        // and an unterminated opener is removed up to the end of its line.
        new HtmlStripRule(@"-->", "\r\n", RegexOptions.None),
        new HtmlStripRule(@"<!--.*\n", "", RegexOptions.None)
    };

    /// <summary>Pairs a case-insensitive regular expression with its replacement text.</summary>
    private sealed class HtmlStripRule
    {
        public readonly Regex Pattern;
        public readonly string Replacement;

        public HtmlStripRule(string pattern, string replacement, RegexOptions extraOptions)
        {
            Pattern = new Regex(pattern, RegexOptions.IgnoreCase | extraOptions);
            Replacement = replacement;
        }
    }

  • 相关阅读:
    python 字典
    python 列表
    被闭包啪啪啪的打脸之 闭包的错误使用
    TCP的三次握手和四次挥手
    传输层的TCP和UDP协议
    个人小程序应用开发指南
    ES2019 / ES10有什么新功能?
    CSS开启硬件加速来提高网站性能
    js中this的指向问题
    Js面向对象构造函数继承
  • 原文地址:https://www.cnblogs.com/cuiwenke/p/1688407.html
Copyright © 2011-2022 走看看