zoukankan      html  css  js  c++  java
  • 收藏的2个正则html标签剔除方法

    方法1

    /// <summary>
    /// Reduces an HTML fragment to plain text: script, iframe and frameset
    /// elements are dropped together with their content, javascript-style
    /// href prefixes and inline event-handler attributes are neutralised,
    /// every remaining tag is removed, and finally all space characters are
    /// stripped from the result.
    /// </summary>
    /// <param name="html">The HTML fragment to clean. May be null or empty.</param>
    /// <returns>
    /// The cleaned text, or an empty string when <paramref name="html"/> is
    /// null or empty.
    /// </returns>
    public string checkStr(string html)
    {
        if (string.IsNullOrEmpty(html))
        {
            return string.Empty;
        }

        const System.Text.RegularExpressions.RegexOptions ignoreCase =
            System.Text.RegularExpressions.RegexOptions.IgnoreCase;

        // Each row is { pattern, replacement }; rows are applied top to bottom.
        // The element patterns use lazy quantifiers so that one match covers a
        // single element instead of everything between the first opening tag
        // and the last closing tag in the document.
        string[,] rules =
        {
            // script elements, including their content
            { @"<script[\s\S]+?</script *>", "" },
            // href=...script: prefixes (e.g. href="javascript:), kept inside one tag
            { @" href *= *[^<>]*?script *:", "" },
            // inline on... event-handler attributes of any element
            { @" on\w+ *=", " _disibledevent=" },
            // iframe elements, including their content
            { @"<iframe[\s\S]+?</iframe *>", "" },
            // frameset elements, including their content
            { @"<frameset[\s\S]+?</frameset *>", "" },
            // img tags
            { @"\<img[^\>]+\>", "" },
            // closing paragraph tags
            { @"</p>", "" },
            // opening paragraph tags
            { @"<p>", "" },
            // any tag that is still left
            { @"<[^>]*>", "" }
        };

        for (int i = 0; i < rules.GetLength(0); i++)
        {
            html = System.Text.RegularExpressions.Regex.Replace(
                html, rules[i, 0], rules[i, 1], ignoreCase);
        }

        // NOTE(review): this removes every space character from the text. The
        // snippet may originally have targeted the "&nbsp;" entity instead;
        // confirm the intent before relying on it for space-separated text.
        html = html.Replace(" ", "");

        // No separate <strong>/</strong> removal is needed: the catch-all tag
        // rule above has already removed every complete tag.
        return html;
    }




    方法2

    #region 过滤掉 html代码
    /// <summary>
    /// Converts an HTML fragment to plain text: script elements and tags are
    /// removed, a handful of common character entities are decoded, HTML
    /// comments are dropped, and any angle brackets or CRLF sequences that
    /// remain afterwards are stripped.
    /// </summary>
    /// <param name="strHtml">The HTML fragment to clean. May be null or empty.</param>
    /// <returns>
    /// The cleaned text, or an empty string when <paramref name="strHtml"/>
    /// is null or empty.
    /// </returns>
    public static string StripHTML(string strHtml)
    {
        if (string.IsNullOrEmpty(strHtml))
        {
            return string.Empty;
        }

        // aryReg[i] is replaced by aryRep[i]; the two arrays are parallel and
        // are applied in index order.
        string[] aryReg =
        {
            // script elements; [\s\S] lets the body span several lines
            @"<script[^>]*?>[\s\S]*?</script>",
            // any opening, closing or self-closing tag
            @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
            // a line break followed by further whitespace
            @"([\r\n])[\s]+",
            @"&(quot|#34);",
            @"&(amp|#38);",
            @"&(lt|#60);",
            @"&(gt|#62);",
            @"&(nbsp|#160);",
            @"&(iexcl|#161);",
            @"&(cent|#162);",
            @"&(pound|#163);",
            @"&(copy|#169);",
            // any other numeric entity is dropped
            @"&#(\d+);",
            // end of an HTML comment becomes a line break ...
            @"-->",
            // ... so that this rule can delete the comment up to that break
            @"<!--.*\n"
        };

        string[] aryRep =
        {
            "",
            "",
            "",
            "\"",
            "&",
            "<",
            ">",
            " ",
            "\xa1", // chr(161)
            "\xa2", // chr(162)
            "\xa3", // chr(163)
            "\xa9", // chr(169)
            "",
            "\r\n",
            ""
        };

        string strOutput = strHtml;
        for (int i = 0; i < aryReg.Length; i++)
        {
            strOutput = System.Text.RegularExpressions.Regex.Replace(
                strOutput,
                aryReg[i],
                aryRep[i],
                System.Text.RegularExpressions.RegexOptions.IgnoreCase);
        }

        // string.Replace returns a new string, so each result has to be
        // assigned back; otherwise these three clean-up steps do nothing.
        strOutput = strOutput.Replace("<", "");
        strOutput = strOutput.Replace(">", "");
        strOutput = strOutput.Replace("\r\n", "");
        return strOutput;
    }
    #endregion

    噢耶游戏是中国最大的轻社交游戏开发商,致力于手机页游的研发及推广业务。我们首创性地提出了HTML5游戏中心思路,在第三方App 中嵌入式休闲游戏,为开发者提供了全新的应用内游戏解决方案。
  • 相关阅读:
    今天看了几个小时的微信小程序说说心得体会
    关于wordpress中的contact form7和WP Mail SMTP的一些设置
    关于163发邮件报错535 Error:authentication failed解决方法
    Numpy 基本除法运算和模运算
    基本的图像操作和处理
    Python中flatten用法
    media
    TensorFlow模型保存和提取方法
    docker 默认用户和密码
    Windows安装TensorFlow
  • 原文地址:https://www.cnblogs.com/yintian2/p/968127.html
Copyright © 2011-2022 走看看