zoukankan      html  css  js  c++  java
  • HTML标签过滤方案

    取决于不同的解决机制:

    解决方案1:

    在数据库中存入标签的HTML转换符,按照原样输出。

    在存入数据库之前,先对输入调用 Server.HtmlEncode(txtboxName.Text);

    解决方案2:

    在数据库中不存入HTML标签的有关信息,过滤HTML标签,只显示文字。

    存入数据库之前加上过滤函数过滤便可:checkStr(txtboxName.Text)、StripHTML(txtboxName.Text) 或者 NoHTML(txtboxName.Text)(提供三个过滤函数,任选一个;需添加命名空间:using System.Text.RegularExpressions;)。

    以下代码均调试通过:

            /// <summary>

            /// HTML过滤方法一

            /// </summary>

            /// <param name="html"></param>

            /// <returns></returns>

            public string checkStr(string html)

            {

                System.Text.RegularExpressions.Regex regex1 = new System.Text.RegularExpressions.Regex(@"<script[\s\S]+</script *>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                System.Text.RegularExpressions.Regex regex2 = new System.Text.RegularExpressions.Regex(@" href *= *[\s\S]*script *:", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                System.Text.RegularExpressions.Regex regex3 = new System.Text.RegularExpressions.Regex(@" on[\s\S]*=", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                System.Text.RegularExpressions.Regex regex4 = new System.Text.RegularExpressions.Regex(@"<iframe[\s\S]+</iframe *>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                System.Text.RegularExpressions.Regex regex5 = new System.Text.RegularExpressions.Regex(@"<frameset[\s\S]+</frameset *>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                System.Text.RegularExpressions.Regex regex6 = new System.Text.RegularExpressions.Regex(@"\<img[^\>]+\>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                System.Text.RegularExpressions.Regex regex7 = new System.Text.RegularExpressions.Regex(@"</p>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                System.Text.RegularExpressions.Regex regex8 = new System.Text.RegularExpressions.Regex(@"<p>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                System.Text.RegularExpressions.Regex regex9 = new System.Text.RegularExpressions.Regex(@"<[^>]*>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                html = regex1.Replace(html, ""); //过滤<script></script>标记

                html = regex2.Replace(html, ""); //过滤href=javascript: (<A>) 属性

                html = regex3.Replace(html, " _disibledevent="); //过滤其它控件的on...事件

                html = regex4.Replace(html, ""); //过滤iframe

                html = regex5.Replace(html, ""); //过滤frameset

                html = regex6.Replace(html, ""); //过滤frameset

                html = regex7.Replace(html, ""); //过滤frameset

                html = regex8.Replace(html, ""); //过滤frameset

                html = regex9.Replace(html, "");

                html = html.Replace(" ", "");

                html = html.Replace("</strong>", "");

                html = html.Replace("<strong>", "");

                return html;

            }

     

    #region 过滤掉html代码

     

     

     ///   <summary>

            ///   方法二:去除HTML标记

            ///   </summary>

            ///   <param   name="StripHtml">包括HTML的源码  </param>

            ///   <returns>已经去除后的文字</returns>

     

            public static string StripHTML(string strHtml)

            {

                string[] aryReg ={

                                    @"<script[^>]*?>.*?</script>",

     

                                    @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",

                                    @"([\r\n])[\s]+",

                                    @"&(quot|#34);",

                                    @"&(amp|#38);",

                                    @"&(lt|#60);",

                                    @"&(gt|#62);",

                                    @"&(nbsp|#160);",

                                    @"&(iexcl|#161);",

                                    @"&(cent|#162);",

                                    @"&(pound|#163);",

                                    @"&(copy|#169);",

                                    @"&#(\d+);",

                                    @"-->",

                                    @"<!--.*\n"

                                   };

     

                string[] aryRep = {

                                    "",

                                    "",

                                    "",

                                    "\"",

                                    "&",

                                    "<",

                                    ">",

                                    " ",

                                    "\xa1",//chr(161),

                                    "\xa2",//chr(162),

                                    "\xa3",//chr(163),

                                    "\xa9",//chr(169),

                                    "",

                                    "\r\n",

                                    ""

                                   };

     

                string newReg = aryReg[0];

                string strOutput = strHtml;

                for (int i = 0; i < aryReg.Length; i++)

                {

                    System.Text.RegularExpressions.Regex regex = new System.Text.RegularExpressions.Regex(aryReg[i], System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                    strOutput = regex.Replace(strOutput, aryRep[i]);

                }

                strOutput.Replace("<", "");

                strOutput.Replace(">", "");

                strOutput.Replace("\r\n", "");

                return strOutput;

            }

            #endregion

     

     

     

     

     

    /// <summary>
            /// Method three: strips HTML markup from a string, decodes a small set of
            /// character entities, and HTML-encodes the remaining text so it can be
            /// written into a page as-is.
            /// </summary>
            /// <param name="Htmlstring">Source text containing HTML. May be null or empty.</param>
            /// <returns>
            /// The stripped, HTML-encoded and trimmed text, or an empty string when
            /// <paramref name="Htmlstring"/> is null or empty.
            /// </returns>
            public static string NoHTML(string Htmlstring)
            {
                if (string.IsNullOrEmpty(Htmlstring))
                {
                    return string.Empty;
                }

                // Remove script blocks. [\s\S] instead of '.' so that bodies spanning
                // several lines are removed too ('.' does not match a newline).
                Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>[\s\S]*?</script>", "",
                    RegexOptions.IgnoreCase);

                // Remove tags, then line breaks followed by whitespace, then any
                // leftover comment delimiters.
                Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
                Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
                Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
                Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

                // Entities are decoded in ONE pass so that "&amp;lt;" becomes the text
                // "&lt;" rather than being decoded a second time into "<".
                Htmlstring = Regex.Replace(
                    Htmlstring,
                    @"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);",
                    m =>
                    {
                        switch (m.Groups[1].Value.ToLowerInvariant())
                        {
                            case "quot":
                            case "#34":
                                return "\"";
                            case "amp":
                            case "#38":
                                return "&";
                            case "lt":
                            case "#60":
                                return "<";
                            case "gt":
                            case "#62":
                                return ">";
                            case "nbsp":
                            case "#160":
                                return "   ";
                            case "iexcl":
                            case "#161":
                                return "\u00A1";
                            case "cent":
                            case "#162":
                                return "\u00A2";
                            case "pound":
                            case "#163":
                                return "\u00A3";
                            case "copy":
                            case "#169":
                                return "\u00A9";
                            default:
                                // Any other numeric reference is dropped.
                                return "";
                        }
                    },
                    RegexOptions.IgnoreCase);

                // string.Replace returns a new string; the results must be assigned
                // or these three calls have no effect.
                Htmlstring = Htmlstring.Replace("<", "");
                Htmlstring = Htmlstring.Replace(">", "");
                Htmlstring = Htmlstring.Replace("\r\n", "");

                // WebUtility.HtmlEncode does not need an active HttpContext, so the
                // method also works outside a web request (background work, tests).
                // NOTE(review): if the application configures a custom HttpEncoder,
                // Server.HtmlEncode would have used it - confirm none is configured.
                return System.Net.WebUtility.HtmlEncode(Htmlstring).Trim();
            }

     

  • 相关阅读:
    表格文字溢出用省略号代替处理方法
    【Linux】Mysql5.7.12源码编译安装及配置
    【Linux】JDK安装及配置
    [SDOI2017]序列计数
    P1777 帮助_NOI导刊2010提高(03)
    BSGS和EXBSGS
    排列最小值
    三元组[01 Trie计数]
    数学题
    The Preliminary Contest for ICPC Asia Xuzhou 2019 E XKC's basketball team [单调栈上二分]
  • 原文地址:https://www.cnblogs.com/shineqiujuan/p/1335683.html
Copyright © 2011-2022 走看看