zoukankan      html  css  js  c++  java
  • [转载]C# 中对html 标签过滤

     /// <summary>
     /// Reduces an HTML fragment to plain text: drops script/iframe/frameset
     /// blocks together with their content, neutralises <c>javascript:</c>
     /// links and inline <c>on*</c> event attributes, removes every remaining
     /// tag, and finally removes form-feed, line-feed, carriage-return, tab and
     /// vertical-tab characters.
     /// </summary>
     /// <param name="html">The HTML fragment to clean. May be null or empty.</param>
     /// <returns>
     /// The text with markup removed; <paramref name="html"/> itself when it is
     /// null or empty.
     /// </returns>
     /// <remarks>
     /// NOTE(review): regex-based stripping is best-effort, not a security
     /// boundary. A lone '&lt;' with no closing '&gt;' is left in the result, so
     /// callers that write the result back into a page should still HTML-encode it.
     /// </remarks>
     private string FilterHTML(string html)
     {
         if (string.IsNullOrEmpty(html))
         {
             return html;
         }

         const System.Text.RegularExpressions.RegexOptions ignoreCase =
             System.Text.RegularExpressions.RegexOptions.IgnoreCase;

         // Ordered { pattern, replacement } pairs. Order matters: blocks whose
         // content must disappear are handled before the generic tag strip.
         // The [\s\S]*? spans are lazy so that two separate blocks do not swallow
         // the legitimate text sitting between them.
         string[][] rules =
         {
             // <script>...</script> including the script body.
             new[] { @"<script\b[\s\S]*?</script\s*>", "" },
             // href="javascript:" / href="vbscript:" prefixes; [^>] keeps the
             // match inside a single tag.
             new[] { @"\shref\s*=\s*[^>]*?script\s*:", "" },
             // Inline event handlers (onclick=, onload=, ...). \w+ keeps ordinary
             // prose such as "turn on ... x = 1" from being rewritten.
             new[] { @"\son\w+\s*=", " _disibledevent=" },
             // <iframe>...</iframe> including fallback content.
             new[] { @"<iframe\b[\s\S]*?</iframe\s*>", "" },
             // <frameset>...</frameset> including nested frames.
             new[] { @"<frameset\b[\s\S]*?</frameset\s*>", "" },
             // <img ...> tags.
             new[] { @"<img[^>]+>", "" },
             // Paragraph close and open tags.
             new[] { @"</p>", "" },
             new[] { @"<p>", "" },
             // Any tag that is still left. After this pass no '<' is followed by
             // a '>' any more, so no further per-tag replacements are needed.
             new[] { @"<[^>]*>", "" },
         };

         foreach (string[] rule in rules)
         {
             html = System.Text.RegularExpressions.Regex.Replace(html, rule[0], rule[1], ignoreCase);
         }

         // Remove form feed, newline, carriage return, tab and vertical tab.
         return System.Text.RegularExpressions.Regex.Replace(html, @"[\f\n\r\t\v]", "");
     }
  • 相关阅读:
    一本通1273货币系统(方案数背包)
    背包体积循环正序和逆序的区别
    Python字典的底层原理和优缺点
    Linux各目录及每个目录的详细介绍
    openwrt 下python程序后台运行,并将打印信息保存文件
    pycharm同一目录下无法import其他文件
    python sqlite3学习笔记
    python sqlite3查询表记录
    Pycharm快捷键的使用
    Python3 Address already in use 解决方法
  • 原文地址:https://www.cnblogs.com/iack/p/3539118.html
Copyright © 2011-2022 走看看