zoukankan      html  css  js  c++  java
  • C# NHtmlFilter 帮你过滤Html危险脚本 防止XSS攻击

    转:http://www.oschina.net/code/snippet_222150_9776

    与原文代码略有改动

     /// <summary>
            /// Whitelist-based HTML filter intended to strip dangerous markup (XSS) from
            /// user-submitted HTML. Only the elements and attributes registered in
            /// <see cref="vAllowed"/> survive; everything else is removed or escaped.
            /// </summary>
            /// <remarks>
            /// An instance keeps per-call state in <see cref="vTagCounts"/> (reset at the start of
            /// every <see cref="filter"/> call and mutated while tags are processed), so a single
            /// instance must not be used from several threads at once.
            /// </remarks>
            public class NHtmlFilter
            {
                protected static readonly RegexOptions REGEX_FLAGS_SI = RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled;

                private const string P_COMMENTS = "<!--(.*?)-->";
                private static readonly Regex P_COMMENT = new Regex("^!--(.*)--$", REGEX_FLAGS_SI);
                private const string P_TAGS = "<(.*?)>";
                private static readonly Regex P_END_TAG = new Regex("^/([a-z0-9]+)", REGEX_FLAGS_SI);
                private static readonly Regex P_START_TAG = new Regex("^([a-z0-9]+)(.*?)(/?)$", REGEX_FLAGS_SI);
                private static readonly Regex P_QUOTED_ATTRIBUTES = new Regex(@"([a-z0-9|(a-z0-9\-a-z0-9)]+)=([""'])(.*?)\2", REGEX_FLAGS_SI);
                private static readonly Regex P_UNQUOTED_ATTRIBUTES = new Regex(@"([a-z0-9]+)(=)([^""\s']+)", REGEX_FLAGS_SI);
                private static readonly Regex P_PROTOCOL = new Regex("^([^:]+):", REGEX_FLAGS_SI);
                private static readonly Regex P_ENTITY = new Regex(@"&#(\d+);?");
                private static readonly Regex P_ENTITY_UNICODE = new Regex("&#x([0-9a-f]+);?");
                private static readonly Regex P_ENCODE = new Regex("%([0-9a-f]{2});?");
                private static readonly Regex P_VALID_ENTITIES = new Regex("&([^&;]*)(?=(;|&|$))");
                private static readonly Regex P_VALID_QUOTES = new Regex("(>|^)([^<]+?)(<|$)", RegexOptions.Singleline | RegexOptions.Compiled);
                private const string P_END_ARROW = "^>";
                private const string P_BODY_TO_END = "<([^>]*?)(?=<|$)";
                private const string P_XML_CONTENT = "(^|>)([^<]*?)(?=>)";
                private const string P_STRAY_LEFT_ARROW = "<([^>]*?)(?=<|$)";
                private const string P_STRAY_RIGHT_ARROW = "(^|>)([^<]*?)(?=>)";
                private const string P_QUOTE = "\"";
                private const string P_LEFT_ARROW = "<";
                private const string P_RIGHT_ARROW = ">";
                private const string P_BOTH_ARROWS = "<>";

                /**
                 * flag determining whether to try to make tags when presented with "unbalanced"
                 * angle brackets (e.g. "<b text </b>" becomes "<b> text </b>").  If set to false,
                 * unbalanced angle brackets will be html escaped.
                 */
                protected static bool alwaysMakeTags = true;

                /**
                 * flag determining whether comments are stripped from the input String.
                 */
                protected static bool stripComment = true;

                /// <summary>
                /// Elements that are never allowed; a matching open/close pair is removed
                /// together with its content.
                /// </summary>
                private String[] vDisallowed { get; set; }

                /// <summary>
                /// Whitelist: element name mapped to the attribute names allowed on it.
                /// </summary>
                protected Dictionary<String, List<String>> vAllowed { get; set; }

                /** counts of open tags for each (allowable) html element **/
                protected Dictionary<String, int> vTagCounts;

                /** html elements which must always be self-closing (e.g. "<img />") **/
                protected String[] vSelfClosingTags;

                /** html elements which must always have separate opening and closing tags (e.g. "<b></b>") **/
                protected String[] vNeedClosingTags;

                /** attributes which should be checked for valid protocols **/
                protected String[] vProtocolAtts;

                /** allowed protocols **/
                protected String[] vAllowedProtocols;

                /** tags which should be removed if they contain no content (e.g. "<b></b>" or "<b />") **/
                protected String[] vRemoveBlanks;

                /** entities allowed within html markup **/
                protected String[] vAllowedEntities;

                /// <summary>
                /// When true, each filtering stage is traced through <see cref="debug"/>.
                /// </summary>
                protected bool vDebug;

                /// <summary>Creates a filter with debug tracing switched off.</summary>
                public NHtmlFilter() : this(false) { }

                /// <summary>
                /// Creates a filter with the built-in whitelist.
                /// </summary>
                /// <param name="debug">true to trace every filtering stage to the debug output.</param>
                public NHtmlFilter(bool debug)
                {
                    vAllowed = new Dictionary<String, List<String>>();

                    #region whitelist of elements and their attributes

                    allow("a", "target", "href", "title", "class", "style");
                    // NOTE(review): "addr" is not an HTML element; "abbr" may have been intended - confirm.
                    allow("addr", "title", "class", "style");
                    allow("address", "class", "style");
                    allow("area", "shape", "coords", "href", "alt");
                    allow("article");
                    allow("aside");
                    allow("audio", "autoplay", "controls", "loop", "preload", "src", "class", "style");
                    allow("b", "class", "style");
                    allow("bdi", "dir");
                    allow("bdo", "dir");
                    allow("big");
                    allow("blockquote", "cite", "class", "style");
                    allow("br");
                    allow("caption", "class", "style");
                    allow("center");
                    allow("cite");
                    allow("code", "class", "style");
                    allow("col", "align", "valign", "span", "width", "class", "style");
                    allow("colgroup", "align", "valign", "span", "width", "class", "style");
                    allow("dd", "class", "style");
                    allow("del", "datetime");
                    allow("details", "open");
                    allow("div", "class", "style");
                    allow("dl", "class", "style");
                    allow("dt", "class", "style");
                    allow("em", "class", "style");
                    allow("font", "color", "size", "face");
                    allow("footer");
                    allow("h1", "class", "style");
                    allow("h2", "class", "style");
                    allow("h3", "class", "style");
                    allow("h4", "class", "style");
                    allow("h5", "class", "style");
                    allow("h6", "class", "style");
                    allow("header");
                    allow("hr");
                    allow("i", "class", "style");
                    allow("img", "src", "alt", "title", "style", "width", "height", "id", "_src", "loadingclass", "class", "data-latex", "data-id", "data-type", "data-s");
                    allow("ins", "datetime");
                    allow("li", "class", "style");
                    allow("mark");
                    allow("nav");
                    allow("ol", "class", "style");
                    allow("p", "class", "style");
                    allow("pre", "class", "style");
                    allow("s");
                    allow("section");
                    allow("small");
                    allow("span", "class", "style");
                    allow("sub", "class", "style");
                    allow("sup", "class", "style");
                    allow("strong", "class", "style");
                    allow("table", "width", "border", "align", "valign", "class", "style");
                    allow("tbody", "align", "valign", "class", "style");
                    allow("td", "width", "rowspan", "colspan", "align", "valign", "class", "style");
                    allow("tfoot", "align", "valign", "class", "style");
                    allow("th", "width", "rowspan", "colspan", "align", "valign", "class", "style");
                    allow("thead", "align", "valign", "class", "style");
                    allow("tr", "rowspan", "align", "valign", "class", "style");
                    allow("tt");
                    allow("u");
                    allow("ul", "class", "style");
                    allow("video", "autoplay", "controls", "loop", "preload", "src", "height", "width", "class", "style");

                    #endregion

                    vDebug = debug;
                    vTagCounts = new Dictionary<String, int>();

                    // Whitelisted void elements: they never get a closing tag. Without "br"/"hr"/
                    // "area"/"col" here a plain "<br>" was counted as open and "</br>" was appended.
                    vSelfClosingTags = new String[] { "img", "br", "hr", "area", "col" };
                    vNeedClosingTags = new String[] { "a", "b", "strong", "i", "em" };
                    vDisallowed = new String[] { "script" };
                    // NOTE(review): "https" is not listed, so https links are rewritten to "#..." - confirm intent.
                    vAllowedProtocols = new String[] { "http", "mailto" }; // no ftp.
                    vProtocolAtts = new String[] { "src", "href" };
                    vRemoveBlanks = new String[] { "a", "b", "strong", "i", "em" };
                    vAllowedEntities = new String[] { "amp", "gt", "lt", "quot" };
                    stripComment = true;
                    alwaysMakeTags = true;
                }

                /// <summary>Registers an element and the attributes permitted on it.</summary>
                private void allow(String tag, params String[] attributes)
                {
                    vAllowed.Add(tag, new List<String>(attributes));
                }

                /// <summary>Clears the per-call open-tag counters.</summary>
                protected void reset()
                {
                    vTagCounts = new Dictionary<String, int>();
                }

                /// <summary>Writes <paramref name="msg"/> to the debug output when tracing is enabled.</summary>
                protected void debug(String msg)
                {
                    if (vDebug)
                    {
                        System.Diagnostics.Debug.WriteLine(msg);
                    }
                }

                //---------------------------------------------------------------
                // my versions of some PHP library functions

                /// <summary>
                /// Returns a one-character string for the given UTF-16 code unit.
                /// </summary>
                /// <param name="dec">Numeric value; it is cast to <c>char</c>, so only the low 16 bits are kept
                /// in an unchecked context.</param>
                public static String chr(int dec)
                {
                    return "" + ((char)dec);
                }

                /// <summary>
                /// Escapes double quotes and angle brackets as entities and turns line feeds into
                /// "&lt;br&gt;". Ampersands are left untouched.
                /// </summary>
                /// <param name="str">Text to escape; must not be null.</param>
                /// <returns>The escaped text.</returns>
                public static String htmlSpecialChars(String str)
                {
                    str = str.Replace(P_QUOTE, "&quot;");
                    str = str.Replace(P_LEFT_ARROW, "&lt;");
                    str = str.Replace(P_RIGHT_ARROW, "&gt;");
                    str = str.Replace("\n", "<br>");
                    return str;
                }

                //---------------------------------------------------------------

                /**
                 * given a user submitted input String, filter out any invalid or restricted
                 * html.
                 * 
                 * @param input text (i.e. submitted by a user) than may contain html
                 * @return "clean" version of input, with only valid, whitelisted html elements allowed;
                 *         an empty string when input is null
                 */
                public String filter(String input)
                {
                    reset();

                    // Nothing to sanitize; previously a null input surfaced as an exception from Regex.Replace.
                    if (input == null)
                    {
                        return string.Empty;
                    }

                    String s = input;

                    debug("************************************************");
                    debug("              INPUT: " + input);

                    s = escapeComments(s);
                    debug("     escapeComments: " + s);

                    s = balanceHTML(s);
                    debug("        balanceHTML: " + s);

                    s = checkTags(s);
                    debug("          checkTags: " + s);

                    s = processRemoveBlanks(s);
                    debug("processRemoveBlanks: " + s);

                    s = validateEntities(s);
                    debug("    validateEntites: " + s);

                    debug("************************************************\n\n");
                    return s;
                }

                /// <summary>Entity-escapes the content of every HTML comment so it cannot hide markup.</summary>
                protected String escapeComments(String s)
                {
                    return Regex.Replace(s, P_COMMENTS, new MatchEvaluator(ConverMatchComments), RegexOptions.Singleline);
                }

                /// <summary>Thin wrapper over <c>Regex.Replace</c> with the (pattern, replacement, input) argument order.</summary>
                protected String regexReplace(String regex_pattern, String replacement, String s)
                {
                    return Regex.Replace(s, regex_pattern, replacement);
                }

                /// <summary>
                /// Repairs or escapes unbalanced angle brackets, depending on <see cref="alwaysMakeTags"/>.
                /// </summary>
                protected String balanceHTML(String s)
                {
                    if (alwaysMakeTags)
                    {
                        //
                        // try and form html
                        //
                        s = regexReplace(P_END_ARROW, "", s);
                        s = regexReplace(P_BODY_TO_END, "<$1>", s);
                        s = regexReplace(P_XML_CONTENT, "$1<$2", s);
                    }
                    else
                    {
                        //
                        // escape stray brackets
                        //
                        s = regexReplace(P_STRAY_LEFT_ARROW, "&lt;$1", s);
                        s = regexReplace(P_STRAY_RIGHT_ARROW, "$1$2&gt;<", s);

                        //
                        // the last regexp causes '<>' entities to appear
                        // (we need to do a lookahead assertion so that the last bracket can
                        // be used in the next pass of the regexp)
                        //
                        s = s.Replace(P_BOTH_ARROWS, "");
                    }
                    return s;
                }

                /// <summary>
                /// Removes disallowed elements with their content, rewrites every remaining tag through
                /// the whitelist, and appends closing tags for elements left open.
                /// </summary>
                protected String checkTags(String s)
                {
                    // Drop disallowed elements including their content. Case-insensitive and
                    // dot-matches-newline so "<SCRIPT>" and multi-line bodies are covered; ".*?" for the
                    // body so an empty element does not swallow text up to a later closing tag.
                    foreach (var item in vDisallowed)
                    {
                        string pattern = string.Format(@"<{0}\b.*?>.*?</{0}>", Regex.Escape(item));
                        s = Regex.Replace(s, pattern, "", RegexOptions.IgnoreCase | RegexOptions.Singleline);
                    }
                    s = Regex.Replace(s, P_TAGS, new MatchEvaluator(ConverMatchTags), RegexOptions.Singleline);

                    // these get tallied in processTag
                    // (remember to reset before subsequent calls to filter method)
                    System.Text.StringBuilder closed = new System.Text.StringBuilder(s);
                    foreach (KeyValuePair<String, int> open in vTagCounts)
                    {
                        for (int ii = 0; ii < open.Value; ii++)
                        {
                            closed.Append("</").Append(open.Key).Append('>');
                        }
                    }

                    return closed.ToString();
                }

                /// <summary>Removes content-less occurrences of the tags listed in <see cref="vRemoveBlanks"/>.</summary>
                protected String processRemoveBlanks(String s)
                {
                    foreach (String tag in vRemoveBlanks)
                    {
                        s = regexReplace("<" + tag + @"(\s[^>]*)?></" + tag + ">", "", s);
                        s = regexReplace("<" + tag + @"(\s[^>]*)?/>", "", s);
                    }
                    return s;
                }

                /// <summary>
                /// Rewrites the inside of one tag (the text between '&lt;' and '&gt;').
                /// </summary>
                /// <returns>The sanitized tag, or an empty string when the tag must be dropped.</returns>
                private String processTag(String s)
                {
                    // ending tags
                    Match m = P_END_TAG.Match(s);
                    if (m.Success)
                    {
                        // A rejected end tag can never match the start-tag or comment patterns
                        // (it begins with '/'), so the result is final either way.
                        return processEndTag(m.Groups[1].Value.ToLowerInvariant());
                    }

                    // starting tags
                    m = P_START_TAG.Match(s);
                    if (m.Success)
                    {
                        return processStartTag(m.Groups[1].Value.ToLowerInvariant(), m.Groups[2].Value, m.Groups[3].Value);
                    }

                    // comments
                    m = P_COMMENT.Match(s);
                    if (!stripComment && m.Success)
                    {
                        return "<" + m.Value + ">";
                    }

                    return "";
                }

                /// <summary>
                /// Emits a closing tag only when a matching element is currently open; a stray
                /// closing tag is dropped so it cannot push the counter below zero and cancel the
                /// auto-closing of a later opening tag.
                /// </summary>
                private String processEndTag(String name)
                {
                    if (!allowed(name) || inArray(name, vSelfClosingTags))
                    {
                        return "";
                    }

                    int open;
                    if (vTagCounts.TryGetValue(name, out open) && open > 0)
                    {
                        vTagCounts[name] = open - 1;
                        return "</" + name + ">";
                    }
                    return "";
                }

                /// <summary>
                /// Rebuilds an opening tag from its whitelisted attributes and tallies it as open
                /// unless it is emitted in self-closing form.
                /// </summary>
                private String processStartTag(String name, String body, String ending)
                {
                    if (!allowed(name))
                    {
                        return "";
                    }

                    String attributes = buildAttributes(name, body);

                    // "need closing" wins over everything; otherwise the tag self-closes when it is a
                    // known self-closing element or the input already ended in '/'.
                    bool selfClose = !inArray(name, vNeedClosingTags)
                        && (inArray(name, vSelfClosingTags) || ending.Length > 0);

                    if (selfClose)
                    {
                        return "<" + name + attributes + " />";
                    }

                    int open;
                    vTagCounts.TryGetValue(name, out open);
                    vTagCounts[name] = open + 1;
                    return "<" + name + attributes + ">";
                }

                /// <summary>
                /// Extracts quoted and unquoted attributes from a tag body and returns the
                /// whitelisted ones re-serialized with double quotes.
                /// </summary>
                private String buildAttributes(String name, String body)
                {
                    List<String> paramNames = new List<String>();
                    List<String> paramValues = new List<String>();
                    foreach (Match match in P_QUOTED_ATTRIBUTES.Matches(body))
                    {
                        paramNames.Add(match.Groups[1].Value);
                        paramValues.Add(match.Groups[3].Value);
                    }
                    foreach (Match match in P_UNQUOTED_ATTRIBUTES.Matches(body))
                    {
                        paramNames.Add(match.Groups[1].Value);
                        paramValues.Add(match.Groups[3].Value);
                    }

                    System.Text.StringBuilder result = new System.Text.StringBuilder();
                    for (int ii = 0; ii < paramNames.Count; ii++)
                    {
                        String paramName = paramNames[ii].ToLowerInvariant();
                        String paramValue = paramValues[ii];

                        if (!allowedAttribute(name, paramName))
                        {
                            continue;
                        }
                        if (inArray(paramName, vProtocolAtts))
                        {
                            paramValue = processParamProtocol(paramValue);
                        }

                        // A single-quoted source value may contain '"'. Written back inside double
                        // quotes it would end the attribute and let the rest be parsed as new
                        // attributes (e.g. an event handler), so it must be escaped.
                        paramValue = paramValue.Replace(P_QUOTE, "&quot;");

                        result.Append(' ').Append(paramName).Append("=\"").Append(paramValue).Append('"');
                    }
                    return result.ToString();
                }

                /// <summary>
                /// Decodes the value of a URL attribute and, when its scheme is not in
                /// <see cref="vAllowedProtocols"/>, rewrites it into a local "#..." anchor.
                /// </summary>
                private String processParamProtocol(String s)
                {
                    s = decodeEntities(s);
                    Match m = P_PROTOCOL.Match(s);
                    if (m.Success)
                    {
                        String protocol = m.Groups[1].Value;
                        if (!inArray(protocol, vAllowedProtocols))
                        {
                            // bad protocol, turn into local anchor link instead
                            s = "#" + s.Substring(protocol.Length + 1);
                            if (s.StartsWith("#//", StringComparison.Ordinal))
                            {
                                s = "#" + s.Substring(3);
                            }
                        }
                    }
                    return s;
                }

                /// <summary>
                /// Decodes decimal/hex character references and percent escapes, then re-validates
                /// the remaining entities.
                /// </summary>
                private String decodeEntities(String s)
                {
                    s = P_ENTITY.Replace(s, new MatchEvaluator(ConverMatchEntity));
                    s = P_ENTITY_UNICODE.Replace(s, new MatchEvaluator(ConverMatchEntityUnicode));
                    s = P_ENCODE.Replace(s, new MatchEvaluator(ConverMatchEntityUnicode));
                    s = validateEntities(s);
                    return s;
                }

                /// <summary>
                /// Escapes ampersands that do not start an allowed entity and escapes double quotes
                /// found in text outside of tags.
                /// </summary>
                private String validateEntities(String s)
                {
                    s = P_VALID_ENTITIES.Replace(s, new MatchEvaluator(ConverMatchValidEntities));
                    s = P_VALID_QUOTES.Replace(s, new MatchEvaluator(ConverMatchValidQuotes));
                    return s;
                }

                /// <summary>Case-sensitive membership test that ignores null array entries.</summary>
                private static bool inArray(String s, String[] array)
                {
                    foreach (String item in array)
                    {
                        if (item != null && item.Equals(s))
                        {
                            return true;
                        }
                    }
                    return false;
                }

                /// <summary>True when the element is whitelisted (or the whitelist is empty) and not disallowed.</summary>
                private bool allowed(String name)
                {
                    return (vAllowed.Count == 0 || vAllowed.ContainsKey(name)) && !inArray(name, vDisallowed);
                }

                /// <summary>True when the attribute is whitelisted for the given allowed element.</summary>
                private bool allowedAttribute(String name, String paramName)
                {
                    return allowed(name) && (vAllowed.Count == 0 || vAllowed[name].Contains(paramName));
                }

                /// <summary>
                /// Keeps "&amp;name" when it is terminated by ';' and names an allowed entity;
                /// otherwise escapes the ampersand.
                /// </summary>
                private String checkEntity(String preamble, String term)
                {
                    if (";".Equals(term) && isValidEntity(preamble))
                    {
                        return "&" + preamble;
                    }
                    return "&amp;" + preamble;
                }

                private bool isValidEntity(String entity)
                {
                    return inArray(entity, vAllowedEntities);
                }

                private static string ConverMatchComments(Match match)
                {
                    return "<!--" + htmlSpecialChars(match.Groups[1].Value) + "-->";
                }

                private string ConverMatchTags(Match match)
                {
                    return processTag(match.Groups[1].Value);
                }

                /// <summary>Decodes a decimal character reference such as "&amp;#58;".</summary>
                private string ConverMatchEntity(Match match)
                {
                    return decodeCodePoint(match, System.Globalization.NumberStyles.None);
                }

                /// <summary>Decodes a hexadecimal reference ("&amp;#x3a;") or a percent escape ("%3a").</summary>
                private string ConverMatchEntityUnicode(Match match)
                {
                    return decodeCodePoint(match, System.Globalization.NumberStyles.AllowHexSpecifier);
                }

                /// <summary>
                /// Converts the number captured in group 1 into the character it denotes.
                /// Values that do not fit an int, are negative, exceed U+10FFFF or fall in the
                /// surrogate range are left undecoded (the matched text is returned unchanged)
                /// instead of throwing on attacker-supplied input.
                /// </summary>
                private static string decodeCodePoint(Match match, System.Globalization.NumberStyles style)
                {
                    int codePoint;
                    bool parsed = int.TryParse(
                        match.Groups[1].Value,
                        style,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out codePoint);

                    if (!parsed || codePoint < 0 || codePoint > 0x10FFFF
                        || (codePoint >= 0xD800 && codePoint <= 0xDFFF))
                    {
                        return match.Value;
                    }

                    // Supplementary-plane code points need a surrogate pair; chr() would truncate them.
                    return codePoint <= 0xFFFF ? chr(codePoint) : char.ConvertFromUtf32(codePoint);
                }

                private string ConverMatchValidEntities(Match match)
                {
                    String one = match.Groups[1].Value; //([^&;]*)
                    String two = match.Groups[2].Value; //(?=(;|&|$))
                    return checkEntity(one, two);
                }

                private string ConverMatchValidQuotes(Match match)
                {
                    String one = match.Groups[1].Value; //(>|^)
                    String two = match.Groups[2].Value; //([^<]+?)
                    String three = match.Groups[3].Value;//(<|$)
                    return one + regexReplace(P_QUOTE, "&quot;", two) + three;
                }

                /// <summary>Returns the current value of the <see cref="alwaysMakeTags"/> flag.</summary>
                public bool isAlwaysMakeTags()
                {
                    return alwaysMakeTags;
                }

                /// <summary>Returns the current value of the <see cref="stripComment"/> flag.</summary>
                public bool isStripComments()
                {
                    return stripComment;
                }
            }

    调用:

    var x = new Common.NHtmlFilter(false);

    string content=
    "文本<a style='color:red;'>xxxxxxx</a><script>alert(1)</script>";

    string str = x.filter(content);
  • 相关阅读:
    c++ 左值、右值;左值引用、右值引用
    leetcode 837 新21点
    c++ 继承和组合
    ubuntu 16.04 常用命令小结
    vim 常用命令小结(转)
    leetcode 1371. 每个元音包含偶数次的最长子字符串 (状压 + 前缀和 +hash)
    leetcode 974 和可被K整除的子数组
    leetcode 910 最小差值II
    关于 mysqladmin
    PHP闭包(Closure)初探
  • 原文地址:https://www.cnblogs.com/OleRookie/p/5970167.html
Copyright © 2011-2022 走看看