参考文章:
1)匹配嵌套的构造(较复杂)
2)解读C#正则表达式
3)[正则表达式] 可以解析HTML/XHTML页面的所有元素和结构的Regular Expression!
/// <summary>
/// Regular-expression fragments for picking apart HTML/XHTML-like markup, plus
/// a small demo that wraps every <c>&lt;script&gt;</c> block of a sample string
/// in an HTML comment.
/// </summary>
/// <remarks>
/// The fragments are compile-time constants so the composite patterns can be
/// assembled from the simpler ones: an instance field initializer is not
/// allowed to reference another instance field (compiler error CS0236).
/// The unqualified regex type names rely on
/// <c>System.Text.RegularExpressions</c> being imported by the file.
/// </remarks>
class Class1
{
    // Element name: one or more word characters, '-' or ':'.
    const string tag = @"(?:[\w-:]+)";

    // Attribute: a name optionally followed by '=' and an unquoted,
    // double-quoted or single-quoted value.
    const string attribute = @"(?:[\w-:]+)(?:(\s)*=(\s)*(?:[^\s\>\<]*|\""[^\""]*\""|\'[^\']*\'))?";

    // Name that follows "<!" in a directive (see xmlDirective).
    const string name = @"(?:[\w-:]+)";

    // Directive argument: a bare token or a quoted string (either quote style).
    const string argument = @"(?:[\w-:]+|\""[\s\S]*?\""|\'[\s\S]*?\')";

    // Opening tag with any number of attributes; an optional '/' before '>'
    // also admits the self-closing form.
    const string beginningTag = @"(?:\<" + tag + @"(?:\s+" + attribute + @")*\s*(?:/)?\>)";

    // Closing tag: "</name>".
    const string endingTag = @"(?:\</" + tag + @"\>)";

    // Comment: "<!--" up to the first following "-->".
    const string xmlComment = @"(?:\<!--[\s\S]*?--\>)";

    // Directive: "<!" + name + whitespace-separated arguments + ">".
    const string xmlDirective = @"(?:\<!" + name + @"(?:\s+" + argument + @")*\s*\>)";

    // CDATA section: "<![CDATA[" up to the first following "]]>".
    const string xmlCData = @"(?:\<!\[CDATA\[(?:[\s\S]*?)\]\]\>)";

    // Whole style element, from its opening tag to the first "</Style>".
    // The tag name is spelled "Style" here, so other casings only match when
    // the pattern is used with RegexOptions.IgnoreCase.
    const string styleBlock = @"(?:(?:\<(?:Style)(?:\s+" + attribute + @")*\s*(?:/)?\>)(?:[\s\S]*?)(?:\</(?:Style)\>))";

    // Whole script element, from its opening tag to the first "</script>".
    const string scriptBlock = @"(?:(?:\<(?:script)(?:\s+" + attribute + @")*\s*(?:/)?\>)(?:[\s\S]*?)(?:\</(?:script)\>))";

    // Text between tags: anything except '<' and '>'; runs of spaces are
    // captured in the named group "blank".
    const string xmlLiteral = @"(?:(?:(?<blank>[ ]+)|[^ \<\>])+)";

    // Built once and reused. Case-insensitive so that a closing tag such as
    // "</ScripT>" still terminates a "<script>" block.
    static readonly Regex scriptBlockRegex =
        new Regex(scriptBlock, RegexOptions.IgnoreCase | RegexOptions.Compiled);

    /// <summary>Wraps the matched text in an HTML comment.</summary>
    /// <param name="m">The match whose text is to be wrapped.</param>
    /// <returns><c>&lt;!--</c> + matched text + <c>--&gt;</c>.</returns>
    public static string CapText(Match m)
    {
        return "<!--" + m.Value + "-->";
    }

    /// <summary>
    /// Replaces every script block found in <paramref name="input"/> with the
    /// same text wrapped in an HTML comment; all other text is left untouched.
    /// </summary>
    /// <param name="input">Markup to process.</param>
    /// <returns>The processed markup.</returns>
    public static string CommentOutScriptBlocks(string input)
    {
        return scriptBlockRegex.Replace(input, new MatchEvaluator(CapText));
    }

    /// <summary>Demo entry point: comments out the script blocks of a sample string and prints the result.</summary>
    static void Main()
    {
        string text = "<script > jaiowjefw </script>sdfsdf<script>fdf</ScripT> .";
        string result = CommentOutScriptBlocks(text);
        System.Console.WriteLine(result);
    }
}