/// <summary>
/// Strips HTML markup from a string and decodes a small set of character entities,
/// producing plain text.
/// </summary>
/// <remarks>
/// Steps, each working on the output of the previous one: script elements (including
/// their content) are removed, then comments, then any remaining tags, then line breaks
/// together with the whitespace that follows them. Finally the supported entities are
/// decoded in a single pass, so text produced by decoding one entity is never decoded again.
/// Angle brackets that come from decoded entities are kept, which means the result is
/// plain text and must be encoded again before it is written back into an HTML page.
/// </remarks>
/// <param name="string_include_html">Text that may contain HTML markup; may be null or empty.</param>
/// <returns>
/// The text without markup, or an empty string when the input is null or empty.
/// </returns>
public static string FilterHtml(string string_include_html)
{
    if (string.IsNullOrEmpty(string_include_html))
    {
        return "";
    }

    string[] markupPatterns =
    {
        // Script elements with their body; (?s) lets '.' cross line breaks.
        @"(?s)<script[^>]*?>.*?</script>",
        // Comments, including multi-line ones.
        @"(?s)<!--.*?-->",
        // Opening, closing, self-closing and declaration tags. Quoted attribute values may
        // contain '>'. The alternatives start with different characters, so the pattern
        // cannot backtrack exponentially on malformed input.
        @"<(?:/\s*)?!?\w(?:[^>""']|""[^""]*""|'[^']*')*>",
        // A line break plus the whitespace that follows it.
        @"[\r\n]\s+"
    };

    string string_no_html = string_include_html;
    foreach (string pattern in markupPatterns)
    {
        string_no_html = System.Text.RegularExpressions.Regex.Replace(
            string_no_html,
            pattern,
            "",
            System.Text.RegularExpressions.RegexOptions.IgnoreCase);
    }

    // Decode all supported entities in one pass to avoid double decoding.
    return System.Text.RegularExpressions.Regex.Replace(
        string_no_html,
        @"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);",
        new System.Text.RegularExpressions.MatchEvaluator(DecodeHtmlEntity),
        System.Text.RegularExpressions.RegexOptions.IgnoreCase);
}

/// <summary>
/// Maps one matched entity (named or numeric) to its replacement text.
/// </summary>
/// <param name="entity">A match whose first group holds the entity name or "#" plus digits.</param>
/// <returns>
/// The decoded character for the supported entities; an empty string for any other
/// numeric entity, which removes it from the text.
/// </returns>
private static string DecodeHtmlEntity(System.Text.RegularExpressions.Match entity)
{
    switch (entity.Groups[1].Value.ToLowerInvariant())
    {
        case "quot":
        case "#34":
            return "\"";
        case "amp":
        case "#38":
            return "&";
        case "lt":
        case "#60":
            return "<";
        case "gt":
        case "#62":
            return ">";
        case "nbsp":
        case "#160":
            return " ";
        case "iexcl":
        case "#161":
            return "\u00A1";
        case "cent":
        case "#162":
            return "\u00A2";
        case "pound":
        case "#163":
            return "\u00A3";
        case "copy":
        case "#169":
            return "\u00A9";
        default:
            // Any other numeric entity is dropped.
            return "";
    }
}
以上代码来自网络,但个人认为效果并不理想,所以自己写了下面这个方法:
/// <summary>
/// Removes HTML tags from a string, turning line-break tags and non-breaking-space
/// entities into ordinary spaces.
/// </summary>
/// <param name="string_include_html">The text to strip; may be null or empty.</param>
/// <returns>
/// The text with script elements and all other tags removed, or an empty string when
/// the input is null or empty.
/// </returns>
public static string GetStringNoHtml(string string_include_html)
{
    if (String.IsNullOrEmpty(string_include_html))
    {
        return "";
    }

    // Turn every spelling of the line-break tag (<br>, <BR>, <br/>, <br />) into a space
    // so the words on either side do not run together once the tags are stripped.
    string with_breaks_as_spaces = System.Text.RegularExpressions.Regex.Replace(
        string_include_html,
        @"<br\s*/?>",
        " ",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase);

    // Remove script elements together with their body, then every remaining tag.
    // (?s:...) lets the script body span several lines.
    string string_no_html = System.Text.RegularExpressions.Regex.Replace(
        with_breaks_as_spaces,
        @"(<script[^>]*?>(?s:.*?)</script>)|(<(.[^>]*)>)",
        "",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase);

    // Non-breaking-space entities become plain spaces.
    return string_no_html.Replace("&nbsp;", " ");
}