zoukankan      html  css  js  c++  java
  • 调用下面的方法屏蔽所有html标签提取文本

    public static string NoHTML(string Htmlstring) //替换HTML标记
      {
      string pattern = "http://([^\\s]+)\".+?span.+?\\[(.+?)\\].+?>(.+?)<";
      Regex reg = new Regex(pattern, RegexOptions.IgnoreCase);
      //删除脚本
      Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase);
      //删除HTML
      Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
      Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
      Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
      Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);
      Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
      Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
      Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
      Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
      Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
      Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
      Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
      Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
      Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);
      Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);
      Htmlstring = Regex.Replace(Htmlstring, @"<img[^>]*>;", "", RegexOptions.IgnoreCase);
      Htmlstring.Replace("<", "");
      Htmlstring.Replace(">", "");
      Htmlstring.Replace("\r\n", "");
      Htmlstring = HttpContext.Current.Server.HtmlEncode(Htmlstring).Trim();
      return Htmlstring;
      }
    
  • 相关阅读:
    POJ 1611 The Suspects
    POJ 2001 Shortest Prefixes(字典树)
    HDU 1251 统计难题(字典树 裸题 链表做法)
    G++ C++之区别
    PAT 乙级 1013. 数素数 (20)
    PAT 乙级 1012. 数字分类 (20)
    PAT 乙级 1009. 说反话 (20)
    PAT 乙级 1008. 数组元素循环右移问题 (20)
    HDU 6063 17多校3 RXD and math(暴力打表题)
    HDU 6066 17多校3 RXD's date(超水题)
  • 原文地址:https://www.cnblogs.com/wangchuang/p/2507343.html
Copyright © 2011-2022 走看看