zoukankan      html  css  js  c++  java
  • 利用js种的正则删除html标签

     1 public static string NoHTML(string Htmlstring)
     2 {
     3 
     4 
     5 //删除脚本 
     6 Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase);
     7 //删除HTML 
     8 Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
     9 Htmlstring = Regex.Replace(Htmlstring, @"([
    ])[s]+", "", RegexOptions.IgnoreCase);
    10 Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    11 Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);
    12 
    13 Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", """, RegexOptions.IgnoreCase);
    14 Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
    15 Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    16 Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    17 Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
    18 Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "xa1", RegexOptions.IgnoreCase);
    19 Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "xa2", RegexOptions.IgnoreCase);
    20 Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "xa3", RegexOptions.IgnoreCase);
    21 Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "xa9", RegexOptions.IgnoreCase);
    22 Htmlstring = Regex.Replace(Htmlstring, @"&#(d+);", "", RegexOptions.IgnoreCase);
    23 
    24 Htmlstring.Replace("<", "");
    25 Htmlstring.Replace(">", "");
    26 Htmlstring.Replace("
    ", "");
    27 
    28 return Htmlstring;
    29 }
  • 相关阅读:
    python学习中的bug
    python爬虫工具
    Win系统的快捷键和Macos系统的快捷键
    java基础编程
    linux下的ssh和rynsc
    我为什么要搞无人驾驶
    无人驾驶用到哪些具体技术
    java生成excel,word文件
    java后端实习,从最简单的crud做起
    开发经验和习惯
  • 原文地址:https://www.cnblogs.com/McChen/p/4415174.html
Copyright © 2011-2022 走看看