zoukankan      html  css  js  c++  java
  • 用.net正则去除所有的html源码或者提取出图片地址以及存数据库时的符号替换(如富文本编辑器)

    参照了别人的博客~前两个基本照搬qwq

    http://www.cnblogs.com/vingi/articles/2447861.html

    一、去除所有的html源码,只留下文字。(已试验过)

    1、引入命名空间

    using System.Text.RegularExpressions;

    2、编写函数

    /// <summary>
    /// Strips HTML markup from a fragment and returns the remaining text, HTML-encoded
    /// and trimmed.
    /// </summary>
    /// <param name="Htmlstring">The HTML fragment to clean. May be null or empty.</param>
    /// <returns>
    /// The text with script elements, tags and comment remnants removed, a fixed set of
    /// entities decoded, any remaining angle brackets and CR/LF pairs deleted, and the
    /// result HTML-encoded. Returns an empty string for null or empty input.
    /// </returns>
    public static string NoHTML(string Htmlstring)
    {
        if (string.IsNullOrEmpty(Htmlstring))
        {
            return string.Empty;
        }

        RegexOptions options = RegexOptions.IgnoreCase;
        string text = Htmlstring;

        // Remove script elements together with their bodies. Singleline lets '.' match
        // newlines; without it a multi-line script body is left behind as plain text.
        text = Regex.Replace(text, @"<script[^>]*?>.*?</script>", "", options | RegexOptions.Singleline);

        // Remove every remaining tag.
        text = Regex.Replace(text, @"<(.[^>]*)>", "", options);
        // Remove a line break together with the whitespace that follows it.
        text = Regex.Replace(text, @"([\r\n])[\s]+", "", options);
        // Remove comment remnants that the tag pattern did not consume.
        text = Regex.Replace(text, @"-->", "", options);
        text = Regex.Replace(text, @"<!--.*", "", options);

        // Decode the handful of entities this helper knows about.
        text = Regex.Replace(text, @"&(quot|#34);", "\"", options);
        text = Regex.Replace(text, @"&(amp|#38);", "&", options);
        text = Regex.Replace(text, @"&(lt|#60);", "<", options);
        text = Regex.Replace(text, @"&(gt|#62);", ">", options);
        text = Regex.Replace(text, @"&(nbsp|#160);", "   ", options);
        text = Regex.Replace(text, @"&(iexcl|#161);", "\u00A1", options);
        text = Regex.Replace(text, @"&(cent|#162);", "\u00A2", options);
        text = Regex.Replace(text, @"&(pound|#163);", "\u00A3", options);
        text = Regex.Replace(text, @"&(copy|#169);", "\u00A9", options);
        // Drop any other numeric character reference.
        text = Regex.Replace(text, @"&#(\d+);", "", options);

        // string.Replace returns a new string, so the result must be assigned;
        // calling it as a bare statement leaves the text unchanged.
        text = text.Replace("<", "");
        text = text.Replace(">", "");
        text = text.Replace("\r\n", "");

        // WebUtility.HtmlEncode needs no active HttpContext, so this also works
        // outside an ASP.NET request (background jobs, tests, console tools).
        return System.Net.WebUtility.HtmlEncode(text).Trim();
    }

    二、把html中图片地址提取出来(未试验)

    1、先把html的标签符号移除

     /// <summary>
     /// Deletes every substring that starts with '&lt;' and runs to the next '&gt;'.
     /// </summary>
     /// <param name="HTMLStr">The markup to process.</param>
     /// <returns>The input with all such substrings removed.</returns>
     public static string ParseTags(string HTMLStr)
     {
         const string tagPattern = "<[^>]*>";
         System.Text.RegularExpressions.Regex tagMatcher =
             new System.Text.RegularExpressions.Regex(tagPattern);
         return tagMatcher.Replace(HTMLStr, string.Empty);
     }

    2、再把图片地址取出来

    // Matches the src attribute of an img tag. The value may be double-quoted,
    // single-quoted or unquoted; the "url" group holds it without the quotes.
    // Built once: constructing a Compiled regex on every call is expensive.
    private static readonly Regex ImgSrcRegex = new Regex(
        @"<img\b[^>]*?\ssrc\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s>]+))",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    /// <summary>
    /// Extracts the src value of the first img tag found in an HTML fragment.
    /// </summary>
    /// <param name="HTMLStr">The HTML fragment to search. May be null or empty.</param>
    /// <returns>
    /// The URL exactly as written in the markup (original casing, no surrounding
    /// quotes), or an empty string when there is no img tag with a src attribute.
    /// </returns>
    public static string GetImgUrl(string HTMLStr)
    {
        if (string.IsNullOrEmpty(HTMLStr))
        {
            return string.Empty;
        }

        // Match case-insensitively instead of lower-casing the input, which would
        // corrupt URLs whose path or query string is case-sensitive.
        Match m = ImgSrcRegex.Match(HTMLStr);
        return m.Success ? m.Groups["url"].Value : string.Empty;
    }

    三、把富文本编辑器里的内容存入数据库时需要进行符号替换,不然会出 bug

    /// <summary>
    /// Converts the escaped forms of '&lt;', '&gt;' and '"' in rich-text editor output
    /// back into the literal characters before the content is stored.
    /// </summary>
    /// <param name="x">The editor content. May be null or empty.</param>
    /// <returns>
    /// The content with "&amp;lt;", "&amp;gt;" and "&amp;quot;" replaced by their literal
    /// characters. Null or empty input is returned unchanged.
    /// </returns>
    public static string change(string x)
    {
        if (string.IsNullOrEmpty(x))
        {
            return x;
        }

        // NOTE(review): this turns encoded markup back into live HTML. Whatever
        // renders the stored value must sanitize or encode it to avoid script injection.
        return x.Replace("&lt;", "<")
                .Replace("&gt;", ">")
                .Replace("&quot;", "\"");
    }
  • 相关阅读:
    BZOJ 1101: [POI2007]Zap [莫比乌斯反演]
    磁盘I/O高居不下,通过什么来查看占用I/O的进程?
    jmeter中特殊的时间处理方式
    Fiddler抓包工具版面认识(一)
    Jmeter 时间函数工具汇总
    Jmeter之__CSVRead随机读取变量
    jmeter配置元件之计数器
    Jmeter全局变量设置
    数据驱动和关键字驱动
    sqlmap工具命令行的含义
  • 原文地址:https://www.cnblogs.com/ivan99/p/6658011.html
Copyright © 2011-2022 走看看