zoukankan      html  css  js  c++  java
  • 利用正则表达式除去html得到纯文本

    public static string DelHTML(string Htmlstring)//将HTML去除

             {     
                 #region
                 //删除脚本

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"<script[^>]*?>.*?</script>","",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                 //删除HTML

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"<(.[^>]*)>","",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"([\r\n])[\s]+","",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"-->","",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"<!--.*","",System.Text.RegularExpressions.RegexOptions.IgnoreCase);
               
                 //Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"<A>.*</A>","");
           
                 //Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"<[a-zA-Z]*=\.[a-zA-Z]*\?[a-zA-Z]+=\d&\w=%[a-zA-Z]*|[A-Z0-9]","");

                           

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"&(quot|#34);","\"",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"&(amp|#38);","&",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"&(lt|#60);","<",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"&(gt|#62);",">",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"&(nbsp|#160);"," ",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"&(iexcl|#161);","\xa1",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                 Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring,@"&(cent|#162);","\xa2",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"&(pound|#163);","\xa3",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"&(copy|#169);","\xa9",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring, @"&#(\d+);","",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

               
                 Htmlstring.Replace("<","");

                 Htmlstring.Replace(">","");

                 Htmlstring.Replace("\r\n","");

                 //Htmlstring=HttpContext.Current.Server.HtmlEncode(Htmlstring).Trim();
                 #endregion


                 return Htmlstring;

             }

  • 相关阅读:
    mysql5.7.17安装配置图文教程
    Idea破解
    git pull总是要输入账号和密码
    java 查看线程死锁
    SpringBoot:四种读取properties文件的方式
    Springboot的常规属性配置和类型安全配置
    java web应用连接mysql会突然connection连接失败
    设计模式--单例模式(二)登记式
    设计模式--单例模式(一)懒汉式和饿汉式
    Springboot的2种启动方式
  • 原文地址:https://www.cnblogs.com/hfzsjz/p/1674844.html
Copyright © 2011-2022 走看看