zoukankan      html  css  js  c++  java
  • 利用正则表达式除去html得到纯文本

    /// <summary>
    /// Strips HTML markup from a string and returns the remaining plain text.
    /// </summary>
    /// <remarks>
    /// Processing order:
    /// 1. &lt;script&gt; elements are removed together with their content.
    /// 2. All remaining tags are removed.
    /// 3. A line break followed by further whitespace is removed.
    /// 4. Leftover comment delimiters and stray angle brackets are removed.
    /// 5. A small set of character entities (quot, lt, gt, nbsp, iexcl, cent,
    ///    pound, copy and their numeric forms) is decoded; every other numeric
    ///    entity (&amp;#NNN;) is deleted; finally amp is decoded.
    /// </remarks>
    /// <param name="Htmlstring">The HTML text to clean. May be null or empty.</param>
    /// <returns>
    /// The text with markup removed, or an empty string when
    /// <paramref name="Htmlstring"/> is null or empty.
    /// </returns>
    public static string DelHTML(string Htmlstring)
    {
        if (string.IsNullOrEmpty(Htmlstring))
        {
            return string.Empty;
        }

        const System.Text.RegularExpressions.RegexOptions ignoreCase =
            System.Text.RegularExpressions.RegexOptions.IgnoreCase;

        // Singleline lets '.' match line breaks; without it the body of a
        // multi-line <script> block is left behind as visible text.
        const System.Text.RegularExpressions.RegexOptions scriptOptions =
            System.Text.RegularExpressions.RegexOptions.IgnoreCase |
            System.Text.RegularExpressions.RegexOptions.Singleline;

        // Remove scripts, including their content.
        Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", scriptOptions);

        // Remove every remaining tag.
        Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", ignoreCase);

        // Remove a line break together with the whitespace that follows it.
        Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", ignoreCase);

        // Remove leftover comment terminators and unterminated comment openers
        // (the latter up to the end of the line).
        Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"-->", "", ignoreCase);
        Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"<!--.*", "", ignoreCase);

        // Remove stray markup characters. string.Replace returns a new string,
        // so the result must be assigned. This runs BEFORE entity decoding so
        // that text the author escaped as &lt; / &gt; survives as < / >.
        Htmlstring = Htmlstring.Replace("<", "").Replace(">", "").Replace("\r\n", "");

        // Decode the supported named / numeric character entities.
        Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", ignoreCase);
        Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(lt|#60);", "<", ignoreCase);
        Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(gt|#62);", ">", ignoreCase);
        Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", ignoreCase);
        Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\u00A1", ignoreCase);
        Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(cent|#162);", "\u00A2", ignoreCase);
        Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(pound|#163);", "\u00A3", ignoreCase);
        Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(copy|#169);", "\u00A9", ignoreCase);

        // Any other numeric entity is dropped rather than decoded.
        Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&#(\d+);", "", ignoreCase);

        // Decode &amp; last: decoding it earlier would turn "&amp;lt;" into
        // "&lt;" and then into "<", i.e. decode escaped text twice.
        Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring, @"&(amp|#38);", "&", ignoreCase);

        return Htmlstring;
    }

  • 相关阅读:
    IIS 添加二级应用程序
    VS中发布并调试IIS程序
    未启用当前数据库的 SQL Server Service Broker,因此查询通知不受支持。如果希望使用通知,请为此数据库启用 Service Broker
    Flash基础开发习惯指要
    2012云计算扫盲
    flash问题集锦(新手必看)
    Flash常用ActionScript控制语句基本用法祥解
    通过offset值的设置使html元素对齐
    不用float也可以让div横向显示
    QQ空间里写的开发心得
  • 原文地址:https://www.cnblogs.com/hfzsjz/p/1674844.html
Copyright © 2011-2022 走看看