zoukankan      html  css  js  c++  java
  • 利用正则表达式除去html得到纯文本

    public static string DelHTML(string Htmlstring)//将HTML去除

             {     
                 #region
                 //删除脚本

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"<script[^>]*?>.*?</script>","",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                 //删除HTML

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"<(.[^>]*)>","",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"([\r\n])[\s]+","",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"-->","",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"<!--.*","",System.Text.RegularExpressions.RegexOptions.IgnoreCase);
               
                 //Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"<A>.*</A>","");
           
                 //Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"<[a-zA-Z]*=\.[a-zA-Z]*\?[a-zA-Z]+=\d&\w=%[a-zA-Z]*|[A-Z0-9]","");

                           

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"&(quot|#34);","\"",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"&(amp|#38);","&",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"&(lt|#60);","<",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"&(gt|#62);",">",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"&(nbsp|#160);"," ",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"&(iexcl|#161);","\xa1",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                 Htmlstring = System.Text.RegularExpressions.Regex.Replace(Htmlstring,@"&(cent|#162);","\xa2",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"&(pound|#163);","\xa3",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring,@"&(copy|#169);","\xa9",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                 Htmlstring =System.Text.RegularExpressions. Regex.Replace(Htmlstring, @"&#(\d+);","",System.Text.RegularExpressions.RegexOptions.IgnoreCase);

               
                 Htmlstring.Replace("<","");

                 Htmlstring.Replace(">","");

                 Htmlstring.Replace("\r\n","");

                 //Htmlstring=HttpContext.Current.Server.HtmlEncode(Htmlstring).Trim();
                 #endregion


                 return Htmlstring;

             }

  • 相关阅读:
    招聘.Net中高级软件研发工程师
    布局和救火
    UITableView详解(转)
    iOS开发那些事--性能优化–内存泄露问题的解决(转)
    LeeCode(PHP) 2.add-two-numbers
    LeeCode(PHP) 1.Two Sum
    PHP实现 序列帧拆分
    PHPExcel导出大量数据超时及内存错误解决方法(转)
    laravel路由 实现短连接生成及跳转(php 301重定向)
    从扑克牌中随机抽取5张牌,判断是不是一个顺子,即这5张牌是不是连续(面试题)
  • 原文地址:https://www.cnblogs.com/hfzsjz/p/1674844.html
Copyright © 2011-2022 走看看