zoukankan      html  css  js  c++  java
  • 利用正则表达式除去html得到纯文本

    /// <summary>
    /// Strips HTML markup from a string and returns the remaining plain text.
    /// </summary>
    /// <remarks>
    /// Processing order:
    /// 1. remove &lt;script&gt; elements together with their content;
    /// 2. remove complete HTML comments;
    /// 3. remove every remaining tag;
    /// 4. remove each line break together with the whitespace that follows it;
    /// 5. remove leftover comment delimiters from unbalanced comments;
    /// 6. decode a small set of character entities in a single pass.
    /// Decoded '&lt;' and '&gt;' characters are kept in the result, so the returned
    /// text must be HTML-encoded again before it is written back into a page.
    /// </remarks>
    /// <param name="Htmlstring">The HTML text to clean.</param>
    /// <returns>The text with markup removed and the supported entities decoded.</returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="Htmlstring"/> is null (raised by Regex.Replace).
    /// </exception>
    public static string DelHTML(string Htmlstring)
    {
        const System.Text.RegularExpressions.RegexOptions caseInsensitive =
            System.Text.RegularExpressions.RegexOptions.IgnoreCase;

        // Singleline lets '.' match line breaks so constructs that span several lines are caught.
        const System.Text.RegularExpressions.RegexOptions spanningLines =
            caseInsensitive | System.Text.RegularExpressions.RegexOptions.Singleline;

        string text = Htmlstring;

        // Scripts: without Singleline a multi-line script body would survive as "text".
        text = System.Text.RegularExpressions.Regex.Replace(text, @"<script[^>]*?>.*?</script>", "", spanningLines);

        // Complete comments: handled before generic tag stripping because a comment may
        // contain '>' which would otherwise end the tag match early and leak the remainder.
        text = System.Text.RegularExpressions.Regex.Replace(text, @"<!--.*?-->", "", spanningLines);

        // Any remaining tag.
        text = System.Text.RegularExpressions.Regex.Replace(text, @"<(.[^>]*)>", "", caseInsensitive);

        // A line break plus the whitespace following it (this also removes every "\r\n" pair).
        text = System.Text.RegularExpressions.Regex.Replace(text, @"([\r\n])[\s]+", "", caseInsensitive);

        // Leftovers of unbalanced comments: a stray terminator, or an opener that is never
        // closed (dropped up to the end of its line).
        text = System.Text.RegularExpressions.Regex.Replace(text, @"-->", "", caseInsensitive);
        text = System.Text.RegularExpressions.Regex.Replace(text, @"<!--.*", "", caseInsensitive);

        // Decode entities in ONE pass. Sequential passes would decode twice:
        // "&amp;lt;" would first become "&lt;" and then "<".
        text = System.Text.RegularExpressions.Regex.Replace(
            text,
            @"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);",
            new System.Text.RegularExpressions.MatchEvaluator(DecodeHtmlEntity),
            caseInsensitive);

        // No Replace("<", "") / Replace(">", "") here: string.Replace returns a new string,
        // so calls that ignore the result do nothing, and applying them would delete the
        // characters that were just decoded from &lt; and &gt;.
        return text;
    }

    /// <summary>
    /// Maps one entity matched by <see cref="DelHTML"/> to its replacement text.
    /// Numeric references outside the supported set are removed.
    /// </summary>
    private static string DecodeHtmlEntity(System.Text.RegularExpressions.Match entity)
    {
        string name = entity.Groups[1].Value;

        switch (name.ToLowerInvariant())
        {
            case "quot":
            case "#34":
                return "\"";
            case "amp":
            case "#38":
                return "&";
            case "lt":
            case "#60":
                return "<";
            case "gt":
            case "#62":
                return ">";
            case "nbsp":
            case "#160":
                return " ";
            case "iexcl":
            case "#161":
                return "\u00A1";
            case "cent":
            case "#162":
                return "\u00A2";
            case "pound":
            case "#163":
                return "\u00A3";
            case "copy":
            case "#169":
                return "\u00A9";
            default:
                // Unsupported numeric references are dropped; anything else is left untouched.
                return name.StartsWith("#", System.StringComparison.Ordinal) ? string.Empty : entity.Value;
        }
    }

  • 相关阅读:
    a<<=n
    IP地址转换、主机大小端、htonl、ntohl实现
    判断系统大小端方法分析与总结
    C++ 初始化列表(转载)
    QString::arg()//用字符串变量参数依次替代字符串中最小数值
    QTableView和QTableWidget翻页功能实现
    QTableWidget详解(样式、右键菜单、表头塌陷、多选等)
    QT CLASS
    Linux下添加新硬盘,分区及挂载
    c语言基本函数
  • 原文地址:https://www.cnblogs.com/hfzsjz/p/1674844.html
Copyright © 2011-2022 走看看