zoukankan      html  css  js  c++  java
  • 去除HTML标记 修改p标签为br,修改br为br,保留img标签

    贴上代码,自己看

    网络上找的,自己做过一点修改,还是蛮不错的。

    View Code
    /// <summary>
    /// Strips HTML markup from a fragment while keeping line breaks and images:
    /// closing <c>&lt;/p&gt;</c> tags and every <c>&lt;br&gt;</c> variant become <c>&lt;br/&gt;</c>,
    /// and each string returned by <c>GetIMG</c> is put back verbatim at its original position.
    /// </summary>
    /// <param name="Htmlstring">Source text that may contain HTML markup.</param>
    /// <returns>
    /// The text with scripts, tags and comments removed and common entities decoded;
    /// an empty string when <paramref name="Htmlstring"/> is null or empty.
    /// </returns>
    public static string NoHTML(string Htmlstring)
    {
        if (string.IsNullOrEmpty(Htmlstring))
        {
            return string.Empty;
        }

        // Markers that survive tag stripping and are swapped back at the end.
        const string BreakMarker = "1234a3211";
        const string ImageMarker = "img无敌";

        // NOTE(review): GetIMG is defined elsewhere; presumably it returns the <img> tags
        // found in the input, in an array that may be padded with trailing nulls — confirm.
        string[] img = GetIMG(Htmlstring);

        // Park every image behind a numbered marker; stop at the first null slot.
        int imageCount = 0;
        while (imageCount < img.Length && img[imageCount] != null)
        {
            Htmlstring = Htmlstring.Replace(img[imageCount], ImageMarker + imageCount);
            imageCount++;
        }

        // Paragraph ends and all <br> spellings (<br>, <br/>, <br />, any case) become a break marker.
        Htmlstring = Regex.Replace(Htmlstring, @"</p\s*>", BreakMarker, RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"<br\s*/?>", BreakMarker, RegexOptions.IgnoreCase);

        // Remove scripts. Singleline lets '.' cross line breaks so multi-line script bodies go too.
        Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "",
            RegexOptions.IgnoreCase | RegexOptions.Singleline);

        // Remove remaining tags, line breaks followed by whitespace, and comment remnants.
        Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

        // Decode common entities. "&nbsp;" is dropped rather than turned into a space.
        Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", "", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);
        // Any other numeric entity is discarded.
        Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);
        // "&amp;" is decoded last so that "&amp;lt;" yields the text "&lt;" instead of being decoded twice.
        Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);

        // string.Replace returns a new string; the result must be assigned or nothing changes.
        // Removing angle brackets here keeps entity-escaped markup from reappearing as live tags.
        Htmlstring = Htmlstring.Replace("<", "");
        Htmlstring = Htmlstring.Replace(">", "");
        Htmlstring = Htmlstring.Replace("\r\n", "");

        // Bring back the line breaks.
        Htmlstring = Htmlstring.Replace(BreakMarker, "<br/>");

        // Restore images from the highest index down: "img无敌1" is a prefix of "img无敌10",
        // so ascending order would corrupt the two-digit markers.
        for (int i = imageCount - 1; i >= 0; i--)
        {
            Htmlstring = Htmlstring.Replace(ImageMarker + i, img[i]);
        }

        return Htmlstring;
    }
  • 相关阅读:
    理解inode
    贝叶斯公式与拼写检查器
    《C程序设计语言》第四章 函数和程序结构
    MIT《计算机科学与编程导论》课堂笔记
    很牛的牛顿迭代法
    开发一个小工具重温C#经典问题
    斯坦福《编程方法学》环境搭建及常见问题
    看Sybase官方手册学索引工作原理
    学习编程的方法、软件和工具
    大师里奇留给了我们什么
  • 原文地址:https://www.cnblogs.com/zhoudemo/p/2323252.html
Copyright © 2011-2022 走看看