zoukankan      html  css  js  c++  java
  • C#去掉HTML标记

    该方法亲测可行，下面直接粘贴代码：

    /// <summary>
    /// Strips HTML markup from <paramref name="htmlStream"/> and returns the remaining text.
    /// </summary>
    /// <remarks>
    /// Processing happens in four passes:
    /// 1. Whole <c>&lt;script&gt;</c> and <c>&lt;style&gt;</c> elements (including their content) are replaced by a space.
    /// 2. Every remaining tag and a fixed set of entities
    ///    (<c>&amp;nbsp; &amp;amp; &amp;shy; &amp;#160; &amp;#173; &amp;bull; &amp;lt; &amp;gt;</c>) is replaced by a space.
    ///    Entities are not decoded; they are simply blanked out.
    /// 3. Runs of blank / whitespace-only lines are collapsed into a single <c>\r\n</c>.
    /// 4. Runs of tabs and spaces are collapsed into a single space.
    /// The result is trimmed before being returned.
    /// </remarks>
    /// <param name="htmlStream">The HTML text to clean. Must not be <c>null</c>.</param>
    /// <returns>The text with markup removed and whitespace normalised.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="htmlStream"/> is <c>null</c>.</exception>
    public string RemoveHTMLTags(string htmlStream)
    {
        if (htmlStream == null)
        {
            // ArgumentNullException derives from Exception, so callers that
            // caught the previous generic Exception keep working.
            throw new ArgumentNullException("htmlStream", "Your input html stream is null!");
        }

        // Script and style elements must be handled on their own, before the
        // generic tag pattern: their bodies may contain '<' / '>' or nested markup
        // that the generic pattern would only partially remove.
        // A verbatim string is required here because "\s" is not a valid C# escape.
        htmlStream = Regex.Replace(
            htmlStream,
            @"(<script)[\s\S]*?(</script>)|(<style)[\s\S]*?(</style>)",
            " ",
            RegexOptions.IgnoreCase);

        // Remove ordinary tags and blank out the listed entities.
        htmlStream = Regex.Replace(
            htmlStream,
            "<[^>]+>|&nbsp;|&amp;|&shy;|&#160;|&#173;|&bull;|&lt;|&gt;",
            " ",
            RegexOptions.IgnoreCase);

        // Collapse consecutive blank (or whitespace-only) lines into one line break.
        // Note: '|' is NOT an alternation operator inside a character class, so it
        // is deliberately absent from the classes below; otherwise literal pipe
        // characters in the text would be swallowed.
        htmlStream = Regex.Replace(
            htmlStream,
            @"(\r\n[\r\n\t ]*\r\n)|(\n[\r\n\t ]*\n)",
            "\r\n");

        // Collapse runs of tabs/spaces into a single space.
        htmlStream = Regex.Replace(htmlStream, @"[\t ]+", " ");

        return htmlStream.Trim();
    }
  • 相关阅读:
    Python打包之pyinstaller
    Python关于Pyqt
    Python撰写mail
    后台管理左侧菜单
    全选反选以及取消
    模态对话框
    Spark2.3.1版本全分布模式的安装与部署
    Kafka消息格式及多版本支持
    2019-11-18-plot作图
    剑指Offer-知识迁移能力53-59
  • 原文地址:https://www.cnblogs.com/xiangzhong/p/5333501.html
Copyright © 2011-2022 走看看