zoukankan      html  css  js  c++  java
  • 正则替换 html

    正则表达式,去除所有HTML标签

     // Sample HTML fragment used as the input of Page_Load, which overwrites this
     // field with the filtered result. It mixes table, p, br and img tags with text.
     protected string str = "<table><tr><td>sdasasdsdd</td></tr></table><br><p>sds</p><img id='img1' src='http://www.baidu.com/img/baidu_logo.gif' width='100' height='50' alt=''>aaassss<br><img src='http://www.baidu.com/img/baidu_logo.gif' width='100' height='50' alt=''> 说是道 ";

        protected void Page_Load(object sender, EventArgs e)
        {

            //string regexstr = @"<[^>]*>";    //去除所有的标签

            //@"<script[^>]*?>.*?</script >" //去除所有脚本,中间部分也删除
            
            // string regexstr = @"<img[^>]*>";   //去除图片的正则

           // string regexstr = @"<(?!br).*?>";   //去除所有标签,只剩br

            // string regexstr = @"<table[^>]*?>.*?</table>";   //去除table里面的所有内容

            string regexstr = @"<(?!img|br|p|/p).*?>";   //去除所有标签,只剩img,br,p
      
            str = Regex.Replace(str, regexstr, string.Empty, RegexOptions.IgnoreCase);

        }

    asp中正则表达式去除HTML标记(窃自eWebEditor)
    2009年12月31日 星期四 下午 12:40
    Function ExecReg(re, content)
        ' Deletes every match of the regular expression pattern `re` from
        ' `content` and returns the resulting string.
        '   re      - pattern text assigned to RegExp.Pattern
        '   content - string to filter
        ' Matching is case-sensitive because IgnoreCase is left at its default.
        Dim myRegExp

        Set myRegExp = New RegExp
        myRegExp.Global = True      ' replace all occurrences, not just the first
        myRegExp.Pattern = re

        ExecReg = myRegExp.Replace(content, "")
    End Function

    Function DecodeFilter(html)
        ' Lower-cases an HTML fragment and strips scripting hooks, a fixed list of
        ' layout/formatting tags, class and style attributes, XML declarations and
        ' namespace-prefixed tags. Returns the filtered string.
        '
        ' The rules run in a fixed order because the output of one rule is the
        ' input of the next.
        '
        ' NOTE: `html` is assigned to inside the function, so with VBScript's
        ' default ByRef argument passing a variable supplied by the caller is
        ' modified as well.
        Dim cleaner, rule, tagName, attrName

        Set cleaner = New RegExp
        cleaner.Global = True

        ' Patterns are written in lower case; the whole input is lower-cased to
        ' make them match, so the returned text is lower-cased too.
        html = LCase(html)

        ' Client-side scripting: <script> tags, script URL schemes, some inline
        ' event-handler name prefixes, and the "&#" of numeric character references.
        ' NOTE: the event-handler and "&#" rules also hit ordinary text that
        ' happens to contain those character sequences.
        For Each rule In Array( _
                "</?script\b[^>]*>", _
                "(javascript|jscript|vbscript|vbs):", _
                "on(mouse|exit|error|click|key)", _
                "&#")
            cleaner.Pattern = rule
            html = cleaner.Replace(html, "")
        Next

        ' Structural and formatting tags. \b keeps each rule to the exact tag
        ' name, so that e.g. the "b" rule does not remove <br> or <body>.
        For Each tagName In Array("table", "tr", "th", "td", "a", "p", "img", "div", _
                "ul", "li", "tbody", "h1", "h2", "h3", "h4", "h5", "h6", "b", "strong")
            cleaner.Pattern = "</?" & tagName & "\b[^>]*>"
            html = cleaner.Replace(html, "")
        Next

        ' class= and style= attributes: drop the attribute but keep the rest of
        ' the tag ($1 is the text before the attribute, $2 the text after it).
        ' The value may be double-quoted, single-quoted or unquoted.
        For Each attrName In Array("class", "style")
            cleaner.Pattern = "(<[^>]+) " & attrName & "=(?:""[^""]*""|'[^']*'|[^\s>]*)([^>]*>)"
            html = cleaner.Replace(html, "$1$2")
        Next

        ' <xml> / </xml> tags and <?xml ...> declarations.
        cleaner.Pattern = "<[/?]?xml\b[^>]*>"
        html = cleaner.Replace(html, "")

        ' Namespace-prefixed tags such as <o:p></o:p>.
        cleaner.Pattern = "</?[a-z]+:[^>]*>"
        html = cleaner.Replace(html, "")

        ' Fonts, marquees and embedded objects.
        For Each tagName In Array("font", "marquee", "object", "param", "embed")
            cleaner.Pattern = "</?" & tagName & "\b[^>]*>"
            html = cleaner.Replace(html, "")
        Next

        Set cleaner = Nothing
        DecodeFilter = html
    End Function



    Function RemoveHTML(strText)
     ' Returns strText with every "<...>" run (an HTML tag) deleted.
     Dim tagMatcher
     Set tagMatcher = New RegExp
     With tagMatcher
      .Global = True          ' remove all tags, not only the first one
      .Pattern = "<[^>]*>"
      RemoveHTML = .Replace(strText, "")
     End With
    End Function


    function nohtml(str)
        ' Replaces HTML tags in str with a space, then removes space characters,
        ' and returns the result.
        '
        ' NOTE: str is assigned to inside the function, so with VBScript's default
        ' ByRef argument passing a variable supplied by the caller is modified too.
        dim re
        Set re=new RegExp
        re.IgnoreCase =true
        re.Global=True

        ' The escapes must be backslashes. With forward slashes the patterns
        ' required a literal "/<" ... "/>" and never matched an ordinary tag.
        re.Pattern="(\<.[^\<]*\>)"      ' "<", one character, then up to the last ">" before the next "<"
        str=re.replace(str," ")
        re.Pattern="(\<\/[^\<]*\>)"     ' closing tags
        str=re.replace(str," ")

        ' NOTE(review): these two calls look identical here; one of them may
        ' originally have targeted a different whitespace character or an
        ' entity -- confirm against the original source.
        str=replace(str," ","")
        str=replace(str," ","")

        nohtml=str
        set re=nothing
    end function
  • 相关阅读:
    1Z0-053 争议题目解析
    Vertica 高可用性测试
    Vertica 导出数据测试用例
    Oracle安装部署,版本升级,应用补丁快速参考
    记录一则数据库连接故障ORA-12560,ORA-12518
    记录一则RMAN备份策略修正案例
    oracle 存储过程 变量的声明和赋值的3种方式
    tomcat 如何查看tomcat版本及位数
    64位Ubuntu 14.04 安装wps
    图表中怎样实现动态变更分类轴与系列值
  • 原文地址:https://www.cnblogs.com/y0umer/p/3839386.html
Copyright © 2011-2022 走看看