zoukankan      html  css  js  c++  java
  • 新人补钙系列教程之:正则过滤

    '功能:使用正则表达式对字符串进行替换
    复制内容到剪贴板 程序代码

    ' Replaces every match of a regular expression inside a string.
    '
    ' Str        - text to search; a Null value yields "".
    ' PatternStr - VBScript RegExp pattern.
    ' RepStr     - replacement text ($1, $2, ... refer to captured groups).
    '
    ' Matching is case-insensitive and global (all occurrences are replaced).
    Function RegReplace(Str, PatternStr, RepStr)
        Dim sourceText, matcher

        sourceText = Str

        ' Guard clause: Null input short-circuits to an empty string.
        If IsNull(sourceText) Then
            RegReplace = ""
            Exit Function
        End If

        Set matcher = New RegExp
        With matcher
            .IgnoreCase = True
            .Global = True
            .Pattern = PatternStr
            RegReplace = .Replace(sourceText, RepStr)
        End With
    End Function
    复制代码

    '过滤HTML各种标签样式脚本

    ' Strips selected categories of markup from an HTML fragment.
    '
    ' sHTML    - HTML text to clean. Null or empty input returns "".
    ' sFilters - comma-separated, case-insensitive list of filter names;
    '            when empty it defaults to "SCRIPT,OBJECT". Recognised names:
    '              JORKIN    collapse runs of five or more spaces down to four
    '              SCRIPT    remove <script> blocks, on* event attributes and
    '                        neutralise script: protocols
    '              FIXIMG    rewrite <img> tags to just "src" plus border=0
    '              TABLE     remove table markup (<tr> becomes <p>)
    '              CLASS     remove class attributes
    '              STYLE     remove style attributes
    '              XML       remove <?xml ...> / <xml ...> tags
    '              NAMESPACE remove namespaced tags such as <o:p>
    '              FONT, MARQUEE, OBJECT (also PARAM/EMBED), COMMENT
    '              anything else is treated as a tag name to remove
    '
    ' Returns the filtered HTML.
    ' Note: no ByVal is declared, so both arguments are ByRef (the VBScript
    ' default) and are assigned to here; variables passed by the caller are
    ' therefore updated as well.
    ' Depends on RegReplace (case-insensitive, global regex replace).
    Function HTMLFilter(sHTML, sFilters)
        Dim aFilters, i, sName

        If sHTML & "" = "" Then
            HTMLFilter = ""
            Exit Function
        End If
        If sFilters & "" = "" Then sFilters = "SCRIPT,OBJECT"

        aFilters = Split(UCase(sFilters), ",")
        For i = 0 To UBound(aFilters)
            ' Trim once so "A, B" and "A,B" behave the same in every branch.
            sName = Trim(aFilters(i))
            Select Case sName
                Case ""
                    ' Empty entry (e.g. trailing comma): ignore it. Falling
                    ' through to Case Else would build "</?[^>]*?>" and strip
                    ' every tag in the document.
                Case "JORKIN"
                    Do While InStr(sHTML, "     ") > 0
                        sHTML = Replace(sHTML, "     ", "    ")
                    Loop
                Case "SCRIPT"
                    ' Remove <script>...</script> blocks, then any stray
                    ' unpaired script tags.
                    sHTML = RegReplace(sHTML, "<SCRIPT[\s\S]*?</SCRIPT>", "")
                    sHTML = RegReplace(sHTML, "</?SCRIPT[^>]*>", "")
                    ' Remove on* event attributes, quoted then unquoted.
                    ' (The previous pattern used the character class [on] and
                    ' required whitespace after "=", so onclick="..." survived.)
                    sHTML = RegReplace(sHTML, "\sON\w+\s*=\s*([""'])[\s\S]*?\1", "")
                    sHTML = RegReplace(sHTML, "\sON\w+\s*=\s*[^\s>""']+", "")
                    ' Neutralise script protocols by swapping the ASCII colon
                    ' for a full-width colon (U+FF1A). The previous replacement
                    ' "$1:" reproduced the match and changed nothing.
                    ' NOTE(review): full-width colon is the assumed intent - confirm.
                    sHTML = RegReplace(sHTML, "(JAVASCRIPT|JSCRIPT|VBSCRIPT|VBS):", "$1" & ChrW(65306))
                Case "FIXIMG"
                    ' Unquoted src: only one capture group exists, so use $1
                    ' (the previous "$2" always produced an empty src).
                    sHTML = RegReplace(sHTML, "<IMG[^>]*?\sSRC\s*=\s*([^""'\s>]+)[^>]*>", "<img src=$1 border=0>")
                    ' Quoted src: stay inside one tag ([^>]*? instead of a
                    ' greedy .*) so several images on one line are not merged.
                    sHTML = RegReplace(sHTML, "<IMG[^>]*?\sSRC\s*=\s*([""'])([\s\S]*?)\1[^>]*>", "<img src=$1$2$1 border=0>")
                Case "TABLE"
                    ' Remove table markup <table><tbody><tr><td><th>.
                    sHTML = RegReplace(sHTML, "</?TABLE[^>]*>", "")
                    sHTML = RegReplace(sHTML, "</?TBODY[^>]*>", "")
                    sHTML = RegReplace(sHTML, "<(/?)TR[^>]*>", "<$1p>")
                    sHTML = RegReplace(sHTML, "</?TH[^>]*>", " ")
                    sHTML = RegReplace(sHTML, "</?TD[^>]*>", " ")
                Case "CLASS"
                    ' Remove class attributes. Quoted values go first: running
                    ' the unquoted pattern first cut class="a b" at the space
                    ' and left a dangling b" inside the tag.
                    sHTML = RegReplace(sHTML, "\sCLASS\s*=\s*([""'])[\s\S]*?\1", "")
                    sHTML = RegReplace(sHTML, "(<[^>]+)\sCLASS\s*=\s*[^\s>""']+", "$1")
                Case "STYLE"
                    ' Remove inline style attributes (same ordering as CLASS).
                    sHTML = RegReplace(sHTML, "\sSTYLE\s*=\s*([""'])[\s\S]*?\1", "")
                    sHTML = RegReplace(sHTML, "(<[^>]+)\sSTYLE\s*=\s*[^\s>""']+", "$1")
                Case "XML"
                    ' Remove <?xml ...> declarations (and <xml ...> tags, which
                    ' the previous pattern matched). VBScript strings have no
                    ' backslash escapes, so the old "<\\?XML" meant "optional
                    ' literal backslash" and never matched "<?xml".
                    sHTML = RegReplace(sHTML, "<\??XML[^>]*>", "")
                Case "NAMESPACE"
                    ' Remove namespaced tags such as <o:p></o:p>.
                    sHTML = RegReplace(sHTML, "<\/?[a-z]+:[^>]*>", "")
                Case "FONT"
                    ' Remove <font></font>.
                    sHTML = RegReplace(sHTML, "</?FONT[^>]*>", "")
                Case "MARQUEE"
                    ' Remove <marquee></marquee>.
                    sHTML = RegReplace(sHTML, "</?MARQUEE[^>]*>", "")
                Case "OBJECT"
                    ' Remove <object>, <param> and <embed> tags.
                    sHTML = RegReplace(sHTML, "</?OBJECT[^>]*>", "")
                    sHTML = RegReplace(sHTML, "</?PARAM[^>]*>", "")
                    sHTML = RegReplace(sHTML, "</?EMBED[^>]*>", "")
                Case "COMMENT"
                    ' Remove HTML comments. This also removes comments inside
                    ' <script> and <style> blocks, so use with care.
                    sHTML = RegReplace(sHTML, "<!--[\s\S]*?-->", "")
                Case Else
                    ' Treat the name as a tag to remove. \b stops a short name
                    ' such as "B" from also matching <BODY> or <BR>.
                    sHTML = RegReplace(sHTML, "</?" & sName & "\b[^>]*?>", "")
            End Select
        Next

        HTMLFilter = sHTML
    End Function
    复制代码

    过滤全部html

    复制内容到剪贴板 程序代码

    <\/*[^<>]*>


    过滤 style

    复制内容到剪贴板 程序代码

    (<style)+[^<>]*>[^\0]*(<\/style>)+


    过滤 层 div

    复制内容到剪贴板 程序代码

    <(\/){0,1}div[^<>]*>


    过滤 链接 a :

    复制内容到剪贴板 程序代码

    <(\/){0,1}a[^<>]*>


    过滤 字体 font

    复制内容到剪贴板 程序代码

    <(\/){0,1}font[^<>]*>


    过滤 span 

    复制内容到剪贴板 程序代码

    <(\/){0,1}span[^<>]*>


    过滤 object 

    复制内容到剪贴板 程序代码

    <object.*?/object>

    过滤 iframe

    复制内容到剪贴板 程序代码

    (<iframe){1,}[^<>]*>[^\0]*(<\/iframe>){1,}

    过滤 script:

    复制内容到剪贴板 程序代码

    (<script){1,}[^<>]*>[^\0]*(<\/script>){1,}


    过滤 Class 

    复制内容到剪贴板 程序代码

    (class=){1,}(""|\'){0,1}\S+(""|\'|>|\s){0,1}


    过滤 style 和 strong

    复制内容到剪贴板 程序代码

    <(style|strong)[^>]*>|<\/(style|strong)>

    过滤 img

    复制内容到剪贴板 程序代码

    <(img)[^>]*>|<\/(img)>


    过滤 table tr td 等

    复制内容到剪贴板 程序代码

    <(table|tbody|tr|td|th)[^>]*>|<\/(table|tbody|tr|td|th)>


    过滤

    复制内容到剪贴板 程序代码

    <(div|blockquote|fieldset|legend)[^>]*>|<\/(div|blockquote|fieldset|legend)>


    过滤


    复制内容到剪贴板 程序代码

    <(font|i|u|h[1-9]|s)[^>]*>|<\/(font|i|u|h[1-9]|s)>


    过滤


    复制内容到剪贴板 程序代码

    <(style|strong)[^>]*>|<\/(style|strong)>


    过滤

    复制内容到剪贴板 程序代码

    <a[^>]*>|<\/a>


    过滤

    复制内容到剪贴板 程序代码

    <(meta|iframe|frame|span|tbody|layer)[^>]*>|<\/(iframe|frame|meta|span|tbody|layer)>


    过滤

    复制内容到剪贴板 程序代码

    <br[^>]*>

  • 相关阅读:
    数据倾斜原理及解决方案
    删除emp_no重复的记录,只保留最小的id对应的记录
    理解HBase面向列存储
    给数据库用户授权(对象多为系统表,如dba可以查看的表)
    SpringBoot里的一些注解
    01背包
    【转】简说GNU, GCC and MinGW (Lu Hongling)
    费马小定理
    欧拉定理
    【转】C中的静态存储区和动态存储区
  • 原文地址:https://www.cnblogs.com/keng333/p/2299769.html
Copyright © 2011-2022 走看看