'功能:使用正则表达式对字符串进行替换
复制内容到剪贴板 程序代码
' RegReplace: replaces every match of a regular expression in a string.
'
' Parameters:
'   Str        - subject text; a Null value yields "".
'   PatternStr - regular-expression pattern (VBScript RegExp syntax).
'   RepStr     - replacement text; may use $1, $2, ... for captured groups.
' Returns:
'   The subject with all matches replaced. Matching is case-insensitive
'   and global (all occurrences, not just the first).
Function RegReplace(Str, PatternStr, RepStr)
    Dim regEx

    ' A Null subject is answered with an empty string rather than being
    ' handed to the RegExp object.
    If IsNull(Str) Then
        RegReplace = ""
        Exit Function
    End If

    Set regEx = New RegExp
    regEx.IgnoreCase = True
    regEx.Global = True
    regEx.Pattern = PatternStr

    RegReplace = regEx.Replace(Str, RepStr)

    Set regEx = Nothing
End Function
' HTMLFilter: strips selected kinds of tags, styles and scripts from HTML.
'
' Parameters:
'   sHTML    - HTML text to clean. Null/empty input returns "".
'   sFilters - comma-separated, case-insensitive list of filter names.
'              Null/empty defaults to "SCRIPT,OBJECT". Recognised names:
'                JORKIN    collapse runs of spaces into one space
'                SCRIPT    remove <script> blocks and quoted on*="..." handlers
'                FIXIMG    reduce <img> tags to <img src=... border=0>
'                TABLE     remove table markup (rows become <p>, cells a space)
'                CLASS     remove class attributes
'                STYLE     remove style attributes
'                XML       remove <?xml ...> declarations
'                NAMESPACE remove namespaced tags such as <o:p>
'                FONT, MARQUEE, OBJECT (also <param>/<embed>)
'                COMMENT   remove <!-- --> comments, including those inside
'                          <script>/<style>; use with care
'              Any other name is treated as a tag name and its opening and
'              closing tags are removed.
' Returns:
'   The filtered HTML. The arguments themselves are not modified; all work
'   is done on local copies.
'
' Filters are applied in the order they are listed.
Function HTMLFilter(sHTML, sFilters)
    Dim sResult, sFilterList, aFilters, sName, i

    HTMLFilter = ""

    ' Appending "" turns Null/Empty into an empty string.
    sResult = sHTML & ""
    If sResult = "" Then Exit Function

    sFilterList = sFilters & ""
    If sFilterList = "" Then sFilterList = "SCRIPT,OBJECT"

    aFilters = Split(UCase(sFilterList), ",")
    For i = 0 To UBound(aFilters)
        sName = Trim(aFilters(i))
        Select Case sName
            Case ""
                ' Empty entry (e.g. trailing comma): ignore it. Passing it to
                ' the generic branch would build a pattern matching every tag.

            Case "JORKIN"
                ' Collapse consecutive spaces.
                ' NOTE(review): the previous code searched for and replaced a
                ' single space with a single space, which never terminates;
                ' assumed the intent was double-space -> single-space. Confirm.
                Do While InStr(sResult, "  ") > 0
                    sResult = Replace(sResult, "  ", " ")
                Loop

            Case "SCRIPT"
                ' Remove <script>...</script> blocks.
                sResult = RegReplace(sResult, "<SCRIPT[\s\S]*?</SCRIPT>", "")
                ' Remove quoted inline event handlers (onclick="...", onload='...').
                ' Unquoted handler values are not covered by this pattern.
                sResult = RegReplace(sResult, "\son\w+\s*=\s*([""'])[\s\S]*?\1", "")
                ' NOTE(review): this replaces the match with itself, so it has
                ' no effect as written. Presumably the replacement once
                ' contained something other than a plain colon - confirm the
                ' intended neutralisation before relying on it.
                sResult = RegReplace(sResult, "(JAVASCRIPT|JSCRIPT|VBSCRIPT|VBS):", "$1:")

            Case "FIXIMG"
                ' Normalise <img> tags: keep only src, add border=0.
                ' Unquoted src values first, then quoted ones ($1 = the quote).
                sResult = RegReplace(sResult, "<IMG[^>]*?\sSRC\s*=\s*([^""'\s>]+)[^>]*>", "<img src=$1 border=0>")
                sResult = RegReplace(sResult, "<IMG[^>]*?\sSRC\s*=\s*([""'])(.*?)\1[^>]*>", "<img src=$1$2$1 border=0>")

            Case "TABLE"
                ' Remove table structure: <table>/<thead>/<tbody>/<tfoot> vanish,
                ' rows become paragraphs, cells become a space.
                ' \b stops TR/TH/TD from matching longer tag names.
                sResult = RegReplace(sResult, "</?TABLE\b[^>]*>", "")
                sResult = RegReplace(sResult, "</?(THEAD|TBODY|TFOOT)\b[^>]*>", "")
                sResult = RegReplace(sResult, "<(/?)TR\b[^>]*>", "<$1p>")
                sResult = RegReplace(sResult, "</?TH\b[^>]*>", " ")
                sResult = RegReplace(sResult, "</?TD\b[^>]*>", " ")

            Case "CLASS"
                ' Remove class attributes. Quoted values go first so a value
                ' containing spaces is removed whole; the unquoted pattern
                ' would otherwise cut it at the first space.
                sResult = RegReplace(sResult, "\sCLASS\s*=\s*([""'])(.*?)\1", "")
                sResult = RegReplace(sResult, "(<[^>]+) CLASS=[^\s>]+([^>]*>)", "$1 $2")

            Case "STYLE"
                ' Remove style attributes (same ordering rationale as CLASS).
                sResult = RegReplace(sResult, "\sSTYLE\s*=\s*([""'])(.*?)\1", "")
                sResult = RegReplace(sResult, "(<[^>]+) STYLE=[^\s>]+([^>]*>)", "$1 $2")

            Case "XML"
                ' Remove <?xml ...> declarations. VBScript strings have no
                ' backslash escapes, so a single "\?" is the literal "?".
                sResult = RegReplace(sResult, "<\?XML[^>]*>", "")

            Case "NAMESPACE"
                ' Remove namespaced tags such as <o:p></o:p>.
                sResult = RegReplace(sResult, "<\/?[a-z]+:[^>]*>", "")

            Case "FONT"
                sResult = RegReplace(sResult, "</?FONT\b[^>]*>", "")

            Case "MARQUEE"
                sResult = RegReplace(sResult, "</?MARQUEE\b[^>]*>", "")

            Case "OBJECT"
                ' Remove <object>, <param> and <embed> tags.
                sResult = RegReplace(sResult, "</?OBJECT\b[^>]*>", "")
                sResult = RegReplace(sResult, "</?PARAM\b[^>]*>", "")
                sResult = RegReplace(sResult, "</?EMBED\b[^>]*>", "")

            Case "COMMENT"
                ' Remove HTML comments; this also hits comments inside
                ' <script> and <style>, so use with care.
                sResult = RegReplace(sResult, "<!--[\s\S]*?-->", "")

            Case Else
                ' Treat the name as a tag and remove its opening/closing tags.
                ' The trimmed name plus \b keeps "B" from also matching
                ' <BR> or <BODY>.
                sResult = RegReplace(sResult, "</?" & sName & "\b[^>]*>", "")
        End Select
    Next

    HTMLFilter = sResult
End Function
过滤全部html
复制内容到剪贴板 程序代码
<\/*[^<>]*>
过滤 style
复制内容到剪贴板 程序代码
(<style)+[^<>]*>[^\0]*(<\/style>)+
过滤 层 div
复制内容到剪贴板 程序代码
<(\/){0,1}div[^<>]*>
过滤 链接 a :
复制内容到剪贴板 程序代码
<(\/){0,1}a[^<>]*>
过滤 字体 font
复制内容到剪贴板 程序代码
<(\/){0,1}font[^<>]*>
过滤 span
复制内容到剪贴板 程序代码
<(\/){0,1}span[^<>]*>
过滤 object
复制内容到剪贴板 程序代码
<object.*?/object>
过滤 iframe
复制内容到剪贴板 程序代码
(<iframe){1,}[^<>]*>[^\0]*(<\/iframe>){1,}
过滤 script:
复制内容到剪贴板 程序代码
(<script){1,}[^<>]*>[^\0]*(<\/script>){1,}
过滤 Class
复制内容到剪贴板 程序代码
(class=){1,}(""|\'){0,1}\S+(""|\'|>|\s){0,1}
过滤 style 和 strong
复制内容到剪贴板 程序代码
<(style|strong)[^>]*>|<\/(style|strong)>
过滤 img
复制内容到剪贴板 程序代码
<(img)[^>]*>|<\/(img)>
过滤 table tr td 等
复制内容到剪贴板 程序代码
<(table|tbody|tr|td|th)[^>]*>|<\/(table|tbody|tr|td|th)>
过滤
复制内容到剪贴板 程序代码
<(div|blockquote|fieldset|legend)[^>]*>|<\/(div|blockquote|fieldset|legend)>
过滤
复制内容到剪贴板 程序代码
<(font|i|u|h[1-9]|s)[^>]*>|<\/(font|i|u|h[1-9]|s)>
过滤
复制内容到剪贴板 程序代码
<(style|strong)[^>]*>|<\/(style|strong)>
过滤
复制内容到剪贴板 程序代码
<a[^>]*>|<\/a>
过滤
复制内容到剪贴板 程序代码
<(meta|iframe|frame|span|tbody|layer)[^>]*>|<\/(iframe|frame|meta|span|tbody|layer)>
过滤
复制内容到剪贴板 程序代码
<br[^>]*>