zoukankan      html  css  js  c++  java
  • Excel Vba 过滤主域名

    Sub FilterSiteDomain()
    Dim siteUrl As String
    Dim siteDomain As String
    Dim siteMainDomain As String
    siteUrl = "http://www.baidu.com/a/b/index.html"
    '获取URL域名
    siteDomain = GetSiteByUrl(siteUrl)
    MsgBox siteDomain
    '获取URL主域名
    siteMainDomain = GetSiteDomain(siteDomain)
    MsgBox siteMainDomain
    End Sub


    '返回主域名
    Function GetSiteDomain(siteDomain As String)
    Dim domain As String

    '将获取的域名转换为小写
    domain = LCase(siteDomain)

    If InStr(domain, ".") > 0 Then
    Dim domainArr() As String
    domainArr = Split(domain, ".")

    Dim lastStr As String
    lastStr = domainArr(UBound(domainArr))
    If IsNumeric(lastStr) Then
    GetSiteDomain = Replace(domain, ".", "")
    Else
    Dim domainRules() As String
    domainRules = Split(".com.cn|.net.cn|.org.cn|.gov.cn|.com|.net|.cn|.org|.cc|.me|.tel|.mobi|.asia|.biz|.info|.name|.tv|.hk|.公司|.中国|.网络", "|")
    Dim findStr As String
    Dim replaceStr As String
    Dim returnStr As String
    findStr = ""
    replaceStr = ""
    returnStr = ""

    Dim i As Integer
    For i = 0 To UBound(domainRules)
    '如果最后有找到匹配项
    If EndsWith(domain, LCase(domainRules(i))) Then
    'www.baidu.com
    findStr = domainRules(i)
    '将匹配项替换为空,便于再次判断
    replaceStr = Replace(domain, findStr, "")
    '存在二级域名或者三级,比如:www.baidu
    If InStr(replaceStr, ".") > 0 Then
    Dim replaceArr() As String
    'www baidu
    replaceArr = Split(replaceStr, ".")
    returnStr = replaceArr(UBound(replaceArr)) + findStr
    'GetSiteDomain = returnStr
    Exit For
    Else 'baidu
    '连接起来输出为:baidu.com
    returnStr = replaceStr + findStr
    'GetSiteDomain = returnStr
    Exit For
    End If
    Else
    returnStr = domain
    End If

    Next i

    GetSiteDomain = returnStr

    End If

    Else
    GetSiteDomain = domain
    End If


    End Function

    '返回协议、域名、端口号、页面
    Function GetSiteByUrl(url As String) As String
    Dim regex As Object
    Set regex = CreateObject("vbscript.regexp")
    regex.Global = True
    regex.Pattern = "(\w+)://([^/:]+)(:\d*)?([^# ]*)"
    'MsgBox regex.Replace(url, "使用协议/主域名/端口号/页面:[$1],[$2],[$3],[$4]")
    GetSiteByUrl = regex.Replace(url, "$2")
    End Function


    'strTarget是否以strCom开始
    Function StartsWith(strTarget As String, strCom As String) As Boolean
    StartsWith = (Left(strTarget, Len(strCom)) = strCom)
    End Function

    'strTarget是否以strCom结束
    Function EndsWith(strTarget As String, strCom As String) As Boolean
    EndsWith = (Right(strTarget, Len(strCom)) = strCom)
    End Function
  • 相关阅读:
    JVM原理---------------1.开篇
    mysql开启事务的方式,命令学习
    mysql中的锁
    mysql索引底层原理
    mysql的常见存储引擎与常见日志类型,以及4种线程的作用
    Mutex
    委托和匿名委托
    线程通信
    同步锁
    [ValidateInput(false)]
  • 原文地址:https://www.cnblogs.com/blackcore/p/2232894.html
Copyright © 2011-2022 走看看