zoukankan      html  css  js  c++  java
  • Excel Vba 过滤主域名

    Sub FilterSiteDomain()
    Dim siteUrl As String
    Dim siteDomain As String
    Dim siteMainDomain As String
    siteUrl = "http://www.baidu.com/a/b/index.html"
    '获取URL域名
    siteDomain = GetSiteByUrl(siteUrl)
    MsgBox siteDomain
    '获取URL主域名
    siteMainDomain = GetSiteDomain(siteDomain)
    MsgBox siteMainDomain
    End Sub


    '返回主域名
    Function GetSiteDomain(siteDomain As String)
    Dim domain As String

    '将获取的域名转换为小写
    domain = LCase(siteDomain)

    If InStr(domain, ".") > 0 Then
    Dim domainArr() As String
    domainArr = Split(domain, ".")

    Dim lastStr As String
    lastStr = domainArr(UBound(domainArr))
    If IsNumeric(lastStr) Then
    GetSiteDomain = Replace(domain, ".", "")
    Else
    Dim domainRules() As String
    domainRules = Split(".com.cn|.net.cn|.org.cn|.gov.cn|.com|.net|.cn|.org|.cc|.me|.tel|.mobi|.asia|.biz|.info|.name|.tv|.hk|.公司|.中国|.网络", "|")
    Dim findStr As String
    Dim replaceStr As String
    Dim returnStr As String
    findStr = ""
    replaceStr = ""
    returnStr = ""

    Dim i As Integer
    For i = 0 To UBound(domainRules)
    '如果最后有找到匹配项
    If EndsWith(domain, LCase(domainRules(i))) Then
    'www.baidu.com
    findStr = domainRules(i)
    '将匹配项替换为空,便于再次判断
    replaceStr = Replace(domain, findStr, "")
    '存在二级域名或者三级,比如:www.baidu
    If InStr(replaceStr, ".") > 0 Then
    Dim replaceArr() As String
    'www baidu
    replaceArr = Split(replaceStr, ".")
    returnStr = replaceArr(UBound(replaceArr)) + findStr
    'GetSiteDomain = returnStr
    Exit For
    Else 'baidu
    '连接起来输出为:baidu.com
    returnStr = replaceStr + findStr
    'GetSiteDomain = returnStr
    Exit For
    End If
    Else
    returnStr = domain
    End If

    Next i

    GetSiteDomain = returnStr

    End If

    Else
    GetSiteDomain = domain
    End If


    End Function

    '返回协议、域名、端口号、页面
    Function GetSiteByUrl(url As String) As String
    Dim regex As Object
    Set regex = CreateObject("vbscript.regexp")
    regex.Global = True
    regex.Pattern = "(\w+)://([^/:]+)(:\d*)?([^# ]*)"
    'MsgBox regex.Replace(url, "使用协议/主域名/端口号/页面:[$1],[$2],[$3],[$4]")
    GetSiteByUrl = regex.Replace(url, "$2")
    End Function


    'strTarget是否以strCom开始
    Function StartsWith(strTarget As String, strCom As String) As Boolean
    StartsWith = (Left(strTarget, Len(strCom)) = strCom)
    End Function

    'strTarget是否以strCom结束
    Function EndsWith(strTarget As String, strCom As String) As Boolean
    EndsWith = (Right(strTarget, Len(strCom)) = strCom)
    End Function
  • 相关阅读:
    BZOJ 2654: tree
    洛谷P1972[SDOI2009]HH的项链
    洛谷 P3833 [SHOI2012]魔法树
    P2167 [SDOI2009]Bill的挑战
    洛谷 P2145 [JSOI2007]祖码
    洛谷 P4170 [CQOI2007]涂色
    P2024 [NOI2001]食物链
    USACO 2012 December ZQUOJ 24122 Scrambled Letters(二分)
    USACO 2012 December ZQUOJ 24128 Wifi Setup(动态dp)
    2013长春网赛1009 hdu 4767 Bell(矩阵快速幂+中国剩余定理)
  • 原文地址:https://www.cnblogs.com/blackcore/p/2232894.html
Copyright © 2011-2022 走看看