zoukankan      html  css  js  c++  java
  • excel批量提取网页标题

    最近时间比较忙,有时候很多网页需要临时保存,以便空闲的时候查看。单纯的保存网页链接会让人很枯燥,所以需要自动批量提取标题。

    为了这个小功能去写个小程序有点不划算,所以就利用excel实现了这个功能。

    先上图:

    代码如下:

     1 Option Explicit
     2 
     3 
     4 Public Function GetTitle(url As String)
     5     Dim xmlHttp As Object
     6     Dim strHtml As String
     7     
     8     url = Trim(url)
     9     
    10     If LCase(Left(url, 5)) = "https" Then
    11     
    12         GetTitle = "暂不支持https协议"
    13         Exit Function
    14     End If
    15     
    16     
    17     '都不能构成完整的http协议,起码也得 a.cc
    18     If Len(url) < 5 Then
    19         Exit Function
    20     End If
    21     
    22     
    23     url = "http://" & Replace(Trim(url), "http://", "")
    24     
    25     Set xmlHttp = CreateObject("Microsoft.XMLHTTP")
    26     xmlHttp.Open "GET", url, True
    27     xmlHttp.send (Null)
    28     While xmlHttp.ReadyState <> 4
    29         DoEvents
    30     Wend
    31     strHtml = LCase(BytesToBstr(xmlHttp.responseBody))
    32     GetTitle = Split(Split(strHtml, "<title>")(1), "</title>")(0)
    33     Set xmlHttp = Nothing
    34 End Function
    35 
    36 Private Function BytesToBstr(Bytes)
    37     Dim Unicode As String
    38     If IsUTF8(Bytes) Then '如果不是UTF-8编码则按照GB2312来处理
    39         Unicode = "UTF-8"
    40     Else
    41         Unicode = "GB2312"
    42     End If
    43 
    44     Dim objstream As Object
    45     Set objstream = CreateObject("ADODB.Stream")
    46     With objstream
    47         .Type = 1
    48         .Mode = 3
    49         .Open
    50         .Write Bytes
    51         .Position = 0
    52         .Type = 2
    53         .Charset = Unicode
    54         BytesToBstr = .ReadText
    55        .Close
    56     End With
    57     Set objstream = Nothing
    58 End Function
    59 
    60  '判断网页编码函数
    61 Private Function IsUTF8(Bytes) As Boolean
    62         Dim i As Long, AscN As Long, Length As Long
    63         Length = UBound(Bytes) + 1
    64        
    65         If Length < 3 Then
    66             IsUTF8 = False
    67             Exit Function
    68         ElseIf Bytes(0) = &HEF And Bytes(1) = &HBB And Bytes(2) = &HBF Then
    69             IsUTF8 = True
    70             Exit Function
    71         End If
    72 
    73         Do While i <= Length - 1
    74             If Bytes(i) < 128 Then
    75                 i = i + 1
    76                 AscN = AscN + 1
    77             ElseIf (Bytes(i) And &HE0) = &HC0 And (Bytes(i + 1) And &HC0) = &H80 Then
    78                 i = i + 2
    79 
    80             ElseIf i + 2 < Length Then
    81                 If (Bytes(i) And &HF0) = &HE0 And (Bytes(i + 1) And &HC0) = &H80 And (Bytes(i + 2) And &HC0) = &H80 Then
    82                      i = i + 3
    83                 Else
    84                     IsUTF8 = False
    85                     Exit Function
    86                 End If
    87             Else
    88                 IsUTF8 = False
    89                 Exit Function
    90             End If
    91         Loop
    92                
    93         If AscN = Length Then
    94             IsUTF8 = False
    95         Else
    96             IsUTF8 = True
    97         End If
    98 
    99 End Function

    【说明】:因为目前保存的网页都是文章类型,所以就直接避免处理https安全连接了。

    相关知识点:excel批量提取网页标题,excel自动提取网页标题,vb自动识别网页编码,vb字符串utf8转gbk

  • 相关阅读:
    自动化流程完成打包 IPA 到 上传 AppStore(部分)
    dex2jar jd_jui 反编译apk
    mac 系统常用小工具
    从 Jira page 上获取信息和下载附件
    python zip文件处理 之 zipfile模块
    showDoc 自动创建文档分析
    Unable to install ‘*****’
    自动化流程完成 打包 IPA 到 上传 AppStore 之 iOS IPA签名
    灰度图片和灰度颜色(代码里面只是一些相关的方法替换按需选取几个就好)
    Mac开发一些好的软件
  • 原文地址:https://www.cnblogs.com/lovelp/p/3662827.html
Copyright © 2011-2022 走看看