zoukankan      html  css  js  c++  java
  • 爬去网页离线数据

    重命名文件

      On Error Resume Next
        Dim dd As String
        Dim k%

        ''提取文件夹名称
        dd = Dir(Sheets("Sheet1").Cells(1, 2) & "*", vbDirectory)
        Do
            dd = Dir
            ''判断是否为文件夹
            If dd <> "" And InStr(1, dd, ".") = 0 Then
                Dim aa
                Set aa = CreateObject("Scripting.FileSystemObject")
                k = k + 1
                ''文件夹重命名
                aa.MoveFolder Sheets("Sheet1").Cells(1, 2) & dd, Sheets("Sheet1").Cells(1, 2) & "改名" & k
            End If

        Loop Until Len(dd) = 0
        Set aa = Nothing

    ‘爬去数据

      Dim arr, brr, i%, s$, html, Ta, n%, j%, str$, Url$, Db, tr, td
        tempPath = Cells(1, 2)
        If Mid(tempPath, Len(tempPath), 1) <> "" Then
            tempPath = tempPath & ""
        End If
        If Dir(tempPath, vbDirectory) = "" Then
            MsgBox "错误!需要处理的文件目录不存在 " & tempPath
            Exit Sub
        End If
        Dim fn
        fn = Dir(tempPath & "*.htm")
        Do While fn <> ""
            Set html = CreateObject("htmlfile")
            dataTxt = GetCode("UTF-8", tempPath & fn) 'tempPath & fn
            html.body.innerhtml = dataTxt
            If (InStr(dataTxt, "tooltip-title") = 0) Then
                oneclick dataTxt, onenum
                onenum = onenum + 1
            Else
               twoclick dataTxt, twonum
               twonum = twonum + 1
            End If
          fn = Dir()
        Loop

    'table数据

    Set Db = html.all.tags("table")(3)
             i = 0: j = 0
             For Each tr In Db.Rows
                m = 0
                i = i + 1: j = 0
                If i > 1 Then
                    For Each td In tr.Cells
                        m = m + 1
                        Sheets("Sheet3").Cells(m, pagenum) = Replace(td.innerText, Chr(10) & Chr(10), Chr(10))
                    Next
                End If

        pagenum = pagenum + 1

    Next

    '页面编码

    Public Function GetCode(CodeBase, Url) '第一个参数是设置编码方式(GB2312或UTF-8)第二个参数是地址.
        Dim xmlHTTP1
        Set xmlHTTP1 = CreateObject("Microsoft.XMLHTTP")
        xmlHTTP1.Open "get", Url, True
        xmlHTTP1.send
        While xmlHTTP1.readyState <> 4
        DoEvents
        Wend
        GetCode = xmlHTTP1.responseBody
        If CStr(GetCode) <> "" Then GetCode = BytesToBstr(GetCode, CodeBase)
        Set xmlHTTP1 = Nothing
    End Function


    Public Function BytesToBstr(strBody, CodeBase)
        Dim ObjStream
        Set ObjStream = CreateObject("Adodb.Stream")
        With ObjStream
        .Type = 1
        .Mode = 3
        .Open
        .write strBody
        .Position = 0
        .Type = 2
        .Charset = CodeBase
        BytesToBstr = .ReadText
        .Close
        End With
        Set ObjStream = Nothing
    End Function

  • 相关阅读:
    微软同步发行Windows 10和Windows 10 Mobile系统更新
    MySQL5.5中文支持
    sqlplus登入和plsql登入的差别
    SQL Server统计信息:问题和解决方式
    OSX: 命令行制作U盘Recovery HD
    Android 使用SwipeBackLayout实现滑动返回上一级页面——实战来袭
    JSP常见的三个编译指令
    matlab-非线性方程求根函数及函数曲线绘制
    走进windows编程的世界-----消息处理函数(3)
    LoadRunner利用ODBC编写MySql脚本
  • 原文地址:https://www.cnblogs.com/bignine/p/10090542.html
Copyright © 2011-2022 走看看