zoukankan      html  css  js  c++  java
  • asp采集程序类

    '==================================================
    '函数名:GetHttpPage
    '作 用:获取网页源码
    '参 数:HttpUrl ——网页地址
    '==================================================
    Function GetHttpPage(HttpUrl)
    If IsNull(HttpUrl)=True Or Len(HttpUrl)<18 Or HttpUrl="$False$" Then
    GetHttpPage="$False$"
    Exit Function
    End If
    Dim Http
    Set Http=server.createobject("MSXML2.XMLHTTP")
    Http.open "GET",HttpUrl,False
    Http.Send()
    If Http.Readystate<>4 then
    Set Http=Nothing
    GetHttpPage="$False$"
    Exit function
    End if
    GetHTTPPage=bytesToBSTR(Http.responseBody,"GB2312")
    Set Http=Nothing
    If Err.number<>0 then
    Err.Clear
    End If
    End Function

    '==================================================
    '函数名:BytesToBstr
    '作 用:将获取的源码转换为中文
    '参 数:Body ——要转换的变量
    '参 数:Cset ——要转换的类型
    '==================================================
    Function BytesToBstr(Body,Cset)
    Dim Objstream
    Set Objstream = Server.CreateObject("adodb.stream")
    objstream.Type = 1
    objstream.Mode =3
    objstream.Open
    objstream.Write body
    objstream.Position = 0
    objstream.Type = 2
    objstream.Charset = Cset
    BytesToBstr = objstream.ReadText
    objstream.Close
    set objstream = nothing
    End Function
    '==================================================
    '函数名:UrlEncoding
    '作 用:转换编码
    '==================================================
    Function UrlEncoding(DataStr)
    Dim StrReturn,Si,ThisChr,InnerCode,Hight8,Low8
    StrReturn = ""
    For Si = 1 To Len(DataStr)
    ThisChr = Mid(DataStr,Si,1)
    If Abs(Asc(ThisChr)) < &HFF Then
    StrReturn = StrReturn & ThisChr
    Else
    InnerCode = Asc(ThisChr)
    If InnerCode < 0 Then
    InnerCode = InnerCode + &H10000
    End If
    Hight8 = (InnerCode And &HFF00)\ &HFF
    Low8 = InnerCode And &HFF
    StrReturn = StrReturn & "%" & Hex(Hight8) & "%" & Hex(Low8)
    End If
    Next
    UrlEncoding = StrReturn
    End Function

    '==================================================
    '函数名:GetBody
    '作 用:截取字符串
    '参 数:ConStr ------将要截取的字符串
    '参 数:StartStr ------开始字符串
    '参 数:OverStr ------结束字符串
    '参 数:IncluL ------是否包含StartStr
    '参 数:IncluR ------是否包含OverStr
    '==================================================
    Function GetBody(ConStr,StartStr,OverStr,IncluL,IncluR)
    If ConStr="$False$" or ConStr="" or IsNull(ConStr)=True Or StartStr="" or IsNull(StartStr)=True Or OverStr="" or IsNull(OverStr)=True Then
    GetBody="$False$"
    Exit Function
    End If
    Dim ConStrTemp
    Dim Start,Over
    ConStrTemp=Lcase(ConStr)
    StartStr=Lcase(StartStr)
    OverStr=Lcase(OverStr)
    Start = InStrB(1, ConStrTemp, StartStr, vbBinaryCompare)
    If Start<=0 then
    GetBody="$False$"
    Exit Function
    Else
    If IncluL=False Then
    Start=Start+LenB(StartStr)
    End If
    End If
    Over=InStrB(Start,ConStrTemp,OverStr,vbBinaryCompare)
    If Over<=0 Or Over<=Start then
    GetBody="$False$"
    Exit Function
    Else
    If IncluR=True Then
    Over=Over+LenB(OverStr)
    End If
    End If
    GetBody=MidB(ConStr,Start,Over-Start)
    End Function

    '==================================================
    '函数名:GetArray
    '作 用:提取链接地址,以$Array$分隔
    '参 数:ConStr ------提取地址的原字符
    '参 数:StartStr ------开始字符串
    '参 数:OverStr ------结束字符串
    '参 数:IncluL ------是否包含StartStr
    '参 数:IncluR ------是否包含OverStr
    '==================================================
    Function GetArray(Byval ConStr,StartStr,OverStr,IncluL,IncluR)
    If ConStr="$False$" or ConStr="" Or IsNull(ConStr)=True or StartStr="" Or OverStr="" or IsNull(StartStr)=True Or IsNull(OverStr)=True Then
    GetArray="$False$"
    Exit Function
    End If
    Dim TempStr,TempStr2,objRegExp,Matches,Match
    TempStr=""
    Set objRegExp = New Regexp
    objRegExp.IgnoreCase = True
    objRegExp.Global = True
    objRegExp.Pattern = "("&StartStr&").+?("&OverStr&")"
    Set Matches =objRegExp.Execute(ConStr)
    For Each Match in Matches
    TempStr=TempStr & "$Array$" & Match.Value
    Next
    Set Matches=nothing

    If TempStr="" Then
    GetArray="$False$"
    Exit Function
    End If
    TempStr=Right(TempStr,Len(TempStr)-7)
    If IncluL=False then
    objRegExp.Pattern =StartStr
    TempStr=objRegExp.Replace(TempStr,"")
    End if
    If IncluR=False then
    objRegExp.Pattern =OverStr
    TempStr=objRegExp.Replace(TempStr,"")
    End if
    Set objRegExp=nothing
    Set Matches=nothing

    TempStr=Replace(TempStr,"""","")
    TempStr=Replace(TempStr,"'","")
    TempStr=Replace(TempStr," ","")
    TempStr=Replace(TempStr,"(","")
    TempStr=Replace(TempStr,")","")

    If TempStr="" then
    GetArray="$False$"
    Else
    GetArray=TempStr
    End if
    End Function

    '==================================================
    '函数名:DefiniteUrl
    '作 用:将相对地址转换为绝对地址
    '参 数:PrimitiveUrl ------要转换的相对地址
    '参 数:ConsultUrl ------当前网页地址
    '==================================================
    Function DefiniteUrl(Byval PrimitiveUrl,Byval ConsultUrl)
    Dim ConTemp,PriTemp,Pi,Ci,PriArray,ConArray
    If PrimitiveUrl="" or ConsultUrl="" or PrimitiveUrl="$False$" or ConsultUrl="$False$" Then
    DefiniteUrl="$False$"
    Exit Function
    End If
    If Left(Lcase(ConsultUrl),7)<>"http://" Then
    ConsultUrl= "http://" & ConsultUrl
    End If
    ConsultUrl=Replace(ConsultUrl,"\","/")
    ConsultUrl=Replace(ConsultUrl,"://",":\\")
    PrimitiveUrl=Replace(PrimitiveUrl,"\","/")

    If Right(ConsultUrl,1)<>"/" Then
    If Instr(ConsultUrl,"/")>0 Then
    If Instr(Right(ConsultUrl,Len(ConsultUrl)-InstrRev(ConsultUrl,"/")),".")>0 then
    Else
    ConsultUrl=ConsultUrl & "/"
    End If
    Else
    ConsultUrl=ConsultUrl & "/"
    End If
    End If
    ConArray=Split(ConsultUrl,"/")

    If Left(LCase(PrimitiveUrl),7) = "http://" then
    DefiniteUrl=Replace(PrimitiveUrl,"://",":\\")
    ElseIf Left(PrimitiveUrl,1) = "/" Then
    DefiniteUrl=ConArray(0) & PrimitiveUrl
    ElseIf Left(PrimitiveUrl,2)="./" Then
    PrimitiveUrl=Right(PrimitiveUrl,Len(PrimitiveUrl)-2)
    If Right(ConsultUrl,1)="/" Then
    DefiniteUrl=ConsultUrl & PrimitiveUrl
    Else
    DefiniteUrl=Left(ConsultUrl,InstrRev(ConsultUrl,"/")) & PrimitiveUrl
    End If
    ElseIf Left(PrimitiveUrl,3)="../" then
    Do While Left(PrimitiveUrl,3)="../"
    PrimitiveUrl=Right(PrimitiveUrl,Len(PrimitiveUrl)-3)
    Pi=Pi+1
    Loop
    For Ci=0 to (Ubound(ConArray)-1-Pi)
    If DefiniteUrl<>"" Then
    DefiniteUrl=DefiniteUrl & "/" & ConArray(Ci)
    Else
    DefiniteUrl=ConArray(Ci)
    End If
    Next
    DefiniteUrl=DefiniteUrl & "/" & PrimitiveUrl
    Else
    If Instr(PrimitiveUrl,"/")>0 Then
    PriArray=Split(PrimitiveUrl,"/")
    If Instr(PriArray(0),".")>0 Then
    If Right(PrimitiveUrl,1)="/" Then
    DefiniteUrl="http:\\" & PrimitiveUrl
    Else
    If Instr(PriArray(Ubound(PriArray)-1),".")>0 Then
    DefiniteUrl="http:\\" & PrimitiveUrl
    Else
    DefiniteUrl="http:\\" & PrimitiveUrl & "/"
    End If
    End If
    Else
    If Right(ConsultUrl,1)="/" Then
    DefiniteUrl=ConsultUrl & PrimitiveUrl
    Else
    DefiniteUrl=Left(ConsultUrl,InstrRev(ConsultUrl,"/")) & PrimitiveUrl
    End If
    End If
    Else
    If Instr(PrimitiveUrl,".")>0 Then
    If Right(ConsultUrl,1)="/" Then
    If right(LCase(PrimitiveUrl),3)=".cn" or right(LCase(PrimitiveUrl),3)="com" or right(LCase(PrimitiveUrl),3)="net" or right(LCase(PrimitiveUrl),3)="org" Then
    DefiniteUrl="http:\\" & PrimitiveUrl & "/"
    Else
    DefiniteUrl=ConsultUrl & PrimitiveUrl
    End If
    Else
    If right(LCase(PrimitiveUrl),3)=".cn" or right(LCase(PrimitiveUrl),3)="com" or right(LCase(PrimitiveUrl),3)="net" or right(LCase(PrimitiveUrl),3)="org" Then
    DefiniteUrl="http:\\" & PrimitiveUrl & "/"
    Else
    DefiniteUrl=Left(ConsultUrl,InstrRev(ConsultUrl,"/")) & "/" & PrimitiveUrl
    End If
    End If
    Else
    If Right(ConsultUrl,1)="/" Then
    DefiniteUrl=ConsultUrl & PrimitiveUrl & "/"
    Else
    DefiniteUrl=Left(ConsultUrl,InstrRev(ConsultUrl,"/")) & "/" & PrimitiveUrl & "/"
    End If
    End If
    End If
    End If
    If Left(DefiniteUrl,1)="/" then
    DefiniteUrl=Right(DefiniteUrl,Len(DefiniteUrl)-1)
    End if
    If DefiniteUrl<>"" Then
    DefiniteUrl=Replace(DefiniteUrl,"//","/")
    DefiniteUrl=Replace(DefiniteUrl,":\\","://")
    Else
    DefiniteUrl="$False$"
    End If
    End Function

    '==================================================
    '函数名:ReplaceSaveRemoteFile
    '作 用:替换、保存远程图片
    '参 数:ConStr —— 要替换的字符串
    '参 数:SaveTf —— 是否保存文件,False不保存,True保存
    '参 数: TistUrl—— 当前网页地址
    '==================================================
    Function ReplaceSaveRemoteFile(ConStr,strInstallDir,strChannelDir,SaveTf,TistUrl)
    If ConStr="$False$" or ConStr="" or strInstallDir="" or strChannelDir="" Then
    ReplaceSaveRemoteFile=ConStr
    Exit Function
    End If
    Dim TempStr,TempStr2,TempStr3,Re,Matches,Match,Tempi,TempArray,TempArray2

    Set Re = New Regexp
    Re.IgnoreCase = True
    Re.Global = True
    Re.Pattern ="]>"
    Set Matches =Re.Execute(ConStr)
    For Each Match in Matches
    If TempStr<>"" then
    TempStr=TempStr & "$Array$" & Match.Value
    Else
    TempStr=Match.Value
    End if
    Next
    If TempStr<>"" Then
    TempArray=Split(TempStr,"$Array$")
    TempStr=""
    For Tempi=0 To Ubound(TempArray)
    Re.Pattern ="src\s*=\s*.+?\.(gif|jpg|bmp|jpeg|psd|png|svg|dxf|wmf|tiff)"
    Set Matches =Re.Execute(TempArray(Tempi))
    For Each Match in Matches
    If TempStr<>"" then
    TempStr=TempStr & "$Array$" & Match.Value
    Else
    TempStr=Match.Value
    End if
    Next
    Next
    End if
    If TempStr<>"" Then
    Re.Pattern ="src\s*=\s*"
    TempStr=Re.Replace(TempStr,"")
    End If
    Set Matches=nothing
    Set Re=nothing
    If TempStr="" or IsNull(TempStr)=True Then
    ReplaceSaveRemoteFile=ConStr
    Exit function
    End if
    TempStr=Replace(TempStr,"""","")
    TempStr=Replace(TempStr,"'","")
    TempStr=Replace(TempStr," ","")

    Dim RemoteFileurl,SavePath,PathTemp,DtNow,strFileName,strFileType,ArrSaveFileName,RanNum,Arr_Path
    DtNow=Now()
    If SaveTf=True then
    '***********************************
    SavePath= strChannelDir & "/" & year(DtNow) & right("0" & month(DtNow),2) & "/"
       response.write "链接路径:" & savepath & "
    "
    Arr_Path=Split(SavePath,"/")
    PathTemp=""
    For Tempi=0 To Ubound(Arr_Path)
    If Tempi=0 Then
    PathTemp=Arr_Path(0) & "/"
    ElseIf Tempi=Ubound(Arr_Path) Then
    Exit For
    Else
    PathTemp=PathTemp & Arr_Path(Tempi) & "/"
    End If
    If CheckDir(PathTemp)=False Then
    If MakeNewsDir(PathTemp)=False Then
    SaveTf=False
    Exit For
    End If
    End If
    Next
    End If

    '去掉重复图片开始
    TempArray=Split(TempStr,"$Array$")
    TempStr=""
    For Tempi=0 To Ubound(TempArray)
    If Instr(Lcase(TempStr),Lcase(TempArray(Tempi)))<1 Then
    TempStr=TempStr & "$Array$" & TempArray(Tempi)
    End If
    Next
    TempStr=Right(TempStr,Len(TempStr)-7)
    TempArray=Split(TempStr,"$Array$")
    '去掉重复图片结束

    '转换相对图片地址开始
    TempStr=""
    For Tempi=0 To Ubound(TempArray)
    TempStr=TempStr & "$Array$" & DefiniteUrl(TempArray(Tempi),TistUrl)
    Next
    TempStr=Right(TempStr,Len(TempStr)-7)
    TempStr=Replace(TempStr,Chr(0),"")
    TempArray2=Split(TempStr,"$Array$")
    TempStr=""
    '转换相对图片地址结束

    '图片替换/保存
    Set Re = New Regexp
    Re.IgnoreCase = True
    Re.Global = True

    For Tempi=0 To Ubound(TempArray2)
    RemoteFileUrl=TempArray2(Tempi)
    If RemoteFileUrl<>"$False$" And SaveTf=True Then'保存图片
    ArrSaveFileName = Split(RemoteFileurl,".")
       strFileType=Lcase(ArrSaveFileName(Ubound(ArrSaveFileName)))'文件类型
    If strFileType="asp" or strFileType="asa" or strFileType="aspx" or strFileType="cer" or strFileType="cdx" or strFileType="exe" or strFileType="rar" or strFileType="zip" then
    UploadFiles=""
    ReplaceSaveRemoteFile=ConStr
    Exit Function
    End If

    Randomize
    RanNum=Int(900*Rnd)+100
       strFileName = year(DtNow) & right("0" & month(DtNow),2) & right("0" & day(DtNow),2) & right("0" & hour(DtNow),2) & right("0" & minute(DtNow),2) & right("0" & second(DtNow),2) & ranNum & "." & strFileType
    Re.Pattern =TempArray(Tempi)
       If SaveRemoteFile(SavePath & strFileName,RemoteFileUrl)=True Then
    '********************************
    PathTemp=SavePath & strFileName
    ConStr=Re.Replace(ConStr,PathTemp)
    Re.Pattern=strInstallDir & strChannelDir & "/"
    UploadFiles=UploadFiles & "|" & Re.Replace(SavePath &strFileName,"")
    Else
    PathTemp=RemoteFileUrl
    ConStr=Re.Replace(ConStr,PathTemp)
    'UploadFiles=UploadFiles & "|" & RemoteFileUrl
    End If
    ElseIf RemoteFileurl<>"$False$" and SaveTf=False Then'不保存图片
    Re.Pattern =TempArray(Tempi)
    ConStr=Re.Replace(ConStr,RemoteFileUrl)
    UploadFiles=UploadFiles & "|" & RemoteFileUrl
    End If
    Next
    Set Re=nothing
    If UploadFiles<>"" Then
    UploadFiles=Right(UploadFiles,Len(UploadFiles)-1)
    End If
    ReplaceSaveRemoteFile=ConStr
    End function

    '==================================================
    '函数名:ReplaceSwfFile
    '作 用:解析动画路径
    '参 数:ConStr —— 要替换的字符串
    '参 数: TistUrl—— 当前网页地址
    '==================================================
    Function ReplaceSwfFile(ConStr,TistUrl)
    If ConStr="$False$" or ConStr="" or TistUrl="" or TistUrl="$False$" Then
    ReplaceSwfFile=ConStr
    Exit Function
    End If
    Dim TempStr,TempStr2,TempStr3,Re,Matches,Match,Tempi,TempArray,TempArray2

    Set Re = New Regexp
    Re.IgnoreCase = True
    Re.Global = True
    Re.Pattern ="]>"
    Set Matches =Re.Execute(ConStr)
    For Each Match in Matches
    If TempStr<>"" then
    TempStr=TempStr & "$Array$" & Match.Value
    Else
    TempStr=Match.Value
    End if
    Next
    If TempStr<>"" Then
    TempArray=Split(TempStr,"$Array$")
    TempStr=""
    For Tempi=0 To Ubound(TempArray)
    Re.Pattern ="value\s*=\s*.+?\.swf"
    Set Matches =Re.Execute(TempArray(Tempi))
    For Each Match in Matches
    If TempStr<>"" then
    TempStr=TempStr & "$Array$" & Match.Value
    Else
    TempStr=Match.Value
    End if
    Next
    Next
    End if
    If TempStr<>"" Then
    Re.Pattern ="value\s*=\s*"
    TempStr=Re.Replace(TempStr,"")
    End If
    If TempStr="" or IsNull(TempStr)=True Then
    ReplaceSwfFile=ConStr
    Exit function
    End if
    TempStr=Replace(TempStr,"""","")
    TempStr=Replace(TempStr,"'","")
    TempStr=Replace(TempStr," ","")

    Set Matches=nothing
    Set Re=nothing

    '去掉重复文件开始
    TempArray=Split(TempStr,"$Array$")
    TempStr=""
    For Tempi=0 To Ubound(TempArray)
    If Instr(Lcase(TempStr),Lcase(TempArray(Tempi)))<1 Then
    TempStr=TempStr & "$Array$" & TempArray(Tempi)
    End If
    Next
    TempStr=Right(TempStr,Len(TempStr)-7)
    TempArray=Split(TempStr,"$Array$")
    '去掉重复文件结束

    '转换相对地址开始
    TempStr=""
    For Tempi=0 To Ubound(TempArray)
    TempStr=TempStr & "$Array$" & DefiniteUrl(TempArray(Tempi),TistUrl)
    Next
    TempStr=Right(TempStr,Len(TempStr)-7)
    TempStr=Replace(TempStr,Chr(0),"")
    TempArray2=Split(TempStr,"$Array$")
    TempStr=""
    '转换相对地址结束

    '替换
    Set Re = New Regexp
    Re.IgnoreCase = True
    Re.Global = True
    For Tempi=0 To Ubound(TempArray2)
    RemoteFileUrl=TempArray2(Tempi)
    Re.Pattern =TempArray(Tempi)
    ConStr=Re.Replace(ConStr,RemoteFileUrl)
    Next
    Set Re=nothing
    ReplaceSwfFile=ConStr
    End function

    '==================================================
    '过程名:SaveRemoteFile
    '作 用:保存远程的文件到本地
    '参 数:LocalFileName ------ 本地文件名
    '参 数:RemoteFileUrl ------ 远程文件URL
    '==================================================
    Function SaveRemoteFile(LocalFileName,RemoteFileUrl)
    SaveRemoteFile=True
      dim Ads,Retrieval,GetRemoteData
      Set Retrieval = Server.CreateObject("Microsoft.XMLHTTP")
      With Retrieval
        .Open "Get", RemoteFileUrl, False, "", ""
        .Send
    If .Readystate<>4 then
    SaveRemoteFile=False
    Exit Function
    End If
        GetRemoteData = .ResponseBody
      End With
      Set Retrieval = Nothing
      Set Ads = Server.CreateObject("Adodb.Stream")
      With Ads
        .Type = 1
        .Open
        .Write GetRemoteData
        .SaveToFile server.MapPath(LocalFileName),2
        .Cancel()
        .Close()
      End With
      Set Ads=nothing
    end Function

    '==================================================
    '函数名:FpHtmlEnCode
    '作 用:标题过滤
    '参 数:fString ——字符串
    '==================================================
    Function FpHtmlEnCode(fString)
    If IsNull(fString)=False or fString<>"" or fString<>"$False$" Then
    fString=nohtml(fString)
    fString=FilterJS(fString)
    fString = Replace(fString," "," ")
    fString = Replace(fString,""","")
    fString = Replace(fString,"'","")
    fString = replace(fString, ">", "")
    fString = replace(fString, "<", "")
    fString = Replace(fString, CHR(9), " ")' 
    fString = Replace(fString, CHR(10), "")
    fString = Replace(fString, CHR(13), "")
    fString = Replace(fString, CHR(34), "")
    fString = Replace(fString, CHR(32), " ") 'space
    fString = Replace(fString, CHR(39), "")
    fString = Replace(fString, CHR(10) & CHR(10),"")
    fString = Replace(fString, CHR(10)&CHR(13), "")
    fString=Trim(fString)
    FpHtmlEnCode=fString
    Else
    FpHtmlEnCode="$False$"
    End If
    End Function

  • 相关阅读:
    linux中压缩、解压缩命令
    linux中的sed指令
    linux中shell编程(一)
    linux中的正则表达式
    linux中的管道和重定向
    linux中用户、组和权限相关指令
    linux中bash常见的指令
    linux文本操作相关指令
    java.lang.OutOfMemoryError 解决程序启动内存溢出问题
    Java常用排序算法/程序员必须掌握的8大排序算法
  • 原文地址:https://www.cnblogs.com/top5/p/1671527.html
Copyright © 2011-2022 走看看