zoukankan      html  css  js  c++  java
  • 通过referer url获取用户在搜索引擎中输入的关键字和关键字所在页数[asp]

    首先必须分析referer url中哪部分是关键字,哪部分是页数。例如在hao123输入关键字"我们",在搜索结果中的第7页访问你的网站,那么来源的url就是:http://www.baidu.com/s?lm=0&si=&rn=10&tn=sitehao123&ie=gb2312&ct=0&wd=%CE%D2%C3%C7&pn=60&cl=3
    上面的url中,wd=%CE%D2%C3%C7就是关键字部分,其中%CE%D2%C3%C7是"我们"两字通过urlencode得到的CED2是"我"字的GB编码,C3C7是"们"字的GB编码,只要使用urlDecode我们就可以得到原来用户输入了什么关键字访问了自己的网站。而pn=60则是你网站所在的页数,百度是每页10条记录,因此pn=0就是第一页,pn=60是第7页。若url中没有pn部分,则表示是第一页。一般来说,百度对关键字是使用GB编码(如"我们"二字编码成%CE%D2%C3%C7),除非url中含有ie=utf8,那么将采用utf8对关键字编码("我们"二字将编译成%E6%88%91%E4%BB%AC)。相对于百度,google则默认采用utf8对关键字编码,那么就需要将utf8编码转为unicode再通过chrw还原为原来的文字。

    一下列出各个搜索引擎的关键字和页数的String表示

    名称 关键字String  页数String
    www.baidu wd/word pn
    image.baidu word pn
    cache.baidu word pn
    tom  word pn
    sou.china query pgNum
    sogou query page
    114.vnet kw  start 
    seek.3721 name/p page
    sj53  wd pn
    iask k p

    ------------------------------------上面为多数采用gb对关键字进行编码,下面为多数采用utf8对关键字进行编码

    yahoo p b
    google q start
    baidugoo Keyword pn
    search.msn q first

    以上只是大致的结果,其实还有很多细致的地方,需要大家自己去发现,我不多说了。

    下面是原程序

    <%
    function shortAgent(agentString)
        shortAgent
    ="Other"
        
    if instr(agentString,"MSIE")>=1 then
            shortAgent
    =mid(agentString,instr(agentString,"MSIE"),8)
        
    end if
        
    if instr(agentString,"Opera")>=1 then
            shortAgent
    ="Opera"
        
    end if 
    end function
    %
    >
    <%
    Function URLDecode(enStr) 
    dim deStr 
    dim c,i,v 
    deStr
    ="" 
    for i=1 to len(enStr) 
        c
    =Mid(enStr,i,1
        
    if c="%" then 
            v
    =eval("&h"+Mid(enStr,i+1,2)) 
            
    if v<128 then 
                deStr
    =deStr&chr(v) 
                i
    =i+2 
            
    else 
                
    if isvalidhex(mid(enstr,i,3)) then 
                    
    if isvalidhex(mid(enstr,i+3,3)) then 
                        v
    =eval("&h"+Mid(enStr,i+1,2)+Mid(enStr,i+4,2)) 
                        deStr
    =deStr&chr(v) 
                        i
    =i+5 
                    
    else
                        v
    =eval("&h"+Mid(enStr,i+1,2)+cstr(hex(asc(Mid(enStr,i+3,1))))) 
                        deStr
    =deStr&chr(v) 
                        i
    =i+3 
                    
    end if 
                
    else 
                    destr
    =destr&
                
    end if 
            
    end if 
        
    else 
            
    if c="+" then 
                deStr
    =deStr&" " 
            
    else 
                deStr
    =deStr&
            
    end if 
        
    end if 
    next 
    URLDecode
    =deStr 
    end function 

    function isvalidhex(str) 
    isvalidhex
    =true 
    str
    =ucase(str) 
    if len(str)<>3 then isvalidhex=false:exit function 
    if left(str,1)<>"%" then isvalidhex=false:exit function 
    c
    =mid(str,2,1
    if not (((c>="0"and (c<="9")) or ((c>="A"and (c<="Z"))) then isvalidhex=false:exit function 
    c
    =mid(str,3,1
    if not (((c>="0"and (c<="9")) or ((c>="A"and (c<="Z"))) then isvalidhex=false:exit function 
    end function 

    function getKw(URLString,kwString)
        getKw
    =""
        kwString
    =kwString&"="
        kwArray
    =split(URLString,"&")
        
    for i=0 to UBound(kwArray)
            kwStart
    =instr(kwArray(i),kwString)
            
    if kwStart=1 then
                getKw
    =mid(kwArray(i),kwStart+len(kwString),len(kwArray(i)))
            
    end if
        
    next
    end function

    function getPn(URLString,pnString)
        getPn
    =0
        pnString
    =pnString&"="
        pnArray
    =split(URLString,"&")
        
    for i=0 to UBound(pnArray)
            pnStart
    =instr(pnArray(i),pnString)
            
    if pnStart=1 then
                getPn
    =mid(pnArray(i),pnStart+len(pnString),len(pnArray(i)))
            
    end if
        
    next
    end function

    function calculatePn(pn_tmp,pn_type,pn_beg,pn_base)
        
    if pn_type=0 then
            
    if pn_tmp=0 then pn_tmp=1
            calculatePn
    =pn_tmp
        
    else
            
    if pn_beg=0 then calculatePn=Int((pn_tmp+1)/pn_base)+1
            
    if pn_beg=1 then calculatePn=Int(pn_tmp/pn_base)+1
        
    end if
    end function
    %
    >
    <%
    function UTF2GB(UTFStr) 

    for Dig=1 to len(UTFStr) 
      
    '如果UTF8编码文字以%开头则进行转换
      if mid(UTFStr,Dig,1)="%" then 
         
    'UTF8编码文字大于8则转换为汉字
        if len(UTFStr) >= Dig+8 then 
           GBStr
    =GBStr & ConvChinese(mid(UTFStr,Dig,9)) 
           Dig
    =Dig+8 
        
    else 
          GBStr
    =GBStr & mid(UTFStr,Dig,1
        
    end if 
      
    else 
         GBStr
    =GBStr & mid(UTFStr,Dig,1
      
    end if 
    next 
    UTF2GB
    =GBStr 
    end function 

    'UTF8编码文字将转换为汉字
    function ConvChinese(x) 
       A
    =split(mid(x,2),"%"
       i
    =0 
       j
    =0 
      
    for i=0 to ubound(A) 
         A(i)
    =c16to2(A(i))
        
    ' Response.Write(A(i)&"<br/>") 
      next 
      
    for i=0 to ubound(A)-1 
        DigS
    =instr(A(i),"0"
        Unicode
    ="" 
        
    for j=1 to DigS-1 
          
    if j=1 then 
            A(i)
    =right(A(i),len(A(i))-DigS) 
            Unicode
    =Unicode & A(i) 
          
    else 
             i
    =i+1 
             A(i)
    =right(A(i),len(A(i))-2
             Unicode
    =Unicode & A(i) 
          
    end if 
          
    'Response.Write(Unicode&"<br/>")
        next 
        
    'Response.Write(c2to16(Unicode))
        if len(c2to16(Unicode))=4 then 
           ConvChinese
    =ConvChinese & chrw(int("&H" & c2to16(Unicode))) 
        
    else
           ConvChinese
    =ConvChinese & chr(int("&H" & c2to16(Unicode))) 
        
    end if 
      
    next 
    end function 

    '二进制代码转换为十六进制代码
    function c2to16(x)
       i
    =1 
       
    for i=1 to len(x) step 4 
          c2to16
    =c2to16 & hex(c2to10(mid(x,i,4))) 
       
    next 
    end function 

    '二进制代码转换为十进制代码
    function c2to10(x)
       c2to10
    =0 
       
    if x="0" then exit function 
         i
    =0 
       
    for i= 0 to len(x) -1 
          
    if mid(x,len(x)-i,1)="1" then c2to10=c2to10+2^(i) 
       
    next 
    end function 

    '十六进制代码转换为二进制代码
    function c16to2(x) 
        i
    =0 
        
    for i=1 to len(trim(x)) 
          tempstr
    = c10to2(cint(int("&h" & mid(x,i,1)))) 
          
    do while len(tempstr)<4 
             tempstr
    ="0" & tempstr 
          
    loop 
          c16to2
    =c16to2 & tempstr 
       
    next 
    end function 

    '十进制代码转换为二进制代码
    function c10to2(x) 
       mysign
    =sgn(x) 
       x
    =abs(x) 
       DigS
    =1 
       
    do 
          
    if x<2^DigS then 
            
    exit do 
          
    else 
            DigS
    =DigS+1 
          
    end if 
       
    loop 
       tempnum
    =

       i
    =0 
       
    for i=DigS to 1 step-1 
          
    if tempnum>=2^(i-1then 
             tempnum
    =tempnum-2^(i-1
             c10to2
    =c10to2 & "1" 
          
    else 
             c10to2
    =c10to2 & "0" 
          
    end if 
       
    next 
       
    if mysign=-1 then c10to2="-" & c10to2 
    end function
    %
    >
    <%
    function resolveRef(URLStringAll)
        URLString
    =split(URLStringAll,"?")(1)
        URLHead
    =split(URLStringAll,"?")(0)
        
    if instr(URLHead,"http://www.baidu.com")>=1 or instr(URLHead,"http://www1.baidu.com")>=1 then
            
    if getKw(URLString,"wd")="" then
                kw
    =URLDecode(getKw(URLString,"word"))
            
    else
                kw
    =URLDecode(getKw(URLString,"wd"))
            
    end if
            pn
    =calculatePn(getPn(URLString,"pn"),1,0,10)    
            
    exit function
        
    end if
        
    if instr(URLHead,"http://image.baidu.com")>=1 then
            kw
    =URLDecode(getKw(URLString,"word"))
            pn
    =calculatePn(getPn(URLString,"pn"),1,0,16)    
            
    exit function
        
    end if
        
    if instr(URLHead,"http://cache.baidu.com")>=1 then
            kw
    =URLDecode(getKw(URLString,"word"))
            kw
    =replace(kw,";","")
            kw
    =replace(kw,","," ")
            
    exit function
        
    end if
        
    if instr(URLHead,"http://search.tom.com")>=1 then
            
    if getKw(URLString,"word")="" then
                kw
    =URLDecode(getKw(URLString,"w"))
            
    else
                kw
    =URLDecode(getKw(URLString,"word"))
            
    end if
            pn
    =calculatePn(getPn(URLString,"pn"),1,0,10)
            
    exit function
        
    end if
        
    if instr(URLHead,"http://sou.china.com")>=1 then
            kw
    =URLDecode(getKw(URLString,"query"))
            pn
    =calculatePn(getPn(URLString,"pgNum"),0,0,10)
            
    exit function
        
    end if
        
    if instr(URLHead,"sogou.com")>=1 then
            kw
    =URLDecode(getKw(URLString,"query"))
            pn
    =calculatePn(getPn(URLString,"page"),0,0,10)
            
    exit function
        
    end if
        
    if instr(URLHead,"http://search.114.vnet.cn")>=1 then
            kw
    =URLDecode(getKw(URLString,"kw"))
            pn
    =calculatePn(getPn(URLString,"start"),1,0,10)
            
    exit function
        
    end if
        
    if instr(URLHead,"http://seek.3721.com")>=1 then
            kw
    =URLDecode(getKw(URLString,"name"))
            
    if kw="" then kw=URLDecode(getKw(URLString,"p"))
            pn
    =calculatePn(getPn(URLString,"page"),0,0,10)
            
    exit function
        
    end if
        
    if instr(URLHead,"http://search.msn.com.cn")>=1 then
            kw
    =UTF2GB(getKw(URLString,"q"))
            pn
    =calculatePn(getPn(URLString,"first"),1,1,10)
            
    exit function
        
    end if
        
    if instr(URLHead,"http://www.sj53.com")>=1 then
            kw
    =URLDecode(getKw(URLString,"wd"))
            pn
    =calculatePn(getPn(URLString,"pn"),1,0,10)
            
    exit function
        
    end if
        
    if instr(URLHead,"iask.com")>=1 then
            kw
    =URLDecode(getKw(URLString,"k"))
            pn
    =calculatePn(getPn(URLString,"p"),0,0,10)
            
    exit function
        
    end if
        
    if instr(URLHead,"http://www.baidugoo.com")>=1 then
            kw
    =UTF2GB(getKw(URLString,"Keyword"))
            pn
    =calculatePn(getPn(URLString,"pn"),0,0,10)
            
    exit function
        
    end if
        
    if instr(URLHead,"yahoo.com")>=1 then
            
    if instr(LCase(URLStringAll),"ei=utf-8")>=1 then
                
    if getKw(URLString,"p")="" then
                    kw
    =UTF2GB(getKw(URLString,"keyword"))
                
    else
                    kw
    =UTF2GB(getKw(URLString,"p"))
                
    end if
                pn
    =calculatePn(getPn(URLString,"b"),1,1,10)
            
    else
                
    if getKw(URLString,"p")="" then
                    kw
    =URLDecode(getKw(URLString,"keyword"))
                
    else
                    kw
    =URLDecode(getKw(URLString,"p"))
                
    end if
                pn
    =calculatePn(getPn(URLString,"b"),1,1,10)
            
    end if
            
    exit function
        
    end if
        
    if instr(URLStringAll,"http://www.google")>=1 then    
            kw
    =UTF2GB(getKw(URLString,"q"))
            pn
    =calculatePn(getPn(URLString,"start"),1,0,10)
            
    exit function
        
    end if
    end function
    %
    >
  • 相关阅读:
    集合---Map
    一个机器部署多个tomcat
    JavaScript要不要加分号";"
    Nodejs 路径 /, ./, ../, ..// 的区别
    玩转Vue的24个小程序---基础篇
    如何创建Node.js Web服务器
    为什么Ajax XMLHttpRequest POST方法传递参数失败了
    字典元素如何遍历
    Beautiful Soup 如何获取到href
    如何查看Ajax请求
  • 原文地址:https://www.cnblogs.com/nasdaqhe/p/608558.html
Copyright © 2011-2022 走看看