zoukankan      html  css  js  c++  java
  • 纯golang爬虫实战-(五)-登录并带cookie访问

    之前写的代码访问内网网站,在实践中发现以下现象:

    1、访问网站时如不设置headers 会返回包含xss字样的提示

    2:fiddler截获后,只有在IE浏览器仍处于登录状态时,才能补发成功。当在浏览器中退出登录后,fiddler补发不成功。

    3:我将浏览器登录成功后的http headers复制到代码中,其中包含了cookies ,此时运行代码可以成功。但是在浏览器中退出登录后,代码运行也是未登录状态。

    说明浏览器登录后,服务器上才保持了有效的sessionID,起初怀疑是不是因为httponly设置的影响?

    对照现象2和3,说明之前的go代码虽然用了cookiejar,但似乎没能携带cookies去访问,用类似以下代码也不起作用:

        jar.SetCookies(req.URL, []*http.Cookie{
            &http.Cookie{Name: "PHPSESSID", Value: "26c2tkqumv2a2l4o34qtdcbs80", HttpOnly: false},
            &http.Cookie{Name: "security", Value: "impossible", HttpOnly: false},
        })
        client.Jar = jar

    用  req.AddCookie手工设置cookies;试了也不行。

        u, err := url.Parse("http://192.168.132.80/login/login.jsp")
        for _, v := range jar.Cookies(u) {
            req.AddCookie(v)
        }

    后来参考了这里:https://www.oschina.net/question/593413_139087 。其中提到可以把Transport包装一下,在RoundTrip开始和结束的位置进行cookie的管理。但我不太会用Transport(http.RoundTripper)。

    最终还是用fiddler拦截请求,发现原来在chrome console中看不到正确的POST地址和http header,当然没法成功登录!注意:

    1、POST提交表单时,要设置Content-Type: application/x-www-form-urlencoded

    2、在chrome console中可看到:POST提交的body(或叫form data)与header部分之间由一个空行分隔。并且在header中有类似Content-Length: 258的字样,258表示body(或叫form data)的字节数。

     最终的测试代码如下:

    package main
    
    import (
        "fmt"
        "io/ioutil"
        "net/http"
        "net/http/cookiejar"
        "strings"
    )
    
    // Package-level cookie state.
    var (
        // gCurCookies is a slice of cookies; initAll resets it to nil.
        gCurCookies []*http.Cookie
        // gCurCookieJar is the cookie jar created by initAll.
        gCurCookieJar *cookiejar.Jar
    )

    // initAll installs a fresh cookie jar in gCurCookieJar and clears gCurCookies.
    // The error result of cookiejar.New is discarded.
    func initAll() {
        jar, _ := cookiejar.New(nil)
        gCurCookieJar = jar
        gCurCookies = nil
    }
    
    //1 get url response html
    func getUrlRespHtml(url string) string {
        fmt.Printf("
    getUrlRespHtml, url=%s", url)
    
        var respHtml string = ""
    
        httpClient := &http.Client{
            CheckRedirect: nil,
            Jar:           gCurCookieJar,
        }
    
        httpReq, err := http.NewRequest("GET", url, nil)
        httpResp, err := httpClient.Do(httpReq)
        if err != nil {
            fmt.Printf("
    http get url=%s response error=%s
    ", url, err.Error())
        }
        fmt.Printf("
    httpResp.Header=%s", httpResp.Header)
        fmt.Printf("
    httpResp.Status=%s", httpResp.Status)
    
        defer httpResp.Body.Close()
    
        body, errReadAll := ioutil.ReadAll(httpResp.Body)
        if errReadAll != nil {
            fmt.Printf("
    get response for url=%s got error=%s
    ", url, errReadAll.Error())
        }
        //全局保存
        gCurCookies = gCurCookieJar.Cookies(httpReq.URL)
    
        respHtml = string(body)
        return respHtml
    }
    
    //2
    func getUrlRespHtmlWithHeader(url, headers string) string {
        fmt.Printf("
    getUrlRespHtml, url=%s", url)
    
        var respHtml string = ""
    
        httpClient := &http.Client{
            CheckRedirect: nil,
            Jar:           gCurCookieJar,
        }
    
        httpReq, err := http.NewRequest("GET", url, nil)
        AddHeaders(httpReq, headers)
        httpResp, err := httpClient.Do(httpReq)
        if err != nil {
            fmt.Printf("
    http get url=%s response error=%s
    ", url, err.Error())
        }
        fmt.Printf("
    httpResp.Header=%s", httpResp.Header)
        fmt.Printf("
    httpResp.Status=%s", httpResp.Status)
        fmt.Printf("
    httpResp.cookies=%s", httpResp.Cookies())
    
        defer httpResp.Body.Close()
    
        body, errReadAll := ioutil.ReadAll(httpResp.Body)
        if errReadAll != nil {
            fmt.Printf("
    get response for url=%s got error=%s
    ", url, errReadAll.Error())
        }
        //全局保存
        gCurCookies = gCurCookieJar.Cookies(httpReq.URL)
    
        respHtml = string(body)
        return respHtml
    }
    
    //3
    func PostUrlRespHtmlWithHeader(url, headers, formdata string) string {
        fmt.Printf("
    getUrlRespHtml, url=%s", url)
    
        var respHtml string = ""
    
        httpClient := &http.Client{
            CheckRedirect: nil,
            Jar:           gCurCookieJar,
        }
    
        httpReq, err := http.NewRequest("POST", url, ioutil.NopCloser(strings.NewReader(formdata)))
        AddHeaders(httpReq, headers)
        httpReq.Header.Set("ContentType", "application/x-www-form-urlencoded")
        httpResp, err := httpClient.Do(httpReq)
        if err != nil {
            fmt.Printf("
    http get url=%s response error=%s
    ", url, err.Error())
        }
        fmt.Printf("
    httpResp.Header=%s", httpResp.Header)
        fmt.Printf("
    httpResp.Status=%s", httpResp.Status)
    
        defer httpResp.Body.Close()
    
        body, errReadAll := ioutil.ReadAll(httpResp.Body)
        if errReadAll != nil {
            fmt.Printf("
    get response for url=%s got error=%s
    ", url, errReadAll.Error())
        }
        //全局保存
        gCurCookies = gCurCookieJar.Cookies(httpReq.URL)
    
        respHtml = string(body)
        return respHtml
    }
    
    func dbgPrintCurCookies() {
        var cookieNum int = len(gCurCookies)
        fmt.Printf("cookieNum=%d", cookieNum)
        for i := 0; i < cookieNum; i++ {
            var curCk *http.Cookie = gCurCookies[i]
            fmt.Printf("
    
    
    
    ------ Cookie [%d]------", i)
            fmt.Printf("
    	Name=%s", curCk.Name)
            fmt.Printf("
    	Value=%s", curCk.Value)
            fmt.Printf("
    	Path=%s", curCk.Path)
            fmt.Printf("
    	Domain=%s", curCk.Domain)
            fmt.Printf("
    	Expires=%s", curCk.Expires)
            fmt.Printf("
    	RawExpires=%s", curCk.RawExpires)
            fmt.Printf("
    	MaxAge=%d", curCk.MaxAge)
            fmt.Printf("
    	Secure=%t", curCk.Secure)
            fmt.Printf("
    	HttpOnly=%t", curCk.HttpOnly)
            fmt.Printf("
    	Raw=%s", curCk.Raw)
            fmt.Printf("
    	Unparsed=%s", curCk.Unparsed)
        }
    }
    
    // AddHeaders parses headers, a newline-separated list of "Name: value" lines
    // (for example copied from a browser or proxy capture), and sets each pair on
    // req, replacing any existing value with the same name.
    //
    // Each line is split at its first colon, so values may themselves contain
    // colons. Surrounding whitespace (including a trailing carriage return) is
    // trimmed from both name and value. Lines without a colon or with an empty
    // name are skipped. The same req is returned for convenience.
    func AddHeaders(req *http.Request, headers string) *http.Request {
        for _, line := range strings.Split(headers, "\n") {
            sep := strings.Index(line, ":")
            if sep < 0 {
                // Blank or malformed line: nothing to set.
                continue
            }
            name := strings.TrimSpace(line[:sep])
            if name == "" {
                continue
            }
            req.Header.Set(name, strings.TrimSpace(line[sep+1:]))
        }
        return req
    }
    
    func main() {
        initAll()
        /*
            fmt.Printf("====== step 1:get Cookie ======")
            var MainUrl string = "http://192.168.132.80/login/login.jsp"
            fmt.Printf("
    MainUrl=%s", MainUrl)
            getUrlRespHtmlWithHeader(MainUrl, headers2)
            dbgPrintCurCookies()
        */
    
        fmt.Printf("
    
    
    ====== step 2:get Cookie ======")
        var headers2 = `Accept: text/html, application/xhtml+xml, */*
    Referer: http://192.168.132.80/login/login.jsp
    Accept-Language: zh-CN
    User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko
    Content-Type: application/x-www-form-urlencoded
    Accept-Encoding: gzip, deflate
    Host: 192.168.132.80
    Content-Length: 258
    Connection: Keep-Alive
    Pragma: no-cache
    Cookie: logincookiecheck=1581819550262+C1D3FCB434C8223BE9C4CE5AD9497183; testBanCookie=test; JSESSIONID=abcrJrk4lxqzZccwgDUax; loginfileweaver=%2Fwui%2Ftheme%2Fecology7%2Fpage%2Flogin.jsp%3FtemplateId%3D6%26logintype%3D1%26gopage%3D; loginidweaver=114; languageidweaver=7`
        var formdata = `loginfile=%2Fwui%2Ftheme%2Fecology7%2Fpage%2Flogin.jsp%3FtemplateId%3D6%26logintype%3D1%26gopage%3D&logintype=1&fontName=%CE%A2%C8%ED%D1%C5%BA%DA&message=&gopage=&formmethod=post&rnd=&serial=&username=&isie=true&loginid=admin&userpassword=1234&submit=`
        var getapiUrl string = "http://192.168.132.80/login/VerifyLogin.jsp "
        PostUrlRespHtmlWithHeader(getapiUrl, headers2, formdata)
        dbgPrintCurCookies()
    
        fmt.Printf("
    
    
    ====== step 3:use the Cookie ======")
        var headers3 = `Host: 192.168.132.80
    Connection: keep-alive
    Pragma: no-cache
    Cache-Control: no-cache
    Upgrade-Insecure-Requests: 1
    User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36
    Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9
    Accept-Encoding: gzip, deflate
    Accept-Language: zh-CN,zh;q=0.9`
        var getapiUrl3 string = "http://192.168.132.80/docs/docs/DocMoreForHp.jsp?eid=660&date2during=0&tabid=2"
        getUrlRespHtmlWithHeader(getapiUrl3, headers3)
        dbgPrintCurCookies()
    }
     
  • 相关阅读:
    什么是内部类
    "=="和equals方法究竟有什么区别?
    SWFUpload乱码问题的解决
    xStream转换XML、JSON
    Java文件下载
    笔记摘录
    Javascript 函数传参问题
    JQUERY伸缩导航
    ruby关于flip-flop理解上一个注意点
    ruby 使用Struct场景
  • 原文地址:https://www.cnblogs.com/pu369/p/12307162.html
Copyright © 2011-2022 走看看