zoukankan      html  css  js  c++  java
  • 纯golang爬虫实战-(六)-关于cookiejar的理解 (2020-02-14 13:50)

    针对上一篇遗留的cookie问题,我从这里 https://studygolang.com/articles/5228 找到一段几年前的代码。原作者golang_yh发表的原文已经不见了,我对代码中的一处小错误进行了修复。

    感觉这段代码有助于理解cookiejar 

    package main
    
    import (
        "fmt"
        "io/ioutil"
        "net/http"
        "net/http/cookiejar"
    )
    
    var gCurCookies []*http.Cookie
    var gCurCookieJar *cookiejar.Jar
    
    func initAll() {
        gCurCookies = nil
        //var err error;
        gCurCookieJar, _ = cookiejar.New(nil)
    
    }
    
    //get url response html
    func getUrlRespHtml(url string) string {
        fmt.Printf("getUrlRespHtml, url=%s", url)
    
        var respHtml string = ""
    
        httpClient := &http.Client{
            CheckRedirect: nil,
            Jar:           gCurCookieJar,
        }
    
        httpReq, err := http.NewRequest("GET", url, nil)
        httpResp, err := httpClient.Do(httpReq)
        if err != nil {
            fmt.Printf("http get url=%s response error=%s
    ", url, err.Error())
        }
        fmt.Printf("httpResp.Header=%s", httpResp.Header)
        fmt.Printf("httpResp.Status=%s", httpResp.Status)
    
        defer httpResp.Body.Close()
    
        body, errReadAll := ioutil.ReadAll(httpResp.Body)
        if errReadAll != nil {
            fmt.Printf("get response for url=%s got error=%s
    ", url, errReadAll.Error())
        }
        //全局保存
        gCurCookies = gCurCookieJar.Cookies(httpReq.URL)
    
        respHtml = string(body)
    
        return respHtml
    }
    
    func dbgPrintCurCookies() {
        var cookieNum int = len(gCurCookies)
        fmt.Printf("cookieNum=%d", cookieNum)
        for i := 0; i < cookieNum; i++ {
            var curCk *http.Cookie = gCurCookies[i]
            fmt.Printf("
    ------ Cookie [%d]------", i)
            fmt.Printf("	Name=%s", curCk.Name)
            fmt.Printf("	Value=%s", curCk.Value)
            fmt.Printf("	Path=%s", curCk.Path)
            fmt.Printf("	Domain=%s", curCk.Domain)
            fmt.Printf("	Expires=%s", curCk.Expires)
            fmt.Printf("	RawExpires=%s", curCk.RawExpires)
            fmt.Printf("	MaxAge=%d", curCk.MaxAge)
            fmt.Printf("	Secure=%t", curCk.Secure)
            fmt.Printf("	HttpOnly=%t", curCk.HttpOnly)
            fmt.Printf("	Raw=%s", curCk.Raw)
            fmt.Printf("	Unparsed=%s", curCk.Unparsed)
        }
    }
    
    func main() {
        initAll()
    
        fmt.Printf("====== step 1:get Cookie ======")
        var baiduMainUrl string = "http://www.baidu.com/"
        fmt.Printf("baiduMainUrl=%s", baiduMainUrl)
        getUrlRespHtml(baiduMainUrl)
        dbgPrintCurCookies()
    
        fmt.Printf("
    ====== step 2:use the Cookie ======")
    
        var getapiUrl string = "https://passport.baidu.com/v2/api/?getapi&class=login&tpl=mn&tangram=true"
        getUrlRespHtml(getapiUrl)
        dbgPrintCurCookies()
    }

     还有这里 https://segmentfault.com/q/1010000010339661 的实践有助于深入理解cookiejar

  • 相关阅读:
    unity基础开发----Unity获取PC,Ios系统的mac地址等信息
    Web UI设计师需要了解的用栅格化系统指导网页设计
    设计网页,常见的宽度是多少像素?
    C#常用类库简介(二)
    将本地代码上传到gitLab
    删除git 分支
    git 新建分支
    将子分支代码merge到主分支master分支
    dev分支代码覆盖master分支代码
    使用flex的同时设置超出喜爱是省略号,
  • 原文地址:https://www.cnblogs.com/pu369/p/12318490.html
Copyright © 2011-2022 走看看