zoukankan      html  css  js  c++  java
  • goquery简单爬取ebay

    第三方包的安装

    • goquery

    goquery的GitHub地址:https://github.com/PuerkitoBio/goquery ,安装命令:go get github.com/PuerkitoBio/goquery

                                package main
    

    import(
    "fmt"
    "net/http"
    "io/ioutil"
    "strings"
    "github.com/PuerkitoBio/goquery"
    )

    // handleError reports a non-nil err on standard output, prefixed with why.
    // The two values are written back to back by fmt.Print, without a separator
    // or trailing newline. A nil err produces no output, and the function never
    // stops execution.
    func handleError(err error, why string) {
    	if err == nil {
    		return
    	}
    	fmt.Print(why, err)
    }

    // getPages fetches url with an HTTP GET that carries browser-like request
    // headers and returns the response body as a string.
    //
    // Any failure (building the request, sending it, a non-200 status, or
    // reading the body) is reported through handleError and yields an empty
    // string.
    func getPages(url string) (pageStr string) {
    	req, err := http.NewRequest(http.MethodGet, url, nil)
    	if err != nil {
    		handleError(err, "http.NewRequest")
    		return ""
    	}

    	// Headers have to be set on the request before it is sent; adding them
    	// to the response afterwards never reaches the server. Host and
    	// Connection are not set here: net/http takes the Host from the URL and
    	// manages the Connection header itself.
    	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
    	req.Header.Set("Accept-Language", "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2")
    	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox")
    	// NOTE(review): hard-coded session cookie captured from a browser; it
    	// will expire and should not live in source control.
    	req.Header.Set("Cookie", "nonsession=BAQAAAWqAI+j2AAaAADMABl60yOw1MTgwMDAAywABXNOcdDIAygAgZjmW7DlhN2ZhMDUzMTZhMGFhZGQyMTI0ZjkxNmZmZmE3MjRhixCPqGLKye5BuKLFvWdMzprH4kg; s=CgAD4ACBc1ObsOWE3ZmEwNTMxNmEwYWFkZDIxMjRmOTE2ZmZmYTcyNGGhAzuC; dp1=bu1p/QEBfX0BAX19AQA**5eb4c8ecbl/CN6095fc6c; ebay=%5Esbf%3D%23%5Ejs%3D1%5Epsi%3DAf6B2MwI%5E; ak_bmsc=68BEC4C2CCB9CBF4D24A609F3781E6AE17036813C12000006895D35CE3AC3162~plUQDUg9/LiE/57OsXMbVM1wcDfKqG/SApfWftxrhtgLduhxKfsBp6CMzXhGHW1LJXFP+AXDCH4QaZyT8gmIVVaARRCqhjEtNpbOFVBnKCg/1YaCBlTgXb7UKFL6+ydixzxZ4mmSbcU7NP2lBOegbyLe05KsV/OyYq3JmK9RVfuT4MiZUg+WXcqdQALXmiYOrb6ZzfTYGjBKSaO8lDGE3Ejn/SENnN/rrVzHMBBBTeiFs=; bm_sv=77E98EA7DB2BF215DFACDD76E331005C~8F3r1OwVRAmlwMYb8F7yPSMIiY5n6VeLD+6XrZSTYyjZ7If+e7XZeclQoUK40241+O9vp9XsERUvGzAv0HzEzJXx8oWKO/D2b/9cCTerVgXUS1UqoBodtIlvmVcskACUAp0dXB6wIfO8oebPY3dj1w==; ds2=sotr/b9Votzzzzzzz^")

    	// NOTE(review): http.DefaultClient has no timeout, so a stalled server
    	// blocks this call indefinitely.
    	resp, err := http.DefaultClient.Do(req)
    	if err != nil {
    		// resp is nil on error, so it must not be touched.
    		handleError(err, "http.Get")
    		return ""
    	}
    	defer resp.Body.Close()

    	if resp.StatusCode != http.StatusOK {
    		handleError(fmt.Errorf("unexpected status %s", resp.Status), "http.Get")
    		return ""
    	}

    	body, err := ioutil.ReadAll(resp.Body)
    	if err != nil {
    		handleError(err, "ioutil.ReadAll")
    		return ""
    	}
    	return string(body)
    }

    //ebay页面分析
    func spiderEbay(){
    pageStr := getPages("https://www.ebay.com/b/Apple-iPhone/9355/bn_319682")
    doc, err := goquery.NewDocumentFromReader(strings.NewReader(pageStr))
    handleError(err,"goquery.NewDocumentFromReader")
    doc.Find(".s-item ").Each(func(i int, s *goquery.Selection){

        title := s.Find(".s-item__title").Text()
        image,_ := s.Find(".s-item__image-img").Attr("src")
    		fmt.Printf("	 	
    ",title,image)
    		
    	
    
    })
    

    }

    // main prints a start-up banner and then runs the eBay crawl.
    func main() {
    	// Println instead of Printf: the banner has no format verbs, and the
    	// trailing newline keeps it from running into the first result line.
    	fmt.Println("this is crawler")
    	spiderEbay()
    }

    一个人光有知识是远不够的,知识是一个量的积累.可以在拥有知识的前提下,掌握一门技术
  • 相关阅读:
    052_from表单的两种请求方式
    051_ajax的两种请求方式与传递流
    050_SpringMVC配置文件解析器
    049_文件下载为什么只能使用同步请求?
    048_io流
    048_get与url的编码问题
    062_什么是http协议?什么又是三次握手?
    020_全选功能无法出现统一协调时
    064_js中function怎么才能有返回值呢?
    Kali单用户模式下重置登录口令教程
  • 原文地址:https://www.cnblogs.com/ashton/p/10967355.html
Copyright © 2011-2022 走看看