zoukankan      html  css  js  c++  java
  • go一个简单的爬虫(豆瓣)

    最近在学习 Go 语言爬虫,写了个小 demo:

    package main
    
    import (
    	"fmt"
    	"io/ioutil"
    	"net/http"
    	"regexp"
    	"strconv"
    )
    
    // Movie holds the text fragments that child extracts from one Douban movie
    // page. All values are kept exactly as captured from the HTML; nothing is
    // parsed or normalised.
    //
    // Field order matters: it determines how fmt prints the struct.
    //
    //   - name:   text of the v:itemreviewed span
    //   - mark:   text of the v:average strong element
    //   - person: text of the v:directedBy anchor
    //   - time:   content attribute of the v:runtime span
    //   - url:    href of an anchor with target="_blank" rel="nofollow"
    type Movie struct {
    	name, mark, person, time, url string
    }
    
    // main fetches a fixed list of Douban movie subjects concurrently, one
    // goroutine per subject ID, and blocks until every worker has signalled that
    // it is finished.
    func main() {
    	sliceList := []int{1291841, 26761416, 1309220, 1300741, 1293172}

    	// make([]chan int, n) only produces n nil channels. Sending to or receiving
    	// from a nil channel blocks forever, so each channel has to be created
    	// explicitly, and there must be exactly one per worker.
    	chs := make([]chan int, len(sliceList))
    	for i, v := range sliceList {
    		// Buffer of one: the worker can signal and exit even if main is still
    		// waiting on an earlier channel.
    		chs[i] = make(chan int, 1)
    		go child(v, chs[i])
    	}

    	// Wait for every worker before the process exits.
    	for _, ch := range chs {
    		<-ch
    	}
    }
    
    // child downloads the Douban page for the movie subject with the given id,
    // extracts a few fields with regular expressions, and prints the resulting
    // *Movie to standard output.
    //
    // Exactly one value is sent on ch when the function returns, whether the
    // fetch succeeded or not, so the caller can always wait on ch. Failures are
    // reported on standard output instead of panicking, which would otherwise
    // terminate every other worker as well.
    func child(id int, ch chan int) {
    	// Registered first so it runs last, after the response body is closed.
    	defer func() { ch <- 1 }()

    	url := "https://movie.douban.com/subject/" + strconv.Itoa(id) + "/"
    	resp, err := http.Get(url)
    	if err != nil {
    		fmt.Println("fetch", url, "failed:", err)
    		return
    	}
    	defer resp.Body.Close()

    	// A non-200 response is an error page, not a movie page; do not scrape it.
    	if resp.StatusCode != http.StatusOK {
    		fmt.Println("fetch", url, "failed: unexpected status", resp.Status)
    		return
    	}

    	sHtml, err := ioutil.ReadAll(resp.Body)
    	if err != nil {
    		fmt.Println("read", url, "failed:", err)
    		return
    	}

    	// `\s*` tolerates whitespace between the tag name and its attributes.
    	// Without the backslash the patterns would look for literal "s" characters
    	// and never match.
    	movie := &Movie{
    		name:   GetValue(`<span\s*property="v:itemreviewed">(.*)</span>`, &sHtml),
    		mark:   GetValue(`<strong\s*class="ll\s*rating_num"\s*property="v:average">(.*)</strong>`, &sHtml),
    		person: GetValue(`<a href="/celebrity/[0-9]+/" rel="v:directedBy">(.*)</a>`, &sHtml),
    		time:   GetValue(`<span property="v:runtime" content="(.*)">.*</span>`, &sHtml),
    		url:    GetValue(`<a href="(.*)" target="_blank" rel="nofollow">.*</a>`, &sHtml),
    	}

    	fmt.Println(movie)
    }
    
    // GetValue returns the first capture group of the first match of rule in the
    // document that sHtml points to.
    //
    // It returns "" when sHtml is nil, when rule does not match, or when rule
    // contains no capture group. It panics if rule is not a valid regular
    // expression, because regexp.MustCompile is used.
    func GetValue(rule string, sHtml *[]byte) string {
    	reg := regexp.MustCompile(rule)
    	if sHtml == nil {
    		return ""
    	}

    	// Match on the bytes directly so the whole document is not copied into a
    	// string; only the captured fragment is converted.
    	match := reg.FindSubmatch(*sHtml)
    	if len(match) < 2 {
    		// No match (nil slice) or a pattern without a capture group.
    		return ""
    	}
    	return string(match[1])
    }
    

      

  • 相关阅读:
    github上的每日学习 13
    github上的每日学习 12
    github上的每日学习 11
    github上的每日学习 10
    github上的每日学习 9
    github上的每日学习 8
    github上的每日学习 7
    面向对象程序设计寒假作业2
    MySQL安装和配置
    Fast Packet Processing with eBPF and XDP部分
  • 原文地址:https://www.cnblogs.com/piaobodewu/p/11086811.html
Copyright © 2011-2022 走看看