zoukankan      html  css  js  c++  java
  • go爬虫

    package main
    
    import (
    	"fmt"
    	"github.com/antchfx/htmlquery"
    	"github.com/kirinlabs/HttpRequest"
    	"labix.org/v2/mgo"
    	"labix.org/v2/mgo/bson"
    	"strconv"
    	"strings"
    	"sync"
    )
    
    
    // Package-level MongoDB handles. They are assigned once by init and then
    // used by the crawler code in this file.
    var (
    	db *mgo.Database   // the "myjs" database
    	c  *mgo.Collection // the "fuli" collection inside db
    )

    // init dials the MongoDB server on localhost:27017 and stores the database
    // and collection handles in db and c. It panics when the dial fails.
    func init() {
    	sess, dialErr := mgo.Dial("mongodb://localhost:27017") // connect to the database
    	if dialErr != nil {
    		panic(dialErr)
    	}
    	// The session is not closed here: db and c are derived from it and are
    	// still used after init returns.
    	sess.SetMode(mgo.Monotonic, true)
    	db = sess.DB("myjs") // database name
    	c = db.C("fuli")
    }
    
    // wg counts the page-crawling goroutines that are still running; main
    // blocks on it until every one of them has called Done.
    var wg sync.WaitGroup

    // main starts one HttpImg goroutine for each listing page from 1 to 21 and
    // waits for all of them to finish.
    func main() {
    	const (
    		firstPage = 1
    		lastPage  = 21
    	)

    	// Register every goroutine up front so Wait cannot return early.
    	wg.Add(lastPage - firstPage + 1)
    	for page := firstPage; page <= lastPage; page++ {
    		pageURL := "http://www.mntuxiu.com/page/" + strconv.Itoa(page) + "/"
    		go HttpImg(pageURL)
    	}
    	wg.Wait()
    }
    
    
    func HttpImg(url string){
    
    	response, e := HttpRequest.Get(url)
    	if e != nil{
    		panic(e.Error())
    	}
    	bytes, e := response.Body()
    	html := string(bytes)
    	node, e := htmlquery.Parse(strings.NewReader(html))
    
    	list := htmlquery.Find(node, "//*[@id='index_ajax_list']/li/a/img")
    
    
    	for _,n := range list{
    		fmt.Println(n.Attr[2].Val,n.Attr[3].Val)
    		img, _ := HttpRequest.Get(n.Attr[2].Val)
    		byts, _ := img.Body()
    		e := c.Insert(&User{
    			Id_:  bson.NewObjectId(),
    			Name: n.Attr[3].Val,
    			Bs64: byts,
    		})
    		if e !=nil{
    			panic(e)
    		}
    
    	}
    
    	wg.Done()
    }
    
    
    
    
    // User is the document shape written to MongoDB for one downloaded image.
    type User struct {
    	// Id_ is stored as the document's "_id" field.
    	Id_ bson.ObjectId `bson:"_id"`
    	// Name is stored under "name".
    	Name string `bson:"name"`
    	// Bs64 is stored under "Bs64". Despite the field name, the type itself
    	// does no base64 encoding; it holds whatever bytes the caller assigns.
    	Bs64 []byte `bson:"Bs64"`
    }
    

      

  • 相关阅读:
    数据库中生成UUID的方式
    db2如果修改主机名之后
    linux修改主机名
    db2动态查看备份进度
    oracle-DG
    linux环境变量和对应文件的生效顺序
    数据泵与传统exp/imp对比
    oracle之ogg部署(RAC到单机)
    oracle之ogg部署(单机到单机)
    达梦 (实时主备+数据守护)测试
  • 原文地址:https://www.cnblogs.com/kjtt/p/13208564.html
Copyright © 2011-2022 走看看