zoukankan      html  css  js  c++  java
  • golang模拟新浪微博登录

    1.基于幽灵蛛pholcus开源项目的规则

    下面直接贴出代码;稍作修改后即可用于其他爬虫项目。

    package pholcus_lib
    
    // 基础包
    import (
    	// "github.com/henrylee2cn/pholcus/common/goquery"                          //DOM解析
    	"github.com/henrylee2cn/pholcus/app/downloader/request" //必需
    	. "github.com/henrylee2cn/pholcus/app/spider"           //必需
    	// . "github.com/henrylee2cn/pholcus/app/spider/common" //选用
    	// "github.com/henrylee2cn/pholcus/logs"
    	// net包
    	// "net/http" //设置http.Header
    	// "net/url"
    	// 编码包
    	// "encoding/xml"
    	//"encoding/json"
    	// 字符串处理包
    	//"regexp"
    	// "strconv"
    	// "fmt"
    	// "math"
    
    	//"net/http"
    	"strconv"
    	"regexp"
    	"fmt"
    	"encoding/json"
    	"net/url"
    	//"strings"
    	//"strings"
    	"strings"
    	"github.com/henrylee2cn/pholcus/common/goquery"
    	//"net/http"
    )
    // Package-level login state; all three are assigned in init.
    var (
    	// millisecond is the timestamp appended to the prelogin URL.
    	millisecond int64
    	// name is the account name in the encoded form sent as the "su" parameter.
    	name string
    	// password is the plain-text account password.
    	password string
    )
    // Info is the target struct for decoding the JSON login reply.
    // Field names are matched against JSON keys case-insensitively by
    // encoding/json, so no struct tags are required for decoding.
    type Info struct {
    	Retcode            int
    	Uid                string
    	Nick               string
    	CrossDomainUrlList []string // follow-up URLs returned by the login endpoint
    }
    // init registers the spider and fills in the package-level login state:
    // the start-up timestamp, the password, and the encoded account name.
    func init() {
    	FileTest.Register()
    	millisecond = getMillisecond()

    	// NOTE(review): hard-coded credentials, presumably placeholders —
    	// replace them before running against a real account.
    	password = "8888888"
    	name = encryptUname("88888888")
    }
    
    var FileTest = &Spider{
    	Name:        "微博登录测试",
    	Description: "测试 [s.weibo.com/user/]",
    	Pausetime: 1500,
    	Keyin:   KEYIN,
    	// Limit:        LIMIT,
    	EnableCookie: true,
    	RuleTree: &RuleTree{
    		Root: func(ctx *Context) {
    			//https://weibo.cn/
    			ctx.AddQueue(&request.Request{
    				Url:          "https://login.sina.com.cn/sso/prelogin.php?entry=account&callback=sinaSSOController.preloginCallBack&su="+name+"&rsakt=mod&client=ssologin.js(v1.4.15)&_="+strconv.FormatInt(millisecond,10),
    				Rule:         "登录一",
    				//DownloaderID:1,
    			})
    		},
    
    		Trunk: map[string]*Rule{
    			"登录一": {
    				ParseFunc: func(ctx *Context) {
    
    					str := ctx.GetText()
    					println("-----1-----" + str)
    
    					compile1, _ := regexp.Compile("{.*}")
    					match1 := compile1.FindString(str)
    					fmt.Println(match1)
    					//json str 转map
    					var dat map[string]interface{}
    					if err := json.Unmarshal([]byte(match1), &dat); err == nil {
    						if err != nil{
    							println("转换异常!")
    						}
    					}
    					servertime := dat["servertime"]
    					servertime= strconv.FormatFloat(servertime.(float64), 'f', -1, 64)
    					nonce:=dat["nonce"]
    					pubkey:=dat["pubkey"]
    					rsakv := dat["rsakv"]
    
    					//加密密码
    
    					ep := encryptPassword(pubkey.(string), servertime.(string), nonce.(string), password)
    
    					postDict := map[string]string{}
    					postDict["entry"] = "account"
    					postDict["gateway"] = "1"
    					postDict["from"] = ""
    					postDict["savestate"] = "30"
    					postDict["qrcode_flag"] = "true"
    					postDict["useticket"] = "0"
    					postDict["pagerefer"] = ""
    					postDict["vsnf"] = "1"
    					postDict["su"] = name
    					postDict["service"] = "account"
    					postDict["servertime"] = servertime.(string)
    					postDict["nonce"] = nonce.(string)
    					postDict["pwencode"] = "rsa2"
    					postDict["rsakv"] = rsakv.(string)
    					postDict["sp"] = ep
    					postDict["sr"] = "1395*822"
    					postDict["cdult"] = "3"
    					postDict["domain"] = "sina.com.cn"
    					postDict["prelt"] = "170"
    					postDict["returntype"] = "TEXT"
    
    					postValues := url.Values{}
    					for postKey, PostValue := range postDict{
    						postValues.Set(postKey, PostValue)
    					}
    
    					//post参数编码
    					postDataStr := postValues.Encode()
    					ctx.AddQueue(&request.Request{
    						Url:          "https://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.15)&_="+strconv.FormatInt(getMillisecond(),10),
    						Method:       "POST",
    						EnableCookie: true,
    						PostData:     postDataStr,
    						Rule:         "登录二",
    						//DownloaderID:1,
    					})
    				},
    			},
    
    			"登录二": {
    				ParseFunc: func(ctx *Context) {
    
    					str := ctx.GetText()
    
    					println("-----2-----" + str)
    
    					var dat Info
    					json.Unmarshal([]byte(str), &dat)
    					//此处获取2个链接,包含普通版和移动版
    					//print(dat.CrossDomainUrlList[2])
    
    					ctx.AddQueue(&request.Request{
    						Url:          dat.CrossDomainUrlList[2],
    						Method:       "GET",
    						EnableCookie: true,
    						Rule:         "登录三",
    					})
    				},
    			},
    
    			"登录三": {
    				ParseFunc: func(ctx *Context) {
    
    					ctx.AddQueue(&request.Request{
    						Url:          "https://weibo.cn/",
    						Method:       "GET",
    						EnableCookie: true,
    						Rule:         "重定向一",
    					})
    				},
    			},
    
    			"重定向一": {
    				ParseFunc: func(ctx *Context) {
    
    					compile2, _ := regexp.Compile("[a-zA-z]+://[^\s]*")
    
    					string := compile2.FindAllString(ctx.GetText(), 2)
    
    					ctx.AddQueue(&request.Request{
    						Url:          string[1],
    						Method:       "GET",
    						EnableCookie: true,
    						Rule:         "重定向二",
    					})
    
    				},
    			},
    
    			"重定向二": {
    				ParseFunc: func(ctx *Context) {
    
    					compile2, _ := regexp.Compile("[a-zA-z]+://[^\s]*")
    
    					string := compile2.FindAllString(ctx.GetText(), 3)
    					ctx.AddQueue(&request.Request{
    						Url:          string[2],
    						Method:       "GET",
    						EnableCookie: true,
    						Rule:         "进入首页",
    					})
    
    				},
    			},
    
    			"进入首页": {
    				ParseFunc: func(ctx *Context) {
    					for z := 1;z<=2;z++{
    						ctx.AddQueue(&request.Request{
    							Url:          "https://weibo.cn/search/user/?keyword="+ ctx.GetKeyin() + "&page=" + strconv.Itoa(z),// //,
    							Rule:         "查找微博",
    							Method:		  "GET",
    							EnableCookie: true,
    							//PostData:"keyword=财经&suser=找人",
    							//DownloaderID:1,smblog
    						})
    					}
    
    				},
    			},
    
    			"查找微博": {
    
    				ParseFunc: func(ctx *Context) {
    
    					println("---------------查找微博-------------")
    
    					query := ctx.GetDom()
    
    					navBox := query.Find("table")
    
    					navBox.Each(func(i int, s *goquery.Selection) {
    
    						str := s.Find("tr").Text()
    						j := strings.LastIndex(str,"粉丝")
    						z := strings.LastIndex(str,"人")
    
    						//昵称
    						name := str[0:j]
    						//粉丝数
    						fansNum := str[j+6:z]
    						//地区
    						city := str[z+5:len(str)]
    
    						println("name" + name)
    
    						//链接
    						if url, ok := s.Find("table tr td a").Attr("href"); ok {
    
    							ctx.AddQueue(&request.Request{
    								Url:  "https://weibo.cn" + url,
    								Rule: "博主首页",
    								Temp: map[string]interface{}{
    									"name":  name,
    									"fansNum": fansNum,
    									"city":  city,
    								},
    							})
    						}
    					})
    
    				},
    			},
    
    			"博主首页": {
    				ParseFunc: func(ctx *Context) {
    
    					//昵称
    					name := ctx.GetTemp("name","").(string)
    					//粉丝数
    					fansNum := ctx.GetTemp("fansNum","").(string)
    					//地区
    					city := ctx.GetTemp("city","").(string)
    
    					//微博数
    					weiboNum := ctx.GetDom().Find(".tc").Text()
    					j := strings.LastIndex(weiboNum,"[")
    					z := strings.LastIndex(weiboNum,"]")
    					weiboNum = weiboNum[j+1:z]
    
    					//关注数
    					attentionNum := ctx.GetDom().Find(".tip2 a").Eq(0).Text()
    					j = strings.LastIndex(attentionNum,"[")
    					z = strings.LastIndex(attentionNum,"]")
    					attentionNum = attentionNum[j+1:z]
    
    					a :=ctx.GetDom().Find(".ut a").Eq(1)
    
    
    					if a.Text() == "加关注"{
    						if url, ok := ctx.GetDom().Find(".ut a").Eq(3).Attr("href"); ok {
    
    							ctx.AddQueue(&request.Request{
    								Url:  "https://weibo.cn" + url,
    								Rule: "资料页",
    								EnableCookie: true,
    								Temp: map[string]interface{}{
    									"name":  name,
    									"fansNum": fansNum,
    									"city":  city,
    									"weiboNum":  weiboNum,
    									"attentionNum":  attentionNum,
    								},
    							})
    						}
    					} else{
    						if url, ok := ctx.GetDom().Find(".ut a").Eq(2).Attr("href"); ok {
    
    							ctx.AddQueue(&request.Request{
    								Url:  "https://weibo.cn" + url,
    								Rule: "资料页",
    								EnableCookie: true,
    								Temp: map[string]interface{}{
    									"name":  name,
    									"fansNum": fansNum,
    									"city":  city,
    									"weiboNum":  weiboNum,
    									"attentionNum":  attentionNum,
    								},
    							})
    						}
    					}
    
    				},
    			},
    
    			"资料页": {
    
    				ItemFields: []string{
    					"昵称",
    					"粉丝数",
    					"地区",
    					"微博数",
    					"关注数",
    					"标签",
    					"详细信息",
    				},
    				ParseFunc: func(ctx *Context) {
    
    					//昵称
    					name := ctx.GetTemp("name","").(string)
    					//粉丝数
    					fansNum := ctx.GetTemp("fansNum","").(string)
    					//地区
    					city := ctx.GetTemp("city","").(string)
    					//微博数
    					weiboNum := ctx.GetTemp("weiboNum","").(string)
    					//关注数
    					attentionNum := ctx.GetTemp("attentionNum","").(string)
    
    					str := ctx.GetDom().Find("div").Eq(5).Text()
    
    					i := strings.LastIndex(str,"标签")
    					z := strings.LastIndex(str,"更多")
    
    					var str2,str3 string
    					if i == -1{
    						str2 = ""
    						str3 = str
    					}else{
    						//标签
    						str2 = str[i+7:z]
    
    						//详细信息
    						str3 = str[0:i]
    					}
    					ctx.Output(map[int]interface{}{
    						0: name,
    						1: fansNum,
    						2: city,
    						3: weiboNum,
    						4: attentionNum,
    						5: str2,
    						6: str3,
    					})
    
    
    				},
    			},
    
    		},
    	},
    }
    

     2.相关方法(使用时需另外导入 time、encoding/base64、encoding/hex、math/big、crypto/rsa、crypto/rand)

    // getMillisecond returns the current Unix time in milliseconds.
    func getMillisecond() int64 {
    	// UnixNano counts nanoseconds; dividing by time.Millisecond (1e6 ns)
    	// yields milliseconds. Dividing by 1000 would give microseconds.
    	return time.Now().UnixNano() / int64(time.Millisecond)
    }
    
    // encryptUname returns uname encoded with the padded, URL-safe base64
    // alphabet (base64.URLEncoding).
    func encryptUname(uname string) string {
    	enc := base64.URLEncoding
    	out := make([]byte, enc.EncodedLen(len(uname)))
    	enc.Encode(out, []byte(uname))
    	return string(out)
    }
    
    // Password encryption.

    // string2big parses s as a hexadecimal integer. It returns zero (never nil)
    // when s is not valid hexadecimal, so callers always get a defined value.
    func string2big(s string) *big.Int {
    	n, ok := new(big.Int).SetString(s, 16)
    	if !ok {
    		return new(big.Int)
    	}
    	return n
    }

    // encryptPassword RSA-encrypts (PKCS#1 v1.5) the string
    // servertime + "\t" + nonce + "\n" + password and returns the ciphertext as
    // lowercase hex.
    //
    // pubkey is the RSA modulus in hexadecimal; the public exponent is fixed at
    // 65537. An empty string is returned when pubkey is not a positive
    // hexadecimal number or when encryption fails.
    func encryptPassword(pubkey string, servertime string, nonce string, password string) string {
    	pub := rsa.PublicKey{
    		N: string2big(pubkey),
    		E: 65537, // 0x10001
    	}
    	if pub.N.Sign() <= 0 {
    		return ""
    	}

    	// servertime and nonce are separated by a tab, nonce and password by a
    	// newline; the escapes must be real "\t" and "\n", not literal whitespace.
    	plain := servertime + "\t" + nonce + "\n" + password

    	cipher, err := rsa.EncryptPKCS1v15(rand.Reader, &pub, []byte(plain))
    	if err != nil {
    		return ""
    	}
    	return hex.EncodeToString(cipher)
    }
    
    欢迎指正,交流沟通,共同进步!对您有帮助的话点下推荐~~
  • 相关阅读:
    jquery ajax 返回数据时 ff正常,ie接受到数据但是显示不了
    查看IIS日志并且分析其中的错误日志
    用eventvwr查看系统日志
    C++实现Trie 树
    [算法之美笔记02] 栈模拟网页的前进后退 ; 阻塞队列与并发队列
    MySQL学习小记(三) 结合JDBC实现用户的登录响应
    [算法之美笔记01] 数组,链表的删除和垃圾回收,缓存机制有什么关系
    [埋坑系列] 基于QT/C++的杰瑞走迷宫小游戏 :1.大体构造
    品味C++实现AVL树的删除操作
    C++实现AVL树的四种旋转
  • 原文地址:https://www.cnblogs.com/gaoyawei/p/7463766.html
Copyright © 2011-2022 走看看