zoukankan      html  css  js  c++  java
  • 自学Python五 爬虫基础练习之SmartQQ协议

      BAT站在中国互联网的顶端,引导着中国互联网的发展走向……既受到了多数程序员的关注,也一直被我们所惦记着……

      关于SmartQQ的协议来自HexBlog,根据他的博客我自己也一步一步地去分析、去尝试。自己不了解、不知道的东西总是神秘的,如果你有这种好奇心,那么真相就只有一个。接下来我先把协议放出来,至于分析方法,以后有机会再谈谈……其实我也是个半吊子水平……谁知道下次改了协议还灵不灵呢!

      登录之前,获取二维码:https://ssl.ptlogin2.qq.com/ptqrshow?appid=501004106&e=0&l=M&s=5&d=72&v=4&t=0.22925435146316886,refer为:https://ui.ptlogin2.qq.com/cgi-bin/login

      循环获取二维码状态(是否失效,是否在手机上授权):https://ssl.ptlogin2.qq.com/ptqrlogin?webqq_type=10&remember_uin=1&login2qq=1&aid=501004106&u1=http%3A%2F%2Fw.qq.com%2Fproxy.html%3Flogin2qq%3D1%26webqq_type%3D10&ptredirect=0&ptlang=2052&daid=164&from_ui=1&pttype=1&dumy=&fp=loginerroralert&action=0-0-136435&mibao_css=m_webqq&t=undefined&g=1&js_type=0&js_ver=10139&login_sig=&pt_randsalt=0   refer为:https://ui.ptlogin2.qq.com/cgi-bin/login

      获取cookie中ptwebqq:这次请求的url不是固定的,而是上一步在手机上扫码授权成功后,ptqrlogin返回内容中以http开头的那个链接;请求它之后即可从cookie中读取ptwebqq。refer不变。

      获取返回值vfwebqq:http://s.web2.qq.com/api/getvfwebqq?ptwebqq=" + ptwebqq + "&clientid=53999199&psessionid=&t=1446710396202。refer为http://s.web2.qq.com/proxy.html?v=20130916001&callback=1&id=1。

      成功登录,得到uin,psessionid:http://d.web2.qq.com/channel/login2,refer为http://d.web2.qq.com/proxy.html?v=20130916001&callback=1&id=2,数据为:"r=%7B%22ptwebqq%22%3A%22"+ptwebqq+"%22%2C%22clientid%22%3A53999199%2C%22psessionid%22%3A%22%22%2C%22status%22%3A%22online%22%7D",host地址为:"d1.web2.qq.com"

      根据得到的信息获取好友列表:http://s.web2.qq.com/api/get_user_friends2,数据为:data="r=%7B%22vfwebqq%22%3A%22"+vfwebqq+"%22%2C%22hash%22%3A%22"+__hash+"%22%7D",refer为:http://d.web2.qq.com/proxy.html?v=20130916001&callback=1&id=2

      让我们来看看程序:

    程序启动,扫描二维码登录

      

      下面我们用python来尝试一下(分为两个文件,其中继续沿用我们的HttpClient类,以及WebQQ类,代码如下):

     # HttpClient.py
     # -*- coding: utf-8 -*-
     import socket
     import urllib

     try:  # Python 2 module names
         import cookielib
         import urllib2
     except ImportError:  # Python 3: the same APIs live under new module names
         import http.cookiejar as cookielib
         import urllib.request as urllib2


     class HttpClient:
         """Small cookie-aware HTTP helper built on urllib2.

         The cookie jar and the opener are class attributes, so every instance
         (and every subclass instance) shares one set of cookies. The opener is
         also installed globally with ``urllib2.install_opener`` when the class
         body is executed, so plain ``urllib2.urlopen`` calls use it too.
         """

         __cookie = cookielib.CookieJar()
         __req = urllib2.build_opener(urllib2.HTTPCookieProcessor(__cookie))
         __req.addheaders = [
             ('Accept', 'application/javascript, */*;q=0.8'),
             ('User-Agent', 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)')
         ]
         urllib2.install_opener(__req)

         def _open(self, url, data=None, refer=None):
             """Open *url* (POST when *data* is given) and return the response object.

             *refer*, when not None, is sent as the ``Referer`` header. Every
             request uses a 120 second timeout.
             """
             req = urllib2.Request(url, data)
             if refer is not None:
                 req.add_header('Referer', refer)
             return urllib2.urlopen(req, timeout=120)

         def Get(self, url, refer=None):
             """GET *url* and return the response body.

             On an HTTP error status the error body is returned instead; on a
             socket error or timeout an empty string is returned. Other
             exceptions (for example ``urllib2.URLError``) are not caught here.
             """
             try:
                 # read() stays inside the try: a timeout can also occur while
                 # the body is being received.
                 return self._open(url, refer=refer).read()
             except urllib2.HTTPError as e:
                 return e.read()
             except (socket.timeout, socket.error):
                 return b''

         def GetWithOutRead(self, url, refer=None):
             """GET *url* and return the unread response object.

             On an HTTP error status the ``HTTPError`` itself is returned: it is a
             file-like response object, so callers can treat both outcomes the
             same way (previously the already-read error body string came back,
             which has no ``read()``). On a socket error or timeout an empty
             string is returned.
             """
             try:
                 return self._open(url, refer=refer)
             except urllib2.HTTPError as e:
                 return e
             except (socket.timeout, socket.error):
                 return ''

         def Post(self, url, data, refer=None):
             """POST the already-encoded body *data* to *url* and return the response body.

             *data* is sent as-is (it is not passed through ``urllib.urlencode``).
             Error handling matches ``Get``.
             """
             try:
                 return self._open(url, data, refer).read()
             except urllib2.HTTPError as e:
                 return e.read()
             except (socket.timeout, socket.error):
                 return b''

         def Download(self, url, file):
             """Fetch *url* and write the body to the path *file* in binary mode.

             The body is fetched before the file is opened, so a failed request
             does not leave an empty file behind, and the file is always closed.
             Exceptions from the request or the write propagate to the caller.
             """
             content = urllib2.urlopen(url).read()
             with open(file, 'wb') as output:
                 output.write(content)

         def getCookie(self, key):
             """Return the value of the first cookie named *key*, or '' if there is none."""
             for c in self.__cookie:
                 if c.name == key:
                     return c.value
             return ''

         def setCookie(self, key, val, domain):
             """Add a session cookie *key*=*val* for *domain* (path '/') to the shared jar."""
             ck = cookielib.Cookie(
                 version=0, name=key, value=val,
                 port=None, port_specified=False,
                 domain=domain, domain_specified=False, domain_initial_dot=False,
                 path='/', path_specified=True,
                 secure=False, expires=None, discard=True,
                 comment=None, comment_url=None,
                 rest={'HttpOnly': None}, rfc2109=False)
             self.__cookie.set_cookie(ck)
      1 #WebQQ.py
      2 # -*- coding: utf-8 -*-
      3 from Tkinter import *
      4 from time import sleep
      5 from HttpClient import HttpClient
      6 import json,io
      7 from multiprocessing import Process
      8 import multiprocessing
      9 from PIL import Image, ImageTk
     class WebQQ(HttpClient):
         """SmartQQ (WebQQ) client: QR-code login, then print friends' remark names.

         ``run()`` drives the whole flow: download the login QR code and show it
         in a Tk window running in a child process, poll the login endpoint
         until the scan is confirmed, collect ptwebqq / vfwebqq / uin /
         psessionid, and finally request the friend list.
         """

         def __init__(self):
             self.__ptwebqq = ""
             self.__vfwebqq = ""
             self.__hash = ""
             self.__uin = ""
             # Tk root window; only set inside the child process (_run_proc).
             self.__root = None
             self.__psessionid = ""
             # Carries the 'stop' command from the login flow to the QR window.
             self.queue = multiprocessing.Queue()

         def __parse_json(self, res):
             """Return *res* decoded as a JSON object, or None if empty/invalid."""
             if not res:
                 return None
             try:
                 jsn = json.loads(res)
             except ValueError:
                 return None
             return jsn if isinstance(jsn, dict) else None

         # Fetch the QR code
         def __ptqrshow(self):
             """Download the QR code and show it in a child process.

             Returns True when the window process was started, False when the
             download failed or the data is not a readable image.
             """
             img = self.Get(url="https://ssl.ptlogin2.qq.com/ptqrshow?appid=501004106&e=0&l=M&s=5&d=72&v=4&t=0.4139144900254905")
             imgfile = None
             # Get() returns an empty string (never None) on a network failure.
             if img:
                 try:
                     imgfile = Image.open(io.BytesIO(img))
                 except IOError:
                     imgfile = None
             if imgfile is None:
                 print(u"二维码下载失败")
                 return False
             p = Process(target=self._run_proc, args=(imgfile,))
             p.start()
             print(u"二维码下载完毕,请尽快扫描...")
             return True

         # Poll the scan status / login progress
         def __ptqrlogin(self):
             """Poll once per second until the response reports a successful login.

             On success the redirect URL is cut out of the response and handed
             to ``__check_sig``; its result is returned.

             NOTE(review): an expired QR code is not detected here, so polling
             continues until the success marker shows up.
             """
             poll_url = ("https://ssl.ptlogin2.qq.com/ptqrlogin?webqq_type=10&remember_uin=1"
                         "&login2qq=1&aid=501004106&u1=http%3A%2F%2Fw.qq.com%2Fproxy.html%3Flogin2qq%3D1%26webqq_type%3D10"
                         "&ptredirect=0&ptlang=2052&daid=164&from_ui=1&pttype=1&dumy=&fp=loginerroralert&action=0-0-136435"
                         "&mibao_css=m_webqq&t=undefined&g=1&js_type=0&js_ver=10139&login_sig=&pt_randsalt=0")
             success_marker = u"登录成功".encode("UTF-8")
             # A loop instead of self-recursion: one stack frame per second of
             # waiting would eventually hit the interpreter's recursion limit.
             while True:
                 res = self.Get(poll_url, "https://ui.ptlogin2.qq.com/cgi-bin/login")
                 if res is None:
                     return False
                 if success_marker in res:
                     break
                 sleep(1)
             text = res.decode("UTF-8")
             # The URL is a single-quoted argument in the response, so it ends at
             # the next quote; this does not depend on the exact text after it.
             start = text.find("http")
             end = text.find("'", start) if start != -1 else -1
             if end == -1:
                 return False
             return self.__check_sig(str(text[start:end]))

         # Obtain the ptwebqq cookie
         def __check_sig(self, url):
             """Request the post-scan *url*, read the ptwebqq cookie, then log in."""
             res = self.Get(url=url, refer="https://ui.ptlogin2.qq.com/cgi-bin/login")
             if res is None:
                 return False
             self.__ptwebqq = self.getCookie("ptwebqq")
             if not self.__ptwebqq:
                 # Without the cookie every later request would be rejected.
                 return False
             self.__getvfwebqq()
             return self.__login2()

         # Obtain vfwebqq (taken from the JSON response body)
         def __getvfwebqq(self):
             """Fetch vfwebqq and store it; leaves it unchanged if the call fails."""
             res = self.Get(url="http://s.web2.qq.com/api/getvfwebqq?ptwebqq="+self.__ptwebqq+
                            "&clientid=53999199&psessionid=&t=1446710396202",
                            refer="http://d.web2.qq.com/proxy.html?v=20130916001&callback=1&id=1")
             jsn = self.__parse_json(res)
             if jsn is None:
                 return
             result = jsn.get("result") or {}
             if "vfwebqq" in result:
                 self.__vfwebqq = result["vfwebqq"]

         # Log in; obtain uin and psessionid
         def __login2(self):
             """POST the login request; on retcode 0 store the session and list friends.

             Returns True on success, False on an empty/invalid response or a
             non-zero retcode.
             """
             data = "r=%7B%22ptwebqq%22%3A%22"+self.__ptwebqq+"%22%2C%22clientid%22%3A53999199%2C%22psessionid%22%3A%22%22%2C%22status%22%3A%22online%22%7D"
             res = self.Post(url="http://d1.web2.qq.com/channel/login2",
                             data=data.encode("utf8"),
                             refer="http://d.web2.qq.com/proxy.html?v=20130916001&callback=1&id=2")
             jsn = self.__parse_json(res)
             if jsn is None or jsn.get("retcode") != 0:
                 return False
             self.__uin = jsn["result"]["uin"]
             self.__psessionid = jsn["result"]["psessionid"]
             self.__hash = self.__friendsHash(self.__uin, self.__ptwebqq)
             self.__get_user_friends()
             return True

         # Friend-list hash (algorithm taken from HexBlog)
         def __friendsHash(self, uin, pt):
             """Return the 16-hex-digit hash that the friend-list request requires.

             *uin* is the account number (anything ``int()`` accepts) and *pt*
             is the ptwebqq string.
             """
             # XOR-fold the ptwebqq bytes into four accumulators. bytearray
             # yields integers, so no ord() call is needed.
             folded = [0, 0, 0, 0]
             for i, byte in enumerate(bytearray(pt.encode("UTF8"))):
                 folded[i % 4] ^= byte
             # XOR each byte of the 32-bit uin with 69, 67, 79, 75 ("ECOK").
             uin = int(uin)
             mixed = [
                 (uin >> 24 & 255) ^ 69,
                 (uin >> 16 & 255) ^ 67,
                 (uin >> 8 & 255) ^ 79,
                 (uin & 255) ^ 75,
             ]
             # Interleave folded[0], mixed[0], folded[1], mixed[1], ...
             # The old "cond and a or b" form substituted the mixed byte whenever
             # a folded byte was 0, which produced a wrong hash.
             interleaved = [b for pair in zip(folded, mixed) for b in pair]
             return "".join("%02X" % b for b in interleaved)

         # Fetch the friend list
         def __get_user_friends(self):
             """Request the friend list and print every remark name in it."""
             data = "r=%7B%22vfwebqq%22%3A%22"+self.__vfwebqq+"%22%2C%22hash%22%3A%22"+self.__hash+"%22%7D"
             res = self.Post(url="http://s.web2.qq.com/api/get_user_friends2",
                             data=data.encode("utf8"),
                             refer="http://s.web2.qq.com/proxy.html?v=20130916001&callback=1&id=1")
             jsn = self.__parse_json(res)
             if jsn is None or jsn.get("retcode") != 0:
                 return
             # The list came back correctly: print the remark names.
             for entry in (jsn.get("result") or {}).get("marknames") or []:
                 print (entry["markname"])

         # Heartbeat / message polling; no longer works, kept for reference only
         #def __poll(self):
         #    data="r=%7B%22ptwebqq%22%3A%22"+self.__ptwebqq+"%22%2C%22clientid%22%3A53999199%2C%22psessionid%22%3A%22"+self.__psessionid +"%22%2C%22key%22%3A%22%22%7D"
         #    res=self.Post(url="http://d1.web2.qq.com/channel/poll2",
         #                  data=data.encode(encoding="utf8"),
         #                  refer="http://d1.web2.qq.com/proxy.html?v=20151105001&callback=1&id=2")
         #    if(res!=None):
         #        print res

         def __check_queue(self):
             """Tk timer callback: close the window on 'stop', else re-arm in 100 ms."""
             try:
                 from Queue import Empty  # Python 2
             except ImportError:
                 from queue import Empty  # Python 3
             try:
                 out = self.queue.get_nowait()
             except Empty:
                 # Nothing queued yet; only this expected case is ignored.
                 out = None
             if out == 'stop':
                 self.__do_stop()
                 return
             # Could check for other commands here, too
             self.__root.after(100, self.__check_queue)

         def __stop(self):
             """Ask the QR window process to close."""
             self.queue.put('stop')

         def __do_stop(self):
             """Destroy the Tk root window, which ends its mainloop."""
             self.__root.destroy()

         def _run_proc(self, imgfile):
             """Child-process entry point: show *imgfile* in a Tk window until stopped."""
             self.__root = Tk()
             # Keep the PhotoImage in a local so it stays referenced while the
             # mainloop below is running.
             photo = ImageTk.PhotoImage(imgfile)
             label = Label(self.__root, image=photo)
             label.pack()
             self.__root.after(100, self.__check_queue)
             self.__root.mainloop()

         def run(self):
             """Show the QR code, wait for the login to finish, then close the window."""
             if not self.__ptqrshow():
                 return
             try:
                 if self.__ptqrlogin():
                     print(u"登录成功")
             finally:
                 # Close the QR window on failure or error too; otherwise the
                 # child process would keep the program alive.
                 self.__stop()
    163 
    # Script entry point: build the client and start the QR-code login flow.
    if __name__ == "__main__":
        WebQQ().run()
  • 相关阅读:
    马克思主义哲学是否只是“抄袭”和断章取义了别人的思想
    马克思的思想说到底都是抄袭
    答郭沫若的《卖淫妇的饶舌》(节录)--马克思思想批判
    联系的普遍性
    辩证
    (实用篇)使用PHP生成PDF文档
    discuz!
    Access是什么?
    putty 与winscp 区别
    xshell 与 putty
  • 原文地址:https://www.cnblogs.com/jixin/p/5133629.html
Copyright © 2011-2022 走看看