zoukankan      html  css  js  c++  java
  • python模拟163登陆获取邮件列表

    利用cookielib和urllib2模块模拟登陆163的例子有很多,近期看了《python模拟登陆163邮箱并获取通讯录》一文,受到启发,试着对收件箱、发件箱等进行了分析,并列出了所有邮件列表及状态,包括发件人、收件人、主题、发信时间、已读未读等状态。

    1、参考代码:http://hi.baidu.com/fc_lamp/blog/item/2466d1096fcc532de8248839.html

      1 #-*- coding:UTF-8 -*-
      2 import urllib,urllib2,cookielib
      3 import xml.etree.ElementTree as etree #xml解析类
      4 
      class Login163:
          """Small 163 webmail client: sign in, then read the address book.

          The constructor installs a process-wide urllib2 opener backed by an
          LWPCookieJar, so every later urllib2.urlopen() call shares the
          cookies collected while logging in.
          """

          # Pretend to be a desktop browser.
          header = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
          username = ''
          passwd = ''
          cookie = None  # cookie jar, created in __init__
          cookiefile = './cookies.dat'  # file the cookie jar is saved to
          user = ''  # part of the username before '@', set by login()

          def __init__(self, username, passwd):
              """Store the credentials and install a cookie-aware global opener."""
              self.username = username
              self.passwd = passwd
              self.cookie = cookielib.LWPCookieJar()
              opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie))
              urllib2.install_opener(opener)

          def login(self):
              """POST the credentials to the 163 login page.

              Returns True when the reply contains the success marker, otherwise
              a failure message string. The cookie jar is saved in both cases.
              """
              postdata = urllib.urlencode({
                  'username': self.username,
                  'password': self.passwd,
                  'type': 1,
              })
              req = urllib2.Request(
                  url='http://reg.163.com/logins.jsp?type=1&product=mail163&url=http://entry.mail.163.com/coremail/fcg/ntesdoor2?lightweight%3D1%26verifycookie%3D1%26language%3D-1%26style%3D1',
                  data=postdata,
                  headers=self.header,
              )
              result = str(urllib2.urlopen(req).read())
              self.user = self.username.split('@')[0]
              self.cookie.save(self.cookiefile)

              if '登录成功,正在跳转...' in result:
                  return True
              return '%s 登陆163邮箱失败。'%(self.user)

          def _get_sid(self):
              """Return the value of the first cookie whose text contains 'sid'.

              Raises LookupError when the jar holds no such cookie, instead of
              continuing with an undefined or unrelated value.
              """
              for cookie in self.cookie:
                  if 'sid' in str(cookie):
                      # Use the cookie's own value rather than splitting its
                      # repr, which would truncate a value containing '='.
                      return cookie.value
              raise LookupError('no sid cookie found; did login() succeed?')

          @staticmethod
          def _parse_contacts(xml_text):
              """Collect the 'EMAIL;PREF' fields of an address-book XML reply.

              Looks at the first <array> child of the root, takes the last child
              of that array's first element, and scans every field of every
              entry in it. Returns a list of {'email': text} dicts; an empty
              list when the expected nesting is missing.
              """
              root = etree.fromstring(xml_text)
              outer = next((node for node in root if node.tag == 'array'), None)
              if outer is None or len(outer) == 0:
                  return []
              first_reply = list(outer[0])
              if not first_reply:
                  return []
              return [
                  {'email': field.text}
                  for contact in first_reply[-1]
                  for field in contact
                  if field.get('name') == 'EMAIL;PREF'
              ]

          def address_list(self):
              """Fetch the address book and return a list of {'email': ...} dicts.

              Raises LookupError when no session id cookie is available.
              """
              # Visiting the entry page is what makes the server hand out the
              # sid cookie; the page body itself is not used.
              auth = urllib2.Request(
                  url='http://entry.mail.163.com/coremail/fcg/ntesdoor2?username='+self.user+'&lightweight=1&verifycookie=1&language=-1&style=1',
                  headers=self.header,
              )
              urllib2.urlopen(auth).read()
              sid = self._get_sid()
              self.cookie.save(self.cookiefile)

              url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=global:sequential&showAd=false&userType=browser&uid='+self.username
              # The 'var' payload is required; without it the server only
              # answers with a bare <code>S_OK</code><messages/> reply.
              postdata = urllib.urlencode({
                  'func':'global:sequential',
                  'showAd':'false',
                  'sid':sid,
                  'uid':self.username,
                  'userType':'browser',
                  'var':'<?xml version="1.0"?><object><array name="items"><object><string name="func">pab:searchContacts</string><object name="var"><array name="order"><object><string name="field">FN</string><boolean name="desc">false</boolean><boolean name="ignoreCase">true</boolean></object></array></object></object><object><string name="func">pab:getAllGroups</string></object></array></object>',
              })
              req = urllib2.Request(url=url, data=postdata, headers=self.header)
              res = urllib2.urlopen(req).read()
              return self._parse_contacts(res)
    111         
    # Demo: sign in and print every contact address.
    print("Requesting......\n\n")
    login = Login163('xxxx@163.com','xxxxx')
    flag = login.login()
    # login() returns True on success and a failure message string otherwise.
    if flag is True:
        print("Successful landing,Resolved contacts......\n\n")
        res = login.address_list()
        for x in res:
            print(x['email'])
    else:
        print(flag)
    View Code

    2、分析收件箱、发件箱等网址

        在参考代码中,获取通讯录的url为

    url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=global:sequential&showAd=false&userType=browser&uid='+self.username。通过对邮箱地址的分析,发现收件箱、发件箱等的url为 url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=mbox:listMessages&showAd=false&userType=browser&uid='+self.username,其中func=mbox:listMessages。

    其对收件箱、发件箱的具体区分在下面的postdata中,具体为:

    (1)收件箱

    复制代码
    # POST body for listing the inbox (fid 1), newest message first, 20 per page.
    postdata = {
        'func':'global:sequential',
        'showAd':'false',
        'sid':sid,  # session id taken from the login cookies, not a fixed value
        'uid':self.username,
        'userType':'browser',
        'var':'<?xml version="1.0"?><object><int name="fid">1</int><string name="order">date</string><boolean name="desc">true</boolean><boolean name="topFirst">false</boolean><int name="start">0</int><int name="limit">20</int></object>'
    }
    复制代码

    (2)发件箱

    复制代码
    # POST body for listing the sent box (fid 3), newest message first, 20 per page.
    postdata = {
        'func':'global:sequential',
        'showAd':'false',
        'sid':sid,  # session id taken from the login cookies, not a fixed value
        'uid':self.username,
        'userType':'browser',
        'var':'<?xml version="1.0"?><object><int name="fid">3</int><string name="order">date</string><boolean name="desc">true</boolean><boolean name="topFirst">false</boolean><int name="start">0</int><int name="limit">20</int></object>'
    }
    复制代码

    可以看出,两段代码的不同之处就是fid的取值不同,其中收件箱为1,发件箱为3,草稿箱为2。

    3、xml解析

        利用ElementTree 类来进行xml到字典的转换。在获取通讯录的实例中,主要使用了这一方法。本例子(具体代码见后文)在收取邮件列表时,并没有用这一方法,仍然使用的是字符串的处理方法。但这里还是列一下ElementTree 类对xml的处理。如(参考地址:http://hi.baidu.com/fc_lamp/blog/item/8ed2d53ada4586f714cecb3d.html):

    复制代码
    -<result>
       <code>S_OK</code>
     -<array name="var">
      -<object>
         <string name="code">S_OK</string>
        -<array name="var">
         +<object></object>
         +<object></object>
         +<object></object>
         +<object></object>
         +<object></object>
         +<object></object>
         +<object></object>
         +<object></object>
         +<object></object>
         +<object></object>
         +<object></object>
         +<object></object>
         +<object></object>
         +<object></object>
         +<object></object>
         +<object></object>
         </array>
       </object>
      +<object></object>
      </array>
     </result>
    复制代码

    解决方法:

     1 #-*- coding:UTF-8 -*-
     2 
     3 import xml.etree.ElementTree as etree #xml解析类
     def xml2json(xml):
         """Extract contact e-mail addresses from an address-book XML string.

         Looks at the first <array> child of the root element, takes the last
         child of that array's first element, and scans every field of every
         entry in it for the attribute name="EMAIL;PREF".

         Returns a list of {'email': text} dicts. An empty list is returned
         when the expected nesting is absent, rather than raising TypeError.
         Use etree.parse(source) instead of fromstring() to read from a file.
         """
         root = etree.fromstring(xml)
         outer = next((node for node in root if node.tag == 'array'), None)
         # len() is used because the truth value of an Element is deprecated.
         if outer is None or len(outer) == 0:
             return []
         # list(element) replaces the deprecated Element.getchildren().
         first_reply = list(outer[0])
         if not first_reply:
             return []
         return [
             {'email': field.text}
             for contact in first_reply[-1]
             for field in contact
             # .get() tolerates fields that carry no name attribute.
             if field.get('name') == 'EMAIL;PREF'
         ]

    4、收件箱邮件列表

        本例子只列出了收件箱邮件列表,如果需要,可根据以上介绍调整fid值,列出发件箱、草稿箱等的邮件列表。程序在Windows XP、Python 2.6环境下调试通过,运行后,会在当前目录下生成三个文件:inboxlistfile.txt记录收件箱邮件列表,addfile.txt记录通讯录,cookies.dat记录cookies。具体代码如下:

      1 #-*- coding:UTF-8 -*-
      2 #@小五义 http://www.cnblogs.com/xiaowuyi
      3 #163邮件列表
      4 import urllib,urllib2,cookielib
      5 import xml.etree.ElementTree as etree #xml解析类
      6 
      class Login163:
          """Small 163 webmail client: sign in, read contacts, list the inbox.

          The constructor installs a process-wide urllib2 opener backed by an
          LWPCookieJar, so every later urllib2.urlopen() call shares the
          cookies collected while logging in.
          """

          # Pretend to be a desktop browser.
          header = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
          username = ''
          passwd = ''
          cookie = None  # cookie jar, created in __init__
          cookiefile = './cookies.dat'  # file the cookie jar is saved to
          user = ''  # part of the username before '@', set by login()

          def __init__(self, username, passwd):
              """Store the credentials and install a cookie-aware global opener."""
              self.username = username
              self.passwd = passwd
              self.cookie = cookielib.LWPCookieJar()
              opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie))
              urllib2.install_opener(opener)

          def login(self):
              """POST the credentials to the 163 login page.

              Returns True when the reply contains the success marker, otherwise
              a failure message string. The cookie jar is saved in both cases.
              """
              postdata = urllib.urlencode({
                  'username': self.username,
                  'password': self.passwd,
                  'type': 1,
              })
              req = urllib2.Request(
                  url='http://reg.163.com/logins.jsp?type=1&product=mail163&url=http://entry.mail.163.com/coremail/fcg/ntesdoor2?lightweight%3D1%26verifycookie%3D1%26language%3D-1%26style%3D1',
                  data=postdata,
                  headers=self.header,
              )
              result = str(urllib2.urlopen(req).read())
              self.user = self.username.split('@')[0]
              self.cookie.save(self.cookiefile)

              if '登录成功,正在跳转...' in result:
                  return True
              return '%s 登陆163邮箱失败。'%(self.user)

          def _open_session(self):
              """Visit the mail entry page and return the session id cookie value.

              The cookie jar is saved afterwards. Raises LookupError when no
              cookie whose text contains 'sid' is present.
              """
              # Visiting the entry page is what makes the server hand out the
              # sid cookie; the page body itself is not used.
              auth = urllib2.Request(
                  url='http://entry.mail.163.com/coremail/fcg/ntesdoor2?username='+self.user+'&lightweight=1&verifycookie=1&language=-1&style=1',
                  headers=self.header,
              )
              urllib2.urlopen(auth).read()
              try:
                  for cookie in self.cookie:
                      if 'sid' in str(cookie):
                          # Use the cookie's own value rather than splitting
                          # its repr, which truncates a value containing '='.
                          return cookie.value
                  raise LookupError('no sid cookie found; did login() succeed?')
              finally:
                  self.cookie.save(self.cookiefile)

          def _post(self, url, fields):
              """POST url-encoded fields with the browser header; return the body."""
              req = urllib2.Request(
                  url=url,
                  data=urllib.urlencode(fields),
                  headers=self.header,
              )
              return urllib2.urlopen(req).read()

          @staticmethod
          def _parse_contacts(xml_text):
              """Collect the 'EMAIL;PREF' fields of an address-book XML reply.

              Looks at the first <array> child of the root, takes the last child
              of that array's first element, and scans every field of every
              entry in it. Returns a list of {'email': text} dicts; an empty
              list when the expected nesting is missing.
              """
              root = etree.fromstring(xml_text)
              outer = next((node for node in root if node.tag == 'array'), None)
              if outer is None or len(outer) == 0:
                  return []
              first_reply = list(outer[0])
              if not first_reply:
                  return []
              return [
                  {'email': field.text}
                  for contact in first_reply[-1]
                  for field in contact
                  if field.get('name') == 'EMAIL;PREF'
              ]

          @staticmethod
          def _field(chunk, marker, closing):
              """Return the text between marker's tag and closing, or '' if absent."""
              start = chunk.find(marker)
              if start < 0:
                  return ''
              start += len(marker) + 1  # skip the '>' that ends the opening tag
              end = chunk.find(closing, start)
              if end < 0:
                  return ''
              return chunk[start:end]

          @staticmethod
          def _format_inbox(text):
              """Turn a message-list reply into the text written to the inbox file.

              The reply is split on '<object>'. The piece holding the XML
              declaration yields the header line; every piece containing
              name="id" yields one message record.
              """
              field = Login163._field
              chunks = text.split('<object>')
              total = len(chunks) - 1  # number of '<object>' pieces after the header
              out = []
              index = 0  # running message number
              for chunk in chunks:
                  if 'xml' in chunk and ' version=' in chunk:
                      out.append('inbox 共' + str(total) + '\n')
                  if 'name="id"' not in chunk:
                      continue
                  index += 1
                  if 'name="read">true' in chunk:
                      state = '邮件状态:已读'
                  else:
                      state = '邮件状态:未读'
                  out.extend([
                      str(index) + '封:\n',
                      'From:' + field(chunk, 'name="from"', '</string>') + '\n',
                      'TO:' + field(chunk, 'name="to"', '</string>') + '\n',
                      'Subject:' + field(chunk, 'name="subject"', '</string>') + '\n',
                      'Date:' + field(chunk, 'name="sentDate"', '</date>') + '\n',
                      state + '\n',
                      '邮件尺寸:' + field(chunk, 'name="size"', '</int>') + '\n',
                      # The message id is what a later download would need.
                      '邮件编号:' + field(chunk, 'name="id"', '</string>') + '\n\n',
                  ])
              return ''.join(out)

          def address_list(self):
              """Fetch the address book and return a list of {'email': ...} dicts.

              Raises LookupError when no session id cookie is available.
              """
              sid = self._open_session()
              url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=global:sequential&showAd=false&userType=browser&uid='+self.username
              # The 'var' payload is required; without it the server only
              # answers with a bare <code>S_OK</code><messages/> reply.
              res = self._post(url, {
                  'func':'global:sequential',
                  'showAd':'false',
                  'sid':sid,
                  'uid':self.username,
                  'userType':'browser',
                  'var':'<?xml version="1.0"?><object><array name="items"><object><string name="func">pab:searchContacts</string><object name="var"><array name="order"><object><string name="field">FN</string><boolean name="desc">false</boolean><boolean name="ignoreCase">true</boolean></object></array></object></object><object><string name="func">pab:getAllGroups</string></object></array></object>',
              })
              return self._parse_contacts(res)

          def minbox(self):
              """Append a listing of the inbox messages to inboxlistfile.txt.

              The request asks for folder fid 1 (inbox; 3 is the sent box and
              2 the drafts folder), newest first, at most 20 messages.
              Raises LookupError when no session id cookie is available.
              """
              sid = self._open_session()
              url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=mbox:listMessages&showAd=false&userType=browser&uid='+self.username
              # NOTE(review): the body still says func=global:sequential while
              # the URL says mbox:listMessages; kept as in the original, confirm
              # which one the server honours.
              res = self._post(url, {
                  'func':'global:sequential',
                  'showAd':'false',
                  'sid':sid,
                  'uid':self.username,
                  'userType':'browser',
                  'var':'<?xml version="1.0"?><object><int name="fid">1</int><string name="order">date</string><boolean name="desc">true</boolean><boolean name="topFirst">false</boolean><int name="start">0</int><int name="limit">20</int></object>',
              })
              report = self._format_inbox(str(res))
              # 'with' closes the file even if the write fails.
              with open('inboxlistfile.txt','a') as inboxlistfile:
                  inboxlistfile.write(report)
    216                 
    217         
    218         
    # Demo: sign in, dump the inbox listing to a file, then print the contacts.
    print("Requesting......\n\n")
    login = Login163('AAAAA@163.com','AAAAA')
    flag = login.login()
    # login() returns True on success and a failure message string otherwise.
    if flag is True:
        login.minbox()
        print("Successful landing,Resolved contacts......\n\n")
        res = login.address_list()
        for x in res:
            print(x['email'])
    else:
        print(flag)
     
  • 相关阅读:
    HDU1712:ACboy needs your help(分组背包模板)
    HDU1203:I NEED A OFFER!(01背包)
    HDU1171:Big Event in HDU
    POJ1014:Dividing(多重背包)
    HDU2191-悼念512汶川大地震遇难同胞——珍惜现在,感恩生活(多重背包入门)
    hdu2159FATE(二维背包)
    POJ1201 Intervals
    C++之运算符重载
    C++之强制类型转换
    MFC WinInetHttp抓取网页代码内容
  • 原文地址:https://www.cnblogs.com/cncz/p/4048549.html
Copyright © 2011-2022 走看看