zoukankan      html  css  js  c++  java
  • 使用python模拟登陆百度

    #!/usr/bin/python
    # -*- coding: utf-8 -*-
    """
    Function:   Used to demonstrate how to use Python code to emulate login to the Baidu main page: http://www.baidu.com/
    Note:       Before trying to understand the following code, please first read the related articles:
                (1)【整理】关于抓取网页,分析网页内容,模拟登陆网站的逻辑/流程和注意事项
    
    http://www.crifan.com/summary_about_flow_process_of_fetch_webpage_simulate_login_website_and_some_notice/
    
                (2) 【教程】手把手教你如何利用工具(IE9的F12)去分析模拟登陆网站(百度首页)的内部逻辑过程
    
    http://www.crifan.com/use_ie9_f12_to_analysis_the_internal_logical_process_of_login_baidu_main_page_website/
    
                (3) 【教程】模拟登陆网站 之 Python版
    
    http://www.crifan.com/emulate_login_website_using_python
    
    Version:    2012-11-06
    Author:     Crifan
    """
    
    import re;
    import cookielib;
    import urllib;
    import urllib2;
    import optparse;
    
    #------------------------------------------------------------------------------
    # check whether every cookie name in cookieNameList exists in cookieJar
    def checkAllCookiesExist(cookieNameList, cookieJar):
        """Tell whether every name in cookieNameList is present in cookieJar.

        cookieNameList: iterable of cookie names (hashable values) to look for.
        cookieJar:      iterable of objects exposing a ``name`` attribute.

        Returns True when no requested name is missing (which includes the
        case of an empty cookieNameList), otherwise False.
        """
        wantedNames = set(cookieNameList)
        presentNames = set(jarCookie.name for jarCookie in cookieJar)
        # Any name left over after removing the present ones was not found.
        missingNames = wantedNames - presentNames
        return not missingNames
    
    #------------------------------------------------------------------------------
    # print a delimiter line
    def printDelimiter():
        """Print a separator line made of 80 dashes to stdout."""
        print('-' * 80)
    
    #------------------------------------------------------------------------------
    # main function to emulate login baidu
    def emulateLoginBaidu():
        print "Function: Used to demostrate how to use Python code to emulate login baidu main page: http://www.baidu.com/";
        print "Usage: emulate_login_baidu_python.py -u yourBaiduUsername -p yourBaiduPassword";
        printDelimiter();
    
        # parse input parameters
        parser = optparse.OptionParser();
        parser.add_option("-u","--username",action="store",type="string",default='',dest="username",help="Your Baidu Username");
        parser.add_option("-p","--password",action="store",type="string",default='',dest="password",help="Your Baidu password");
        (options, args) = parser.parse_args();
        # export all options variables, then later variables can be used
        for i in dir(options):
            exec(i + " = options." + i);
    
        printDelimiter();
        print "[preparation] using cookieJar & HTTPCookieProcessor to automatically handle cookies";
        cj = cookielib.CookieJar();
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj));
        urllib2.install_opener(opener);
    
        printDelimiter();
        print "[step1] to get cookie BAIDUID";
        baiduMainUrl = "http://www.baidu.com/";
        resp = urllib2.urlopen(baiduMainUrl);
        #respInfo = resp.info();
        #print "respInfo=",respInfo;
        for index, cookie in enumerate(cj):
            print '[',index, ']',cookie;
    
        printDelimiter();
        print "[step2] to get token value";
        getapiUrl = "https://passport.baidu.com/v2/api/?getapi&class=login&tpl=mn&tangram=true";
        getapiResp = urllib2.urlopen(getapiUrl);
        #print "getapiResp=",getapiResp;
        getapiRespHtml = getapiResp.read();
        #print "getapiRespHtml=",getapiRespHtml;
        #bdPass.api.params.login_token='5ab690978812b0e7fbbe1bfc267b90b3';
        foundTokenVal = re.search("bdPass.api.params.login_token='(?P<tokenVal>w+)';", getapiRespHtml);
        if(foundTokenVal):
            tokenVal = foundTokenVal.group("tokenVal");
            print "tokenVal=",tokenVal;
    
            printDelimiter();
            print "[step3] emulate login baidu";
            staticpage = "http://www.baidu.com/cache/user/html/jump.html";
            baiduMainLoginUrl = "https://passport.baidu.com/v2/api/?login";
            postDict = {
                #'ppui_logintime': "",
                'charset'       : "utf-8",
                #'codestring'    : "",
                'token'         : tokenVal, #de3dbf1e8596642fa2ddf2921cd6257f
                'isPhone'       : "false",
                'index'         : "0",
                #'u'             : "",
                #'safeflg'       : "0",
                'staticpage'    : staticpage, #http%3A%2F%2Fwww.baidu.com%2Fcache%2Fuser%2Fhtml%2Fjump.html
                'loginType'     : "1",
                'tpl'           : "mn",
                'callback'      : "parent.bdPass.api.login._postCallback",
                'username'      : username,
                'password'      : password,
                #'verifycode'    : "",
                'mem_pass'      : "on",
            };
            postData = urllib.urlencode(postDict);
            # here will automatically encode values of parameters
            # such as:
            # encode http://www.baidu.com/cache/user/html/jump.html into http%3A%2F%2Fwww.baidu.com%2Fcache%2Fuser%2Fhtml%2Fjump.html
            #print "postData=",postData;
            req = urllib2.Request(baiduMainLoginUrl, postData);
            # in most case, for do POST request, the content-type, is application/x-www-form-urlencoded
            req.add_header('Content-Type', "application/x-www-form-urlencoded");
            resp = urllib2.urlopen(req);
            #for index, cookie in enumerate(cj):
            #    print '[',index, ']',cookie;
            cookiesToCheck = ['BDUSS', 'PTOKEN', 'STOKEN', 'SAVEUSERID'];
            loginBaiduOK = checkAllCookiesExist(cookiesToCheck, cj);
            if(loginBaiduOK):
                print "+++ Emulate login baidu is OK, ^_^";
            else:
                print "--- Failed to emulate login baidu !"
        else:
            print "Fail to extract token value from html=",getapiRespHtml;
    
    # Run the login emulation only when executed as a script, not on import.
    if __name__ == "__main__":
        emulateLoginBaidu()
  • 相关阅读:
    git相关
    String,static,final
    tomcat和servlet的基本了解
    xml的相关知识
    js基础
    HTML
    Java之JDBC连接池
    Java之JDBC
    Java的内存模型
    Java的内存结构
  • 原文地址:https://www.cnblogs.com/jackyshan/p/3375947.html
Copyright © 2011-2022 走看看