zoukankan      html  css  js  c++  java
  • python3爬虫-知乎登录

    py文件:

    from fake_useragent import UserAgent
    import requests
    from http import cookiejar
    import base64
    from PIL import Image
    import time, json
    import hashlib, hmac
    import execjs
    from urllib import parse
    
    # Shared fake_useragent generator; `ua.random` is read wherever this file
    # builds a User-Agent header.
    ua = UserAgent()
    
    
    class MyException(Exception):
        """Error that carries a status code together with a message.

        Both constructor arguments are kept as attributes (``status`` and
        ``msg``) for the code that catches the exception.
        """

        def __init__(self, status, msg):
            self.status, self.msg = status, msg
    
    
    class ZhiHu:
        """Client that logs in to zhihu.com through the web sign-in API.

        Cookies live in an ``LWPCookieJar`` bound to ``./cookies.txt`` so that a
        successful login can be reused by later runs. The sign-in form is signed
        with HMAC-SHA1 and then encrypted by the script in ``./01.js``, which is
        executed through ``execjs``.
        """

        def __init__(self, username=None, password=None):
            """Create the HTTP session and the static part of the login form.

            :param username: account name or phone number; asked for
                interactively at login time when empty.
            :param password: account password; asked for interactively at
                login time when empty.
            """
            self.username = username
            self.password = password
            self.session = requests.Session()
            self.session.headers = {
                "user-agent": ua.random,
                "referer": "https://www.zhihu.com/",
                'host': 'www.zhihu.com',
            }

            self.session.cookies = cookiejar.LWPCookieJar(filename="./cookies.txt")

            # Static form fields; login() adds credentials, captcha, timestamp
            # and signature before the form is encrypted.
            self.login_param = {
                "client_id": "c3cef7c66a1843f8b3a9e6a1e3160e20",
                "grant_type": "password",
                "source": "com.zhihu.web",
                "username": "",
                "password": "",
                "ref_source": "homepage",
                "utm_source": "baidu",

            }

        def load_cookies(self):
            """Load the saved cookies from the cookie file into the session.

            :return: True when the file was read, False when it does not exist
                or cannot be parsed as an LWP cookie file.
            """
            try:
                self.session.cookies.load(ignore_discard=True, ignore_expires=True)
            except (FileNotFoundError, cookiejar.LoadError):
                return False
            return True

        def login(self, captcha_lang: str = "en", is_load_cookies: bool = True):
            """Log in, reusing saved cookies when allowed.

            :param captcha_lang: captcha style; "en" is a typed image code, any
                other value selects the "click the upside-down characters"
                captcha.
            :param is_load_cookies: try the saved cookie file before falling
                back to a password login.
            :return: True when the session ends up logged in, False otherwise.
            """
            # Test the flag first so the cookie file is left untouched when the
            # caller asked for a fresh login.
            if is_load_cookies and self.load_cookies():
                print("读取cookies文件")
                if self.check__login():
                    print("登陆成功")
                    return True
                print("cookies已经失效")

            # Not logged in yet: make sure credentials are available.
            self.check_user_input()

            # Fetch the xsrf token and build the headers for the sign-in call.
            headers = self.session.headers.copy()
            xsrf = self.get_xsrf()
            headers.update({
                "content-type": "application/x-www-form-urlencoded",
                "x-xsrftoken": xsrf,
                "x-zse-83": "3_1.1",
            })

            # "lang" has to be stored before get_captcha() runs, because that
            # method reads it back from login_param.
            self.login_param.update({
                "username": self.username,
                "password": self.password,
                "lang": captcha_lang
            })

            timestamp = int(time.time() * 1000)
            self.login_param.update({
                "timestamp": timestamp,
                "captcha": self.get_captcha() or "",
                "signature": self.get_signature(timestamp)
            })

            formdata = self.__encrypt(self.login_param)

            url = "https://www.zhihu.com/api/v3/oauth/sign_in"

            self.session.post(url=url, headers=headers, data=formdata)
            if self.check__login():
                # load_cookies() reads with ignore_discard=True, so session
                # cookies must be written out as well.
                self.session.cookies.save(ignore_discard=True)
                print("cookies以写入文件")
                print("登录成功")
                return True
            print("登录失败")
            return False

        def check__login(self):
            """Report whether the session is already logged in.

            The home page is requested without following redirects; only a 200
            answer counts as logged in (a 302 or any other status does not).

            :return: True when logged in, False otherwise.
            """
            url = "https://www.zhihu.com/"
            response = self.session.get(url=url, allow_redirects=False)
            return response.status_code == 200

        def check_user_input(self):
            """Prompt for missing credentials and normalise the username.

            A username made only of digits is treated as a phone number and
            gets the "+86" prefix; a value that already starts with "+" is not
            all digits and is therefore left alone.
            """
            if not self.username:
                self.username = input("请输入手机号>>:").strip()
            if self.username.isdigit():
                self.username = "+86" + self.username

            if not self.password:
                self.password = input("请输入密码>>:").strip()

        def _captcha_api(self):
            """Return the captcha endpoint for the language in ``login_param``."""
            if self.login_param.get("lang") == "en":
                return "https://www.zhihu.com/api/v3/oauth/captcha?lang=en"
            return "https://www.zhihu.com/api/v3/oauth/captcha?lang=cn"

        def get_captcha(self):
            """Solve the captcha interactively when the server demands one.

            The endpoint is always called with GET first (is a captcha needed?);
            when it is, PUT fetches the image and POST submits the answer.

            :return: the submitted answer, or None when no captcha is required.
            """
            lang = self.login_param.get("lang")
            captcha_api = self._captcha_api()
            response = self.session.get(captcha_api)
            if not response.json().get("show_captcha", False):
                return None

            # Fetch the captcha image (base64) and drop literal "\n" sequences
            # before decoding it.
            response = self.session.put(captcha_api)
            base64_img = response.json()['img_base64'].replace(r'\n', '')
            with open("./captcha.png", "wb") as f:
                f.write(base64.b64decode(base64_img))
            img = Image.open("./captcha.png")
            if lang == "en":
                img.show()
                code = input("请输入图片中的验证码>>:").strip()
            else:
                import matplotlib.pyplot as plt
                plt.imshow(img)
                print('点击所有倒立的汉字,在命令行中按回车提交')
                points = plt.ginput(7)
                # NOTE(review): coordinates are halved before submission,
                # presumably because the image is shown at twice the declared
                # 200x44 size - confirm.
                code = json.dumps({'img_size': [200, 44],
                                   'input_points': [[i[0] / 2, i[1] / 2] for i in points]})

            self.session.post(captcha_api, data={"input_text": code}, headers={"user-agent": ua.random, })
            return code

        def get_no_captch(self):
            """Poll the captcha endpoint until no captcha is required.

            Retries every 0.5 s for as long as the server keeps asking for a
            captcha.

            :return: an empty string, the captcha value to send in that case.
            """
            captcha_api = self._captcha_api()
            while True:
                print("正在请求验证码....")
                response = self.session.get(captcha_api)
                # The JSON value is a real boolean; compare it as one.
                if not response.json().get("show_captcha", False):
                    return ""
                print("继续...")
                time.sleep(0.5)

        def get_signature(self, timestamp):
            """Compute the ``signature`` form field.

            :param timestamp: the millisecond timestamp sent in the same form.
            :return: hex HMAC-SHA1 digest of grant_type + client_id + source +
                timestamp.
            """
            ha = hmac.new(key=b"d1b964811afb40118a12068ff74a12f4", digestmod=hashlib.sha1)
            client_id = self.login_param.get("client_id")
            grant_type = self.login_param.get("grant_type")
            source = self.login_param.get("source")
            ha.update(bytes(grant_type + client_id + source + str(timestamp), encoding="utf-8"))
            return ha.hexdigest()

        def get_xsrf(self):
            """Return the ``_xsrf`` token for the sign-in request.

            The token is read from the cookies set by the sign-in page; when
            that response carries none, the cookie already held by the session
            (for example one loaded from the cookie file) is used instead.

            :return: the token, or None when neither source has it.
            """
            url = "https://www.zhihu.com/signin"
            response = self.session.get(url=url, headers=self.session.headers, allow_redirects=False)
            _xsrf = response.cookies.get("_xsrf")
            if _xsrf is None:
                for cookie in self.session.cookies:
                    if cookie.name == "_xsrf":
                        return cookie.value
            return _xsrf

        def __encrypt(self, data: dict):
            """URL-encode the form and encrypt it with ``Q`` from ``./01.js``.

            :param data: the complete sign-in form.
            :return: the encrypted request body returned by the script.
            """
            data = parse.urlencode(data)
            with open("./01.js", "r", encoding="utf-8") as f:
                js_code = f.read()
            ctx = execjs.compile(js_code)
            res = ctx.call("Q", data)
            return res
    
    
    if __name__ == '__main__':
        # Script entry point: no credentials are passed, so login() prompts for
        # them when the saved cookies are missing or no longer valid.
        zhihu = ZhiHu(username=None, password=None)
        zhihu.login(captcha_lang="en", is_load_cookies=True)

    js文件:

    // Minimal stand-ins for the browser globals `window` and `navigator`, so
    // the script can run outside a browser.
    // NOTE(review): presumably read by the bytecode executed at the bottom of
    // this file - confirm before changing any value.
    window = {
        "encodeURIComponent": encodeURIComponent
    }
    navigator = {
        "userAgent": "5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36"
    }
    
    // typeof helper used by the ALU handler (n.prototype.M, case 10).
    // On first use it picks one of two implementations, rebinds `s` to it and
    // then delegates; later calls go straight to the chosen implementation.
    function s(value) {
        var chosen;
        if ("function" == typeof Symbol && "symbol" == typeof Symbol.t) {
            chosen = function (v) {
                return typeof v
            };
        } else {
            chosen = function (v) {
                return v && "function" == typeof Symbol && v.constructor === Symbol && v !== Symbol.prototype ? "symbol" : typeof v
            };
        }
        s = chosen;
        return chosen(value)
    }
    
    // `t` holds the string "1.1"; `__g` is the shared object on which Q()
    // below looks up `_encrypt`.
    var t = "1.1";
    var __g = {};
    
    // Opcode 0 in the G dispatch table: takes no operand fields, so the
    // constructor body is empty (its behaviour is in i.prototype.M).
    function i() {
    }
    
    // Opcode 1 operand decoder: s = bit 11, i = bits 9-10, h and A = low 9 bits.
    function h(word) {
        this.s = (word & 2048) >> 11;
        this.i = (word & 1536) >> 9;
        this.h = word & 511;
        this.A = word & 511;
    }
    
    // Opcode 2 operand decoder: i = bits 10-11, A = low 10 bits.
    function A(word) {
        this.i = (word & 3072) >> 10;
        this.A = word & 1023;
    }
    
    // Opcode 3 operand decoder: n = bits 10-11, e = bits 8-9, a = bits 6-7,
    // s = low 6 bits.
    function n(word) {
        this.n = (word & 3072) >> 10;
        this.e = (word & 768) >> 8;
        this.a = (word & 192) >> 6;
        this.s = word & 63;
    }
    
    // Opcode 4 operand decoder: i = bits 10-11, h = low 10 bits.
    function e(word) {
        this.i = (word >> 10) & 3;
        this.h = word & 1023;
    }
    
    // Opcode 5 in the G dispatch table: takes no operand fields, so the
    // constructor body is empty (its behaviour is in a.prototype.M).
    function a() {
    }
    
    // Opcode 6 operand decoder: n = bits 10-11, e = bits 8-9, a = bits 6-7,
    // s = low 6 bits.
    function c(word) {
        this.n = (word & 3072) >> 10;
        this.e = (word & 768) >> 8;
        this.a = (word & 192) >> 6;
        this.s = word & 63;
    }
    
    // Opcode 7 operand decoder: A = bits 2-11, s = low 2 bits.
    function o(word) {
        this.A = (word & 4095) >> 2;
        this.s = word & 3;
    }
    
    // Opcode 8 operand decoder: i = bits 10-11, h = bits 2-9, s = low 2 bits.
    function r(word) {
        this.i = (word >> 10) & 3;
        this.h = (word >> 2) & 255;
        this.s = word & 3;
    }
    
    // Opcode 9 operand decoder: s = bits 10-11, i = bits 8-9, h = low 10 bits,
    // A = low 6 bits.
    function k(word) {
        this.s = (word & 4095) >> 10;
        this.i = (word & 1023) >> 8;
        this.h = word & 1023;
        this.A = word & 63;
    }
    
    // Opcode 10 operand decoder: s = bits 10-11, n = bits 8-9, e = bits 6-7.
    function B(word) {
        this.s = (word & 4095) >> 10;
        this.n = (word & 1023) >> 8;
        this.e = (word & 255) >> 6;
    }
    
    // Opcode 11 operand decoder: i = bits 10-11, A = low 10 bits.
    function f(word) {
        this.i = (word & 3072) >> 10;
        this.A = word & 1023;
    }
    
    // Opcode 12 operand decoder: A = low 12 bits.
    function u(word) {
        this.A = word & 4095;
    }
    
    // Opcode 13 operand decoder: i = bits 10-11.
    function C(word) {
        this.i = (word & 3072) >> 10;
    }
    
    // Opcode 14 operand decoder: A = low 12 bits.
    function b(word) {
        this.A = word & 4095;
    }
    
    // Opcode 15 operand decoder: s = bits 8-11, i = bits 6-7, h = low 6 bits.
    function g(word) {
        this.s = (word & 3840) >> 8;
        this.i = (word & 192) >> 6;
        this.h = word & 63;
    }
    
    // Virtual machine state. Each instruction word handled by G.prototype.M
    // carries its opcode in the top four bits; `D` maps that opcode to the
    // operand decoder whose prototype.M executes it.
    function G() {
        this.c = [0, 0, 0, 0];  // four registers
        this.o = 0;             // index of the next instruction word in `b`
        this.r = [];            // saved `o` values (pushed by opcode 4)
        this.k = [];            // local slots of the current frame
        this.B = [];            // saved `k` arrays
        this.f = [];            // operand stack
        this.u = [];            // saved `f` arrays
        this.C = false;         // condition flag set by opcode 6, read by opcode 7
        this.b = [];            // instruction words
        this.g = [];            // constant pool
        this.G = false;         // true while the run loop in J should continue
        this.Q = null;          // address to jump to when an instruction throws
        this.R = null;          // last exception caught by the run loop
        this.w = [];
        this.x = 0;             // timestamp of the previous instruction
        this.D = {
            0: i,
            1: h,
            2: A,
            3: n,
            4: e,
            5: a,
            6: c,
            7: o,
            8: r,
            9: k,
            10: B,
            11: f,
            12: u,
            13: C,
            14: b,
            15: g
        };
    }
    
    // Opcode 0: clear the running flag so the loop in G.prototype.J stops.
    i.prototype.M = function (vm) {
        vm.G = false
    }
        ,
        // Opcode 1: load register `i` from the immediate `h` (mode 0) or from
        // local slot `A` (mode 1).
        h.prototype.M = function (vm) {
            if (this.s === 0) {
                vm.c[this.i] = this.h
            } else if (this.s === 1) {
                vm.c[this.i] = vm.k[this.A]
            }
        }
        ,
        // Opcode 2: store register `i` into local slot `A`.
        A.prototype.M = function (vm) {
            var value = vm.c[this.i];
            vm.k[this.A] = value
        }
        ,
        // Opcode 3: arithmetic/logic unit. Registers `e` and `a` are the
        // operands, register `n` receives the result, `s` selects the
        // operation. Cases 10 and 13 are unary and use only the first operand.
        n.prototype.M = function (vm) {
            var regs = vm.c,
                dst = this.n,
                lhs = regs[this.e],
                rhs = regs[this.a];
            switch (this.s) {
                case 0:
                    regs[dst] = lhs + rhs;
                    break;
                case 1:
                    regs[dst] = lhs - rhs;
                    break;
                case 2:
                    regs[dst] = lhs * rhs;
                    break;
                case 3:
                    regs[dst] = lhs / rhs;
                    break;
                case 4:
                    regs[dst] = lhs % rhs;
                    break;
                case 5:
                    regs[dst] = lhs == rhs;
                    break;
                case 6:
                    regs[dst] = lhs >= rhs;
                    break;
                case 7:
                    regs[dst] = lhs || rhs;
                    break;
                case 8:
                    regs[dst] = lhs && rhs;
                    break;
                case 9:
                    regs[dst] = lhs !== rhs;
                    break;
                case 10:
                    regs[dst] = s(lhs);
                    break;
                case 11:
                    regs[dst] = lhs in rhs;
                    break;
                case 12:
                    regs[dst] = lhs > rhs;
                    break;
                case 13:
                    regs[dst] = -lhs;
                    break;
                case 14:
                    regs[dst] = lhs < rhs;
                    break;
                case 15:
                    regs[dst] = lhs & rhs;
                    break;
                case 16:
                    regs[dst] = lhs ^ rhs;
                    break;
                case 17:
                    regs[dst] = lhs << rhs;
                    break;
                case 18:
                    regs[dst] = lhs >>> rhs;
                    break;
                case 19:
                    regs[dst] = lhs | rhs
            }
        }
        ,
        // Opcode 4: enter a subroutine. Saves the current `o` and `k`, jumps to
        // the address held in register `i`, moves `h` values from the operand
        // stack into the new local slots (keeping their order), then saves the
        // operand stack and starts an empty one.
        e.prototype.M = function (vm) {
            vm.r.push(vm.o);
            vm.B.push(vm.k);
            vm.o = vm.c[this.i];
            vm.k = [];
            var remaining = this.h;
            while (remaining-- > 0)
                vm.k.unshift(vm.f.pop());
            vm.u.push(vm.f);
            vm.f = []
        }
        ,
        // Opcode 5: return from a subroutine by restoring the saved `o`, `k`
        // and `f` (the counterpart of opcode 4).
        a.prototype.M = function (vm) {
            vm.o = vm.r.pop();
            vm.k = vm.B.pop();
            vm.f = vm.u.pop()
        }
        ,
        // Opcode 6: compare registers `n` and `e` and store the outcome in the
        // condition flag `C`. Modes 6 and 7 test register `n` alone.
        c.prototype.M = function (vm) {
            var left = vm.c[this.n],
                right = vm.c[this.e];
            switch (this.s) {
                case 0:
                    vm.C = left >= right;
                    break;
                case 1:
                    vm.C = left <= right;
                    break;
                case 2:
                    vm.C = left > right;
                    break;
                case 3:
                    vm.C = left < right;
                    break;
                case 4:
                    vm.C = left == right;
                    break;
                case 5:
                    vm.C = left != right;
                    break;
                case 6:
                    vm.C = left;
                    break;
                case 7:
                    vm.C = !left
            }
        }
        ,
        // Opcode 7: jump to address `A`. Mode 0 always jumps, mode 1 jumps when
        // the condition flag is set, mode 2 when it is clear, mode 3 always
        // jumps and also clears the exception handler address `Q`. The
        // condition flag is reset afterwards in every case.
        o.prototype.M = function (vm) {
            var mode = this.s,
                target = this.A;
            if (mode === 0) {
                vm.o = target
            } else if (mode === 1) {
                if (vm.C) vm.o = target
            } else if (mode === 2) {
                if (!vm.C) vm.o = target
            } else if (mode === 3) {
                vm.o = target;
                vm.Q = null
            }
            vm.C = false
        }
        ,
        // Opcode 8: call into host JavaScript with `h` arguments popped from the
        // operand stack (only the first two are passed on); the result goes to
        // register 3. Mode 0 calls register `i` as a function, mode 1 first
        // pops a property name and calls that method on register `i`, mode 2
        // uses register `i` as a constructor.
        r.prototype.M = function (vm) {
            var args = [],
                left = this.h,
                method;
            switch (this.s) {
                case 0:
                    while (left-- > 0)
                        args.unshift(vm.f.pop());
                    vm.c[3] = vm.c[this.i](args[0], args[1]);
                    break;
                case 1:
                    // The method name sits on top of the arguments.
                    method = vm.f.pop();
                    while (left-- > 0)
                        args.unshift(vm.f.pop());
                    vm.c[3] = vm.c[this.i][method](args[0], args[1]);
                    break;
                case 2:
                    while (left-- > 0)
                        args.unshift(vm.f.pop());
                    vm.c[3] = new vm.c[this.i](args[0], args[1])
            }
        }
        ,
        // Opcode 9: push onto the operand stack. Mode 0 pushes register `i`,
        // mode 1 the immediate `h`, mode 2 local slot `A`, mode 3 constant `A`.
        k.prototype.M = function (vm) {
            var stack = vm.f;
            if (this.s === 0) {
                stack.push(vm.c[this.i])
            } else if (this.s === 1) {
                stack.push(this.h)
            } else if (this.s === 2) {
                stack.push(vm.k[this.A])
            } else if (this.s === 3) {
                stack.push(vm.g[this.A])
            }
        }
        ,
        // Opcode 10: property access and name lookup.
        //   mode 0: pop a key, read register[e][key] into register `n`
        //   mode 1: pop a key, then a value, and store register[e][key] = value
        //   mode 2: pop a string and eval() it into register `n`
        // NOTE(review): this is a direct eval, so the evaluated text can see the
        // local names of this function (t, s, i, h, A); do not rename them. The
        // strings come from the constant pool decoded by G.prototype.y, not from
        // caller input.
        B.prototype.M = function (t) {
            switch (this.s) {
                case 0:
                    var s = t.f.pop();
                    t.c[this.n] = t.c[this.e][s];
                    break;
                case 1:
                    var i = t.f.pop()
                        , h = t.f.pop();
                    t.c[this.e][i] = h;
                    break;
                case 2:
                    var A = t.f.pop();
                    t.c[this.n] = eval(A)
            }
        }
        ,
        // Opcode 11: load constant `A` into register `i`.
        f.prototype.M = function (vm) {
            var constant = vm.g[this.A];
            vm.c[this.i] = constant
        }
        ,
        // Opcode 12: set `Q`, the address the run loop jumps to when an
        // instruction throws.
        u.prototype.M = function (vm) {
            vm.Q = this.A
        }
        ,
        // Opcode 13: throw the value held in register `i`.
        C.prototype.M = function (vm) {
            var thrown = vm.c[this.i];
            throw thrown
        }
        ,
        // Opcode 14: build a host-callable function and put it in register 3.
        // The current local slots are copied (behind a leading 0) into `n`.
        // Each call of the returned function runs a fresh G starting at
        // address `A`, with slot 0 replaced by the call's argument, and
        // returns that VM's register 3.
        b.prototype.M = function (e) {
            var t = this
                , n = [0];
            e.k.forEach(function (e) {
                n.push(e)
            });
            var r = function (r) {
                var o = new G;
                // `n` is shared by every call of this function, not copied.
                return o.k = n,
                    o.k[0] = r,
                    o.J(e.b, t.A, e.g, e.w),
                    o.c[3]
            };
            // Makes the function print like a built-in when converted to a string.
            r.toString = function () {
                return "() { [native code] }"
            }
                ,
                e.c[3] = r
        }
        ,
        // Opcode 15: build a literal from the operand stack into register `i`.
        // Mode 0 pops `h` value/key pairs (value first) into an object, mode 1
        // pops `h` items into an array, keeping their original order.
        g.prototype.M = function (vm) {
            var count = this.h,
                idx;
            if (this.s === 0) {
                var obj = {};
                for (idx = 0; idx < count; idx++) {
                    var value = vm.f.pop();
                    obj[vm.f.pop()] = value
                }
                vm.c[this.i] = obj
            } else if (this.s === 1) {
                var list = [];
                for (idx = 0; idx < count; idx++)
                    list.unshift(vm.f.pop());
                vm.c[this.i] = list
            }
        }
        ,
        // Load a program: base64-decode `e` and combine each pair of bytes
        // (high byte first) into one instruction word stored in `this.b`. A
        // trailing odd byte is ignored.
        // Buffer.from replaces the deprecated `new Buffer(string, encoding)`
        // constructor; the decoded bytes are identical.
        G.prototype.v = function (e) {
            for (var t = Buffer.from(e, "base64").toString("binary"), n = [], r = 0; r < t.length - 1; r += 2)
                n.push(t.charCodeAt(r) << 8 | t.charCodeAt(r + 1));
            this.b = n
        }
        ,
        // Decode one obfuscated constant: base64-decode `e`, then turn every
        // byte into the character code 24 ^ byte ^ previous, where `previous`
        // is the preceding output code (66 for the first byte). Returns the
        // decoded string.
        // Buffer.from replaces the deprecated `new Buffer(string, encoding)`
        // constructor; the decoded bytes are identical.
        G.prototype.y = function (e) {
            for (var t = Buffer.from(e, "base64").toString("binary"), n = 66, r = [], o = 0; o < t.length; o++) {
                var i = 24 ^ t.charCodeAt(o) ^ n;
                r.push(String.fromCharCode(i)),
                    n = i
            }
            return r.join("")
        }
        ,
        // Fill the constant pool `g`: string entries are decoded with y(),
        // every other entry is kept as it is.
        G.prototype.F = function (constants) {
            var vm = this;
            this.g = constants.map(function (entry) {
                if ("string" == typeof entry)
                    return vm.y(entry);
                return entry
            })
        }
        ,
        // Run loop.
        //   e: the program, either a base64 string (decoded with v(), and the
        //      constants in `n` decoded with F()) or an already decoded word
        //      array (used together with `n` as they are)
        //   t: start address, 0 when omitted
        //   n: constant pool, [] when omitted
        // Executes words until the running flag `G` is cleared or a non-number
        // is fetched. The loop returns silently when more than 500 ms pass
        // between two consecutive instructions.
        G.prototype.J = function (e, t, n) {
            for (t = t || 0,
                     n = n || [],
                     this.o = t,
                     "string" == typeof e ? (this.F(n),
                         this.v(e)) : (this.b = e,
                         this.g = n),
                     this.G = !0,
                     this.x = Date.now(); this.G;) {
                var r = this.b[this.o++];
                // Running past the end of the program yields undefined.
                if ("number" != typeof r)
                    break;
                var o = Date.now();
                if (500 < o - this.x)
                    return;
                this.x = o;
                try {
                    this.M(r)
                } catch (e) {
                    // Remember the error; without a handler address it is
                    // rethrown as a string, otherwise execution continues at Q.
                    if (this.R = e,
                        !this.Q)
                        throw "execption at " + this.o + ": " + e;
                    this.o = this.Q
                }
            }
        }
        ,
        // Execute one instruction word: the top four bits select the decoder in
        // `D`, which parses the word and then runs against this VM.
        G.prototype.M = function (word) {
            var opcode = (word & 61440) >> 12,
                Decoder = this.D[opcode];
            new Decoder(word).M(this)
        }
        ,
    // Bootstrap: run the embedded program on a fresh VM. Arguments are the
    // base64 instruction words, start address 0 and the obfuscated constant
    // pool (decoded by G.prototype.y).
    // NOTE(review): presumably this is what installs __g._encrypt - confirm.
    1 && (new G).J("4AeTAJwAqACcAaQAAAAYAJAAnAKoAJwDgAWTACwAnAKoACACGAESOTRHkQAkAbAEIAMYAJwFoAASAzREJAQYBBIBNEVkBnCiGAC0BjRAJAAYBBICNEVkBnDGGAC0BzRAJACwCJAAnAmoAJwKoACcC4ABnAyMBRAAMwZgBnESsA0aADRAkQAkABgCnA6gABoCnA+hQDRHGAKcEKAAMQdgBnFasBEaADRAkQAkABgCnBKgABoCnBOhQDRHZAZxkrAUGgA0QJEAJAAYApwVoABgBnG6sBYaADRAkQAkABgCnBegAGAGceKwGBoANECRACQAnAmoAJwZoABgBnIOsBoaADRAkQAkABgCnBugABoCnByhQDRHZAZyRrAdGgA0QJEAJAAQACAFsB4gBhgAnAWgABIBNEEkBxgHEgA0RmQGdJoQCBoFFAE5gCgFFAQ5hDSCJAgYB5AAGACcH4AFGAEaCDRSEP8xDzMQIAkQCBoFFAE5gCgFFAQ5hDSCkQAkCBgBGgg0UhD/MQ+QACAIGAkaBxQBOYGSABoAnB+EBRoIN1AUCDmRNJMkCRAIGgUUATmAKAUUBDmENIKRACQIGAEaCDRSEP8xD5AAIAgYCRoHFAI5gZIAGgCcH4QFGgg3UBQQOZE0kyQJGAMaCRQ/OY+SABoGnCCEBTTAJAMYAxoJFAY5khI/Nk+RABoGnCCEBTTAJAMYAxoJFAw5khI/Nk+RABoGnCCEBTTAJAMYAxoJFBI5khI/Nk+RABoGnCCEBTTAJAMYBxIDNEEkB3JsHgNQAA==", 0, ["BRgg", "BSITFQkTERw=", "LQYfEhMA", "PxMVFBMZKB8DEjQaBQcZExMC", "", "NhETEQsE", "Whg=", "Wg==", "MhUcHRARDhg=", "NBcPBxYeDQMF", "Lx4ODys+GhMC", "LgM7OwAKDyk6Cg4=", "Mx8SGQUvMQ==", "SA==", "ORoVGCQgERcCAxo=", "BTcAERcCAxo=", "BRg3ABEXAgMaFAo=", "SQ==", "OA8LGBsP", "GC8LGBsP", "Tg==", "PxAcBQ==", "Tw==", "KRsJDgE=", "TA==", "LQofHg4DBwsP", "TQ==", "PhMaNCwZAxoUDQUeGQ==", "PhMaNCwZAxoUDQUeGTU0GQIeBRsYEQ8=", "Qg==", "BWpUGxkfGRsZFxkbGR8ZGxkHGRsZHxkbGRcZG1MbGR8ZGxkXGRFpGxkfGRsZFxkbGR8ZGxkHGRsZHxkbGRcZGw==", "ORMRCyk0Exk8LQ==", "ORMRCyst"]);
    // Public entry point: returns __g._encrypt(payload). `_encrypt` is not
    // defined in the plain JavaScript of this file.
    // NOTE(review): presumably it is installed on __g by the bytecode run
    // above - confirm.
    var Q = function (payload) {
        var encrypted = __g._encrypt(payload);
        return encrypted
    };

    参考的是这位博主的博客:https://home.cnblogs.com/u/zkqiang

  • 相关阅读:
    [Qt] 事件机制(四)
    shell专题(六):条件判断
    最小生成树
    373. Find K Pairs with Smallest Sums
    gradle代理设置
    266. Palindrome Permutation
    53. Maximum Subarray
    378. Kth Smallest Element in a Sorted Matrix
    240. Search a 2D Matrix II
    74. Search a 2D Matrix
  • 原文地址:https://www.cnblogs.com/zhuchunyu/p/10782248.html
Copyright © 2011-2022 走看看