zoukankan      html  css  js  c++  java
  • 爬虫之自动登入示例

    自动登入示例

    #!/usr/bin/env python
    # -*- coding:utf-8 -*-
    import requests
    
    
    # ############## Approach 1: pass cookies by hand ##############
    # Disabled example (it is a bare string literal, so nothing here executes).
    # Flow shown in the string: GET any page to obtain cookies, POST the login
    # form while sending those cookies, then POST the vote sending only the
    # 'gpsd' cookie taken from the first response.
    """
    # ## 1、首先登陆任何页面,获取cookie
    i1 = requests.get(url="http://dig.chouti.com/help/service")
    i1_cookies = i1.cookies.get_dict()
    
    # ## 2、用户登陆,携带上一次的cookie,后台对cookie中的 gpsd 进行授权
    i2 = requests.post(
        url="http://dig.chouti.com/login",
        data={
            'phone': "8615131255089",
            'password': "xxooxxoo",
            'oneMonth': ""
        },
        cookies=i1_cookies
    )
    
    # ## 3、点赞(只需要携带已经被授权的gpsd即可)
    gpsd = i1_cookies['gpsd']
    i3 = requests.post(
        url="http://dig.chouti.com/link/vote?linksId=8589523",
        cookies={'gpsd': gpsd}
    )
    
    print(i3.text)
    """
    
    
    # ############## Approach 2: requests.Session ##############
    # Disabled example (also a bare string literal). Same three requests as
    # approach 1, but issued through one requests.Session so no cookies are
    # passed explicitly between the calls.
    """
    import requests
    
    session = requests.Session()
    i1 = session.get(url="http://dig.chouti.com/help/service")
    i2 = session.post(
        url="http://dig.chouti.com/login",
        data={
            'phone': "8615131255089",
            'password': "xxooxxoo",
            'oneMonth': ""
        }
    )
    i3 = session.post(
        url="http://dig.chouti.com/link/vote?linksId=8589523"
    )
    print(i3.text)
    
    """
    
    抽屉新热榜
    抽屉新热榜
    #!/usr/bin/env python
    # -*- coding:utf-8 -*-
    
    import requests
    from bs4 import BeautifulSoup
    
    # ############## 方式一 ##############
    #
    # # 1. 访问登陆页面,获取 authenticity_token
    # i1 = requests.get('https://github.com/login')
    # soup1 = BeautifulSoup(i1.text, features='lxml')
    # tag = soup1.find(name='input', attrs={'name': 'authenticity_token'})
    # authenticity_token = tag.get('value')
    # c1 = i1.cookies.get_dict()
    # i1.close()
    #
    # # 2. 携带authenticity_token和用户名密码等信息,发送用户验证
    # form_data = {
    #     "authenticity_token": authenticity_token,
    #     "utf8": "",
    #     "commit": "Sign in",
    #     "login": "wupeiqi@live.com",
    #     'password': 'xxoo'
    # }
    #
    # i2 = requests.post('https://github.com/session', data=form_data, cookies=c1)
    # c2 = i2.cookies.get_dict()
    # c1.update(c2)
    # i3 = requests.get('https://github.com/settings/repositories', cookies=c1)
    #
    # soup3 = BeautifulSoup(i3.text, features='lxml')
    # list_group = soup3.find(name='div', class_='listgroup')
    #
    # from bs4.element import Tag
    #
    # for child in list_group.children:
    #     if isinstance(child, Tag):
    #         project_tag = child.find(name='a', class_='mr-1')
    #         size_tag = child.find(name='small')
    #         temp = "项目:%s(%s); 项目路径:%s" % (project_tag.get('href'), size_tag.string, project_tag.string, )
    #         print(temp)
    
    
    
    # ############## 方式二 ##############
    # session = requests.Session()
    # # 1. 访问登陆页面,获取 authenticity_token
    # i1 = session.get('https://github.com/login')
    # soup1 = BeautifulSoup(i1.text, features='lxml')
    # tag = soup1.find(name='input', attrs={'name': 'authenticity_token'})
    # authenticity_token = tag.get('value')
    # c1 = i1.cookies.get_dict()
    # i1.close()
    #
    # # 2. 携带authenticity_token和用户名密码等信息,发送用户验证
    # form_data = {
    #     "authenticity_token": authenticity_token,
    #     "utf8": "",
    #     "commit": "Sign in",
    #     "login": "wupeiqi@live.com",
    #     'password': 'xxoo'
    # }
    #
    # i2 = session.post('https://github.com/session', data=form_data)
    # c2 = i2.cookies.get_dict()
    # c1.update(c2)
    # i3 = session.get('https://github.com/settings/repositories')
    #
    # soup3 = BeautifulSoup(i3.text, features='lxml')
    # list_group = soup3.find(name='div', class_='listgroup')
    #
    # from bs4.element import Tag
    #
    # for child in list_group.children:
    #     if isinstance(child, Tag):
    #         project_tag = child.find(name='a', class_='mr-1')
    #         size_tag = child.find(name='small')
    #         temp = "项目:%s(%s); 项目路径:%s" % (project_tag.get('href'), size_tag.string, project_tag.string, )
    #         print(temp)
    
    github
    github
    #!/usr/bin/env python
    # -*- coding:utf-8 -*-
    import time
    
    import requests
    from bs4 import BeautifulSoup
    
    # Log in to zhihu.com with e-mail + password + captcha, then print the
    # nickname shown on the profile settings page.

    # Browser-like User-Agent sent with every request below.
    # NOTE(review): presumably the site rejects the default python-requests
    # User-Agent -- confirm.
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36',
    }

    # One session for all requests so cookies set by earlier responses are
    # sent with later ones.
    session = requests.Session()

    # Step 1: fetch the sign-in page and read the hidden "_xsrf" form field.
    i1 = session.get(
        url='https://www.zhihu.com/#signin',
        headers=browser_headers,
    )

    soup1 = BeautifulSoup(i1.text, 'lxml')
    xsrf_tag = soup1.find(name='input', attrs={'name': '_xsrf'})
    xsrf = xsrf_tag.get('value')

    # Step 2: download the captcha image and save it for the user to read.
    # The current timestamp is passed as the "r" query parameter.
    current_time = time.time()
    i2 = session.get(
        url='https://www.zhihu.com/captcha.gif',
        params={'r': current_time, 'type': 'login'},
        headers=browser_headers,
    )

    with open('zhihu.gif', 'wb') as f:
        f.write(i2.content)

    # Step 3: ask the user for the captcha text and submit the login form.
    captcha = input('请打开zhihu.gif文件,查看并输入验证码:')
    form_data = {
        "_xsrf": xsrf,
        'password': 'xxooxxoo',
        # Send what the user typed; the original sent the literal string
        # 'captcha', so the captcha check could never succeed.
        "captcha": captcha,
        'email': '424662508@qq.com'
    }
    i3 = session.post(
        url='https://www.zhihu.com/login/email',
        data=form_data,
        headers=browser_headers,
    )

    # Step 4: load the profile settings page and print the nickname found
    # inside the element with id "rename-section".
    i4 = session.get(
        url='https://www.zhihu.com/settings/profile',
        headers=browser_headers,
    )

    soup4 = BeautifulSoup(i4.text, 'lxml')
    tag = soup4.find(id='rename-section')
    nick_name = tag.find('span', class_='name').string
    print(nick_name)
    
    知乎
    知乎
    #!/usr/bin/env python
    # -*- coding:utf-8 -*-
    import re
    import json
    import base64
    
    import rsa
    import requests
    
    
    def js_encrypt(text):
        """Encrypt *text* with the embedded RSA public key.

        The key is stored as base64-encoded DER and loaded with
        ``rsa.PublicKey.load_pkcs1_openssl_der``. The name suggests this
        mirrors the encryption done by the site's JavaScript -- not
        verifiable from this file.

        :param text: plain-text string; it is UTF-8 encoded before encryption.
        :return: the ciphertext as a single-line base64 ``str``.
        """
        b64der = 'MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQCp0wHYbg/NOPO3nzMD3dndwS0MccuMeXCHgVlGOoYyFwLdS24Im2e7YyhB0wrUsyYf0/nhzCzBK8ZC9eCWqd0aHbdgOQT6CuFQBMjbyGYvlVYU2ZP7kG9Ft6YV6oc9ambuO7nPZh+bvXH0zDKfi02prknrScAKC0XhadTHT3Al0QIDAQAB'
        der = base64.standard_b64decode(b64der)

        pk = rsa.PublicKey.load_pkcs1_openssl_der(der)
        v1 = rsa.encrypt(bytes(text, 'utf8'), pk)
        # b64encode never inserts line breaks, so it yields the same bytes as
        # encodebytes() with every b'\n' removed (the original literal for
        # that newline was broken across two lines and did not parse).
        value = base64.b64encode(v1)
        value = value.decode('utf8')

        return value
    
    
    # Log in to cnblogs.com through its JSON sign-in endpoint, then fetch a
    # page from i.cnblogs.com with the same session and print its HTML.
    session = requests.Session()

    # Step 1: load the sign-in page and pull the VerificationToken value out
    # of the page source.
    i1 = session.get('https://passport.cnblogs.com/user/signin')
    # Non-greedy capture: stop at the first closing quote, so further quoted
    # text on the same line cannot leak into the token.
    rep = re.compile(r"'VerificationToken': '(.*?)'")
    v = rep.search(i1.text)
    if v is None:
        # Fail with a clear message instead of AttributeError on None.
        raise RuntimeError('VerificationToken not found in the sign-in page')
    verification_token = v.group(1)

    # Step 2: post the credentials; both fields go through js_encrypt before
    # being sent.
    form_data = {
        'input1': js_encrypt('wptawy'),
        'input2': js_encrypt('asdfasdf'),
        'remember': False
    }

    # The body is sent as JSON, with the token from step 1 in a request header.
    i2 = session.post(url='https://passport.cnblogs.com/user/signin',
                      data=json.dumps(form_data),
                      headers={
                          'Content-Type': 'application/json; charset=UTF-8',
                          'X-Requested-With': 'XMLHttpRequest',
                          'VerificationToken': verification_token}
                      )

    # Step 3: request a page with the logged-in session and print it.
    i3 = session.get(url='https://i.cnblogs.com/EditDiary.aspx')

    print(i3.text)
    
    博客园
    博客园
    #!/usr/bin/env python
    # -*- coding:utf-8 -*-
    
    import re

    import requests
    
    
    # Log in to lagou.com in two requests and print the login response body.

    # Browser-like User-Agent sent with both requests.
    USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'

    # Step 1: GET the login page and read X_Anti_Forge_Token and
    # X_Anti_Forge_Code from the page source.
    #   URL:     https://passport.lagou.com/login/login.html
    #   Method:  GET
    #   Headers: User-Agent
    r1 = requests.get('https://passport.lagou.com/login/login.html',
                      headers={
                          'User-Agent': USER_AGENT,
                      },
                      )

    token_matches = re.findall("X_Anti_Forge_Token = '(.*?)'", r1.text, re.S)
    code_matches = re.findall("X_Anti_Forge_Code = '(.*?)'", r1.text, re.S)
    if not token_matches or not code_matches:
        # Fail with a clear message instead of a bare IndexError when the page
        # does not contain the expected assignments.
        raise RuntimeError('X_Anti_Forge_Token / X_Anti_Forge_Code not found in the login page')
    X_Anti_Forge_Token = token_matches[0]
    X_Anti_Forge_Code = code_matches[0]
    print(X_Anti_Forge_Token, X_Anti_Forge_Code)
    # print(r1.cookies.get_dict())

    # Step 2: POST the login form.
    #   URL:     https://passport.lagou.com/login/login.json
    #   Method:  POST
    #   Headers: cookies from step 1, User-Agent, Referer (the login page),
    #            the two anti-forgery values from step 1, X-Requested-With
    #   Body:    isValidate, username, password, request_form_verifyCode, submit
    # NOTE(review): the header names are spelled "Anit" (not "Anti"); they are
    # kept exactly as written -- confirm this is what the site expects.
    # NOTE(review): the password value looks like a pre-computed hash rather
    # than plain text -- confirm how the site derives it.
    r2 = requests.post(
        'https://passport.lagou.com/login/login.json',
        headers={
            'User-Agent': USER_AGENT,
            'Referer': 'https://passport.lagou.com/login/login.html',
            'X-Anit-Forge-Code': X_Anti_Forge_Code,
            'X-Anit-Forge-Token': X_Anti_Forge_Token,
            'X-Requested-With': 'XMLHttpRequest'
        },
        data={
            "isValidate": True,
            'username': '15131255089',
            'password': 'ab18d270d7126ea65915c50288c22c0d',
            'request_form_verifyCode': '',
            'submit': ''
        },
        # No Session is used here, so the cookies from step 1 are passed by hand.
        cookies=r1.cookies.get_dict()
    )
    print(r2.text)
    
    拉勾网
    拉勾网

    文章转载来源于:  https://www.cnblogs.com/wupeiqi/articles/6229292.html

  • 相关阅读:
    spark调度器FIFO,FAIR
    elasticsearch5.6.8 创建TransportClient工具类
    elasticsearch TransportClient bulk批量提交数据
    java 参数来带回方法运算结果
    idea上传代码到git本地仓库
    2020-03-01 助教一周小结(第三周)
    2020-02-23 助教一周小结(第二周)
    2020-02-16 助教一周小结(第一周)
    寻找两个有序数组的中位数
    无重复字符的最长子串
  • 原文地址:https://www.cnblogs.com/KIV-Y/p/10832654.html
Copyright © 2011-2022 走看看