zoukankan      html  css  js  c++  java
  • 模拟登录

    一、模拟登录的意义

      当我们需要爬取的数据是登录之后的个人信息, 就需要使用模拟登录

    二、使用打码平台处理验证码

      云打码

      打码兔 

    三、注册:

        普通用户注册

          充值题分(1块钱)

        开发者用户注册

          添加软件,下载调用示例

        填写相关信息

          用户名

          密码

          软件名称

          软件密钥

    四、调用实例

    import http.client, mimetypes, urllib, json, time, requests
    from get_img_code import YDMHttp
    
    
    def get_code(types, filename, timeout=30):
        """Recognize a captcha image through the YunDaMa (云打码) service.

        Args:
            types: Captcha type ID understood by the service (for example
                1004 means 4 alphanumeric characters). Different types are
                billed differently; see http://www.yundama.com/price.html.
            filename: Path of the captcha image file to recognize.
            timeout: Recognition timeout in seconds (defaults to 30).

        Returns:
            The ``result`` value returned by ``YDMHttp.decode`` (presumably
            the recognized captcha text -- confirm against ``get_img_code``),
            or ``None`` when the account placeholders were never filled in.
        """
        # NOTE(review): credentials and the developer key are hard-coded here;
        # real code should load them from configuration or the environment.
        # Regular (non-developer) account user name and password.
        username = 'lxh66685'
        password = 'nihao123!'

        # Software ID and key from the developer console ("My software");
        # required for the developer revenue share.
        appid = 7971
        appkey = 'b6fef487706d29041c20e6f9da220669'

        # Guard kept from the vendor sample: refuse to run with the
        # placeholder user name.
        if username == 'username':
            print('请设置好相关参数再测试')
            return None

        yundama = YDMHttp(username, password, appid, appkey)

        # Log in to the service.
        uid = yundama.login()
        print('uid: %s' % uid)

        # Query the remaining balance.
        balance = yundama.balance()
        print('balance: %s' % balance)

        # Run recognition: image path, captcha type ID, timeout in seconds.
        cid, result = yundama.decode(filename, types, timeout)
        print('cid: %s, result: %s' % (cid, result))

        return result

    五、模拟登录古诗文网

    # Simulated login to the gushiwen site (www.gushiwen.com).
    import requests
    from lxml import etree
    from urllib import request

    # Issue every request through one Session so cookies are handled
    # automatically. It is bound to its own name so the `requests` module
    # itself is not shadowed.
    session = requests.Session()

    url = "https://www.gushiwen.com/main/login.html"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36"
    }

    # 1. Request the login page and download the captcha image locally.
    page_text = session.get(url=url, headers=headers).text

    tree = etree.HTML(page_text)
    captcha_src = tree.xpath('//div[@class="lg_content"]/ul/li[3]/img/@src')
    if not captcha_src:
        # Fail with a readable message instead of a bare IndexError when the
        # page layout no longer matches the XPath.
        raise RuntimeError("captcha image not found on the login page")
    img_url = "https://www.gushiwen.com/" + captcha_src[0]

    # Download the image through the session (not urllib's urlretrieve) so
    # the User-Agent header and the session cookies are sent with it.
    page_content = session.get(url=img_url, headers=headers).content

    with open('./code.jpg', 'wb') as f:
        f.write(page_content)


    # 2. Recognize the captcha image (1004 = 4 alphanumeric characters).
    code = get_code(1004, './code.jpg')
    print(code)

    data = {
        "user": "lxh661314",
        "pass": "nihao123!",
        "imgvc": code,
    }

    # 3. Perform the simulated login, then fetch the user page with the
    # same session.
    response = session.post(url=url, headers=headers, data=data)

    login_page_text = session.get(url="https://www.gushiwen.com/u.html", headers=headers).text

    with open('./login.html', 'w', encoding="utf-8") as f:
        f.write(login_page_text)

    六、模拟登录古诗文网(so.gushiwen.org)

    # Log in to the gushiwen site (so.gushiwen.org).
    import requests
    from lxml import etree


    def _first(matches, what):
        """Return the first XPath match, or fail with a readable message."""
        if not matches:
            raise RuntimeError("login page is missing %s" % what)
        return matches[0]


    # One Session for every request so cookies are carried automatically.
    # It is bound to its own name so the `requests` module is not shadowed.
    session = requests.Session()

    home_url = "https://so.gushiwen.org/user/login.aspx"
    login_url = "https://so.gushiwen.org/user/login.aspx"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36"
    }

    page_text = session.get(url=home_url, headers=headers).text

    # Some login form fields are dynamic and must be parsed out of the login
    # page source before posting.
    tree = etree.HTML(page_text)
    view_state = _first(
        tree.xpath('//input[@id="__VIEWSTATE"]/@value'), "__VIEWSTATE")
    view_state_generator = _first(
        tree.xpath('//input[@name="__VIEWSTATEGENERATOR"]/@value'),
        "__VIEWSTATEGENERATOR")
    img_code_url = "https://so.gushiwen.org" + _first(
        tree.xpath('//img[@id="imgCode"]/@src'), "the captcha image")

    print(view_state)

    # Download the captcha through the session so the User-Agent header is
    # sent with the request.
    page_content = session.get(url=img_code_url, headers=headers).content

    with open('./code.png', 'wb') as f:
        f.write(page_content)

    # 1004 = 4 alphanumeric characters.
    code = get_code(1004, './code.png')

    data = {
        "__VIEWSTATE": view_state,
        "__VIEWSTATEGENERATOR": view_state_generator,
        "from": "",
        "email": "lxh661314@163.com",
        "pwd": "nihao123!",
        "code": code,
        "denglu": "登录",
    }

    # Simulated login.
    page_text = session.post(url=login_url, headers=headers, data=data).text

    with open('./gushiwen.html', 'w', encoding='utf-8') as f:
        f.write(page_text)

    # 1. Some login request parameters are dynamic and have to be parsed
    #    from the login page source.
    # 2. Prefer the requests library for downloading images, because it
    #    sends the User-Agent header.
    # 3. Use Session() so cookies are carried automatically between requests.
  • 相关阅读:
    8.3.ZooKeeper集群安装配置
    Shiro Demo 示例(SpringMVC-Mybatis-Shiro-redis)
    Shiro 学习
    JavaWeb项目:Shiro实现简单的权限控制(整合SSM)
    权限管理——权限模型和权限控制
    Java程序员面试题集(151-180)
    Java面试题集(136-150)
    Java面试题集(131-135)
    Java面试题集(116-135)
    Java面试题集(1-50)
  • 原文地址:https://www.cnblogs.com/youhongliang/p/12707676.html
Copyright © 2011-2022 走看看