zoukankan      html  css  js  c++  java
  • python智能识别验证码自动登录

    代码实现

    from typing import Sized
    from docx import Document
    import time
    from docxtpl import DocxTemplate,InlineImage,RichText
    from docx.shared import Mm
    from PIL import Image
    from selenium import webdriver
    import ssl
    import sys
    import json
    import base64
    
    
    # Initialize the Chrome WebDriver shared by the rest of the script.
    driver = webdriver.Chrome()   
    # NOTE(review): the window is sized to 1280x800 and then maximized on the next
    # line, so the explicit size is presumably overridden — confirm both are needed.
    driver.set_window_size(1280, 800, driver.window_handles[0])
    driver.maximize_window()
    
    # Capture the captcha image
    def getimage():
        """Click the captcha element and save a screenshot of it as ``vcode.png``.

        The element is located by the XPath ``//*[@id='captchaImgU']`` on the
        page currently loaded in the module-level ``driver``. After the click
        the function waits two seconds before taking the element screenshot
        (presumably so a refreshed captcha has time to render — TODO confirm).
        """
        # Local import: ``find_element_by_xpath`` was removed in Selenium 4.3;
        # ``find_element(By.XPATH, ...)`` works on both Selenium 3 and 4.
        from selenium.webdriver.common.by import By

        ele_vcode = driver.find_element(By.XPATH, "//*[@id='captchaImgU']")
        ele_vcode.click()
        time.sleep(2)
        ele_vcode.screenshot('vcode.png')
    
    # Baidu OCR API recognition (coding=utf-8)
    
    # Parameters for the POST requests
    # NOTE(review): this replaces ssl's default HTTPS context factory with the
    # unverified one, so certificate verification is turned off for HTTPS requests
    # that rely on the default context — confirm this is intended.
    ssl._create_default_https_context = ssl._create_unverified_context
    # NOTE(review): the API credentials are hard-coded in the source; consider
    # loading them from the environment or a config file instead.
    API_KEY = 'fqe83vwceOl3A87umYHATbaB'
    SECRET_KEY = 'UFjtlGbBvhLAh1VSDok1apCuDx6AceRG'
    # OCR endpoint used for recognition and the OAuth endpoint used to obtain a token.
    OCR_URL = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic"
    TOKEN_URL = 'https://aip.baidubce.com/oauth/2.0/token'
    
    # Keep the script working on both Python 2 and Python 3: the URL helpers
    # live in different modules depending on the major version.
    IS_PY3 = sys.version_info.major == 3
    if IS_PY3:
        from urllib.request import urlopen
        from urllib.request import Request
        from urllib.error import URLError
        from urllib.parse import urlencode
        from urllib.parse import quote_plus
    else:
        # Python 2 exposes the same helpers from urllib2 / urllib. Without these
        # imports every later use of urlopen/Request/urlencode would raise
        # NameError on Python 2.
        from urllib2 import urlopen
        from urllib2 import Request
        from urllib2 import URLError
        from urllib import urlencode
        from urllib import quote_plus
    
    # Fetch the access token
    def fetch_token():
        """Request an OAuth access token from ``TOKEN_URL`` and return it.

        ``API_KEY`` and ``SECRET_KEY`` are posted as client credentials with
        a 5 second timeout, and the JSON reply is parsed.

        Returns:
            The ``access_token`` value from the reply.

        Raises:
            SystemExit: with status 1 when the request fails with ``URLError``,
                when the reply lacks ``access_token``/``scope``, or when the
                scope list does not contain ``brain_all_scope``.
        """
        params = {'grant_type': 'client_credentials',
                  'client_id': API_KEY,
                  'client_secret': SECRET_KEY}
        post_data = urlencode(params)
        if IS_PY3:
            post_data = post_data.encode('utf-8')
        req = Request(TOKEN_URL, post_data)
        try:
            f = urlopen(req, timeout=5)
            try:
                result_str = f.read()
            finally:
                # Always release the connection, even if read() fails.
                f.close()
        except URLError as err:
            # No response means there is nothing to parse. Stop here instead of
            # falling through to an UnboundLocalError on ``result_str``.
            print(err)
            sys.exit(1)
        if IS_PY3:
            result_str = result_str.decode()

        result = json.loads(result_str)

        if 'access_token' in result and 'scope' in result:
            if 'brain_all_scope' not in result['scope'].split(' '):
                print('please ensure has check the  ability')
                sys.exit(1)
            return result['access_token']

        print('please overwrite the correct API_KEY and SECRET_KEY')
        sys.exit(1)
    
    
    # Read a file
    def read_file(image_path):
        """Return the raw bytes of the file at ``image_path``.

        Args:
            image_path: path of the file to read; it is opened in binary mode.

        Returns:
            The file content as bytes, or ``None`` when the file cannot be
            opened or read (a message is printed in that case).
        """
        try:
            # ``with`` closes the handle on every path, replacing the manual
            # try/finally bookkeeping.
            with open(image_path, 'rb') as f:
                return f.read()
        except (IOError, OSError):
            # Only I/O failures are treated as "file not readable"; a bare
            # ``except`` would also swallow KeyboardInterrupt and SystemExit.
            print('read image file fail')
            return None
    
    
    # Call the remote service
    def request(url, data):
        """POST ``data`` to ``url`` and return the response body.

        Args:
            url: target URL.
            data: text payload; it is UTF-8 encoded before being sent.

        Returns:
            The response body (decoded to ``str`` on Python 3), or ``None``
            when a ``URLError`` is raised (the error is printed).
        """
        req = Request(url, data.encode('utf-8'))
        try:
            f = urlopen(req)
            try:
                result_str = f.read()
            finally:
                # Release the connection even if read() fails.
                f.close()
        except URLError as err:
            print(err)
            return None
        if IS_PY3:
            result_str = result_str.decode()
        return result_str
    
    # Recognize the captcha text
    def get_code():
        """Run OCR on ``vcode.png`` and return the recognized text.

        Fetches an access token, reads the saved captcha image, posts it
        base64-encoded to ``OCR_URL`` and concatenates the ``words`` field of
        every entry in the reply's ``words_result`` list.

        Returns:
            The concatenated text, or an empty string when the image cannot be
            read, the request fails, or the reply has no ``words_result``.
        """
        # Obtain the access token
        token = fetch_token()
        # Build the OCR request URL
        image_url = OCR_URL + "?access_token=" + token
        # Read the captcha image
        file_content = read_file('vcode.png')
        if file_content is None:
            # read_file already printed the failure; b64encode(None) would raise.
            return ""
        # Call the text-recognition service
        result = request(image_url, urlencode({'image': base64.b64encode(file_content)}))
        if result is None:
            # request already printed the failure; json.loads(None) would raise.
            return ""
        result_json = json.loads(result)
        words_result = result_json.get("words_result")
        if words_result is None:
            # Reply without recognition results (presumably an API error
            # payload); print it for diagnosis instead of raising KeyError.
            print(result)
            return ""
        return "".join(entry["words"] for entry in words_result)
    
    
    # Perform the login
    def phsc_login():
        """Open the login page and submit the form until the title changes.

        Loads the login URL in the module-level ``driver``, then repeats while
        the page title is still ``'登录'``: capture the captcha, recognize it,
        fill in user, password and captcha, click the login button and wait
        five seconds for the page to react.
        """
        # Local import: the ``find_element_by_*`` helpers were removed in
        # Selenium 4.3; ``find_element(By.XPATH, ...)`` works on Selenium 3 and 4.
        from selenium.webdriver.common.by import By

        def _fill(xpath, value):
            # Clear the form field matched by ``xpath`` and type ``value`` into it.
            field = driver.find_element(By.XPATH, xpath)
            field.clear()
            field.send_keys(value)

        driver.get("https://www.shgt.com/trade-web/login")
        time.sleep(5)
        # Still on the login page means the last attempt did not succeed:
        # fetch a new captcha and try again.
        while driver.title == '登录':
            getimage()
            vcode = get_code()
            _fill("//*[@name='user']", "username")
            _fill("//*[@name='pass']", "password")
            _fill("//*[@name='validateCode']", vcode)
            driver.find_element(
                By.XPATH, "//*[@class='el-button btn_login el-button--button']"
            ).click()
            time.sleep(5)
    
    # Run the login flow; always shut the browser down afterwards, even when
    # the flow raises (including SystemExit), so no Chrome process is leaked.
    try:
        phsc_login()
    finally:
        driver.quit()
            
        
        

    参考文章:

    百度OCR接口入门:https://ai.baidu.com/ai-doc/OCR/dk3iqnq51

    如何用代码调用百度OCR服务:https://cloud.baidu.com/doc/OCR/s/Pkrwx9ye4

    【Python+selenium】带图片验证码的登录自动化实战:https://www.jianshu.com/p/6755a40d961f

    5行Python实现验证码识别(识别率一般):https://jishuin.proginn.com/p/763bfbd60bb1

  • 相关阅读:
    P1903 [国家集训队]数颜色 / 维护队列 莫队算法
    P1016 旅行家的预算 模拟 贪心
    P3948 数据结构 差分数组
    乘法逆元 模板
    二分法 最大化平均值
    HDU5213 Lucky 莫队算法 容斥定理
    P1083 借教室 差分数组
    发布订阅、redis的配置文件、redis的主从、redis的持久化、
    nosql、redis、性能测试、命令相关、redis的数据类型string、list、hash、set、zset、
    nginx的日志、禁止访问、反向代理、权重、nginx location匹配规则、location分离、WSGI、
  • 原文地址:https://www.cnblogs.com/soymilk2019/p/15594917.html
Copyright © 2011-2022 走看看