zoukankan html css js c++ java

python语言验证码识别，以后不用老输入验证码了。

1.Python 3.6 安装包

1.要加环境变量

2.pip安装PIL库

3.pip安装pytesseract模块

2.tesseract-ocr-setup-4.00.00dev.exe ---光学识别软件

D:\Tesseract-OCR\tessdata 要加入环境变量。

3.jTessBoxEditor-2.2.0.zip 训练字库 ---依赖java环境

del /a /f /q "C:\Program Files (x86)\Tesseract-OCR\tessdata\num.traineddata"

copy num.traineddata "C:\Program Files (x86)\Tesseract-OCR\tessdata"

pause

del /a /f /q num.font.exp0.tr

del /a /f /q num.inttemp

del /a /f /q num.normproto

del /a /f /q num.pffmtable

del /a /f /q num.traineddata

del /a /f /q num.shapetable

del /a /f /q num.unicharset

del /a /f /q unicharset

pause

echo Run Tesseract for Training..

tesseract.exe num.font.exp0.tif num.font.exp0 nobatch box.train

echo Compute the Character Set..

unicharset_extractor.exe num.font.exp0.box

mftraining -F font_properties -U unicharset -O num.unicharset num.font.exp0.tr

echo Clustering..

cntraining.exe num.font.exp0.tr

echo Rename Files..

rename normproto num.normproto

rename inttemp num.inttemp

rename pffmtable num.pffmtable

rename shapetable num.shapetable

echo Create Tessdata..

combine_tessdata.exe num.

echo. & pause

tesseract num.font.exp0.tif num.font.exp0 batch.nochop makebox

4.实例脚本---

#############################################################################################
#  为了不输入验证码，特别写了这个小脚本                                                          #
#         作者:brian                                                                     #
#         时间:20190109                                                                      #
#############################################################################################

from selenium import webdriver
from time import sleep
from PIL import Image
import pytesseract
import datetime,time,random
now = datetime.datetime.now()
name = now.strftime("%Y%m%d_%H_%M_%S")
week = datetime.datetime.now().weekday()

#配置变量
url = "https://yq.aliyun.com/ziliao/4039425643653"
user = "YourLoginName"
password = "Qwe12345-*"


def binarizing(img, threashold):
    """Return a black-and-white version of *img*.

    The image is first converted to mode "L" (grayscale); every pixel whose
    value is below *threashold* is then set to 0 and every other pixel to
    255. The converted image is modified in place and returned.

    :param img: image object providing ``convert``, ``load`` and ``size``
    :param threashold: cut-off value; pixels strictly below it become 0
    :return: the converted, thresholded image
    """
    gray = img.convert("L")
    pixels = gray.load()
    columns, rows = gray.size
    for row in range(rows):
        for col in range(columns):
            pixels[col, row] = 0 if pixels[col, row] < threashold else 255
    return gray
def removeFrame(img, width):
    """Paint a border of *width* pixels around *img* with the value 255.

    The image is modified in place through its pixel-access object; nothing
    is returned.

    :param img: image object providing ``size`` and ``load``
    :param width: thickness of the border, in pixels. Values larger than the
        image are clamped to the image dimensions (the whole image is then
        painted); zero or negative values paint nothing.
    :return: None
    """
    w, h = img.size
    pixdata = img.load()
    # Clamp per axis so an oversized border never indexes outside the image.
    border_x = max(0, min(width, w))
    border_y = max(0, min(width, h))
    # Left and right strips, full height.
    for y in range(h):
        for x in range(border_x):
            pixdata[x, y] = 255
            pixdata[w - 1 - x, y] = 255
    # Top and bottom strips, full width.
    for x in range(w):
        for y in range(border_y):
            pixdata[x, y] = 255
            pixdata[x, h - 1 - y] = 255




def write_log(data, path=r"d:user7000000000桌面pic打卡成功的记录.txt"):
    """Prepend *data* as a new first line of the check-in log file.

    The existing file content is kept below the new line, so the most recent
    record is always on the first line. If the log file does not exist yet it
    is created.

    :param data: the log line to record (without trailing newline)
    :param path: log file location; defaults to the script's original path.
        NOTE(review): the directory separators in the default path appear to
        have been lost -- confirm the intended location.
    :return: None
    """
    try:
        with open(path, 'r') as f:
            content = f.read()
    except FileNotFoundError:
        # First run: there is no previous log to keep.
        content = ""
    with open(path, 'w') as f:
        f.write(data + "\n" + content)


# Wait a random whole number of minutes (1-10) before starting, so the
# backend does not see the check-in happen at the same time every day.
delayTime = random.randint(1,10)*60
print("等待打卡时间是%s秒。" %delayTime)
# The upper bound is delayTime + 1 so the loop sleeps for the full announced
# delayTime seconds (the previous bound stopped one second short).
for i in range(1, delayTime + 1):
    print("等待第%s秒."%i)
    time.sleep(1)
print("开始打卡")



while True:
    # Captcha recognition is unreliable, so the whole login + check-in flow
    # is retried in a fresh browser until one attempt runs to completion.
    dr = None  # lets the error handler detect that the browser never started
    try:
        dr = webdriver.Chrome()
        dr.maximize_window()
        dr.get(url)
        dr.find_element_by_xpath("//*[@id='username']").send_keys(user)
        dr.find_element_by_xpath("//*[@id='password']").send_keys(password)
        sleep(2)
        # Get the captcha: screenshot the whole page, then crop the
        # screenshot to the bounding box of the captcha image element.
        # NOTE(review): the directory separators in these Windows paths look
        # like they were lost (e.g. "picsource" was probably "pic\source");
        # confirm the intended directories before running.
        shot_path = r"d:user56765453345桌面picsource\%s.png" % name
        dr.get_screenshot_as_file(shot_path)
        captcha = dr.find_element_by_class_name('yzmImg')
        location = captcha.location
        size = captcha.size
        left = location['x']
        top = location['y']
        right = left + size['width']
        bottom = top + size['height']
        a = Image.open(shot_path)
        im = a.crop((left, top, right, bottom))
        im.save(shot_path)
        # Binarise and strip the 3-pixel frame before handing it to OCR.
        pic1 = binarizing(im, 110)
        removeFrame(pic1, 3)
        pic1.save(r"d:user56765453345桌面pic\new\%s.tif" % datetime.datetime.now().strftime("%Y%m%d_%H_%M_%S"))
        vcode = pytesseract.image_to_string(pic1, lang="num")
        # Drop every whitespace character (spaces, newlines, form feeds) so
        # trailing OCR output does not break the length check below.
        varify_word = "".join(vcode.split())
        if len(varify_word) != 4:
            print("验证码错误不等于4位",varify_word)
            dr.quit()
            continue
        else:
            print("验证码等于4位", varify_word)
        dr.find_element_by_xpath("//*[@id='verifyCode']").send_keys(varify_word)
        sleep(1)
        dr.find_element_by_xpath("//*[@id='loginForm']/div[5]/div/img").click()
        sleep(2)
        # Still on the login URL after submitting means the login failed.
        if dr.current_url == "https://yq.aliyun.com/ziliao/":
            print("登录百度系统失败")
            dr.quit()
            continue
        else:
            print("登录百度系统成功")
    except Exception as e:
        # Any error during the login phase: report it and retry.
        print("登录过程中出错了", e)
        if dr is None:
            # The browser itself could not be started; retrying immediately
            # cannot succeed, so surface the real error instead of looping.
            raise
        dr.quit()
        continue
    sleep(2)
    # Decide between clocking in and clocking out, using 12:00:00 noon as
    # the dividing line.
    if int(time.strftime("%H%M%S")) <= 120000:
        print("打上班卡去")
        try:
            if dr.find_element_by_xpath('//*[text()="上班签到"]').get_attribute("disabled") == "true":
                # Button already disabled: today's clock-in was done earlier.
                print("已经打过卡了，还打个毛线")
                dr.quit()
                break
            else:
                dr.find_element_by_xpath('//*[text()="上班签到"]').click()
                sleep(2)
                dr.refresh()
                sleep(2)
                # After the refresh a disabled button confirms success.
                if dr.find_element_by_xpath('//*[text()="上班签到"]').get_attribute("disabled") == "true":
                    msg_log = "今天工作日是%s,星期%s,打上班卡成功了，打卡时间是%s." % (time.strftime("%Y-%m-%d"), week + 1, time.strftime("%H:%M:%S"))
                    write_log(msg_log)
                    print(msg_log)
                    dr.quit()
                    break
                else:
                    print("去点击打卡了，但是打卡失败了")
                    dr.quit()
                    continue
        except Exception as e:
            print("打上班卡过程出错了",e)
            dr.quit()
            continue

    else:

        try:
            print("打下班卡去")
            sleep(2)
            # Read this morning's clock-in time from today's calendar cell and
            # combine it with today's date to get a full timestamp.
            startTime = dr.find_element_by_xpath("//div[@class='fc-today']/div/i").text
            startTime_srt = datetime.datetime.now().strftime("%Y:%m:%d")
            startTime_new = startTime_srt + " " + startTime
            print("早上打卡时间是：%s" % startTime_new)
            timeArray = time.strptime(startTime_new, "%Y:%m:%d %H:%M:%S")
            tartTime_Stamp = int(time.mktime(timeArray))
            # Target clock-out timestamp: 9.5 hours after the clock-in.
            end_time = int(tartTime_Stamp + 60*9.5*60)
            struct_time = time.localtime(end_time)
            Format_time = time.strftime("%Y:%m:%d %H:%M:%S",struct_time)
            print("目标下班时间是: %s  "% Format_time)
            now_end_time = int(time.time())  # current timestamp
            # Only clock out once the target time has already passed.
            if end_time < now_end_time:
                dr.find_element_by_xpath('//*[text()="下班签退"]').click()
                sleep(2)
                dr.find_element_by_xpath('//*[@id="bsWinPopupBoxModal"]/div/div/div[3]/button[1]').click()
                sleep(2)
                # Take the recorded clock-out time from the page itself.
                OffWorkTime = dr.find_element_by_xpath("//div[@class='fc-today']/div/i[2]").text
                dr.quit()
                msg_log = "今天工作日是%s,星期%s,打下班卡成功了，打卡时间是%s." % (time.strftime("%Y-%m-%d"), week + 1, OffWorkTime)
                write_log(msg_log)
                print(msg_log)
                break
            else:
                print("现在时间是: %s,目标下班时间是: %s，还没到打卡时间哦，等等吧 ！" %(datetime.datetime.now().strftime("%Y:%m:%d %H:%M:%S"),Format_time))
                dr.quit()
                break
        except Exception as e:
            print("打下班卡过程出错了",e)
            dr.quit()
            continue

来自：https://www.cnblogs.com/MrRead/p/7656800.html

https://www.cnblogs.com/MrRead/p/7656800.html

查看全文

相关阅读:
在oschina上新建项目的步骤
 将txt转为DataTable的方法
 设置IIS让网站拥有“网站目录外文件”的读写权限的操作（图文）
从客户端****中检测到有潜在危险的 Request.QueryString 值在.net mvc下的解决方法
 动态调用类里的方法的示例(wjx)
Pyhton忽略返回变量方法
 wsl安装Ubuntu16.04+Python2.7
win10快速调用Shell代替GitBash
wsl与win10文件互访
 OpenCV报错file too short解决

原文地址：https://www.cnblogs.com/brianlai/p/10302891.html