zoukankan html css js c++ java

pytesseract 验证码识别

以下是完整代码；如有不懂之处，欢迎加群讨论。
# *-* coding:utf-8 *-*  #
import json
import requests
import pytesseract
import time
import datetime
from PIL import Image
from bs4 import BeautifulSoup
import urllib3
import random
import os

def binarizing(img, threshold):
    """Turn a grayscale image into a pure black-and-white one, in place.

    Every pixel whose value is strictly below ``threshold`` becomes 0
    (black); every other pixel becomes 255 (white).

    :param img: image object exposing ``load()`` and ``size``; the caller in
        this file passes a PIL image converted to mode ``"L"``.
    :param threshold: cut-off value compared against each pixel.
    :return: the same ``img`` object, after modification.
    """
    pixels = img.load()
    width, height = img.size
    for row in range(height):
        for col in range(width):
            # Strictly-below goes black; equal-or-above goes white.
            pixels[col, row] = 0 if pixels[col, row] < threshold else 255
    return img


def depoint(img):
    """Remove thin interference lines and stray dots from a binarised image.

    The image is modified in place, scanning column by column (``x`` outer,
    ``y`` inner), so later pixels see the changes made to earlier ones.
    Pixel value 0 is treated as black and 255 as white.

    For every pixel with ``1 <= x <= w - 2`` and ``1 <= y <= h - 2``:

    * pixels inside the margin (``x <= 2``, ``x >= w - 2``, ``y <= 2`` or
      ``y >= h - 2``) are forced to white;
    * otherwise a black pixel is turned white when it looks like part of a
      one-pixel-wide vertical line, a one-pixel-wide horizontal line, or
      has no black neighbour directly above, below, left or right.

    NOTE: the outermost row and column (``x == 0``, ``x == w - 1``,
    ``y == 0``, ``y == h - 1``) are never visited and keep their values.

    :param img: image object exposing ``size``, ``getpixel((x, y))`` and
        ``putpixel((x, y), value)``; the caller in this file passes a PIL
        mode ``"L"`` image that has already been binarised.
    :return: the same ``img`` object, after modification.
    """
    w, h = img.size
    for x in range(1, w - 1):
        # Horizontal neighbours of the current column.
        left = x - 1
        right = x + 1

        for y in range(1, h - 1):
            # Blank the margin around the image.
            if x <= 2 or x >= (w - 2) or y <= 2 or y >= (h - 2):
                img.putpixel((x, y), 255)
                continue

            # Only black pixels are candidates for removal.
            if img.getpixel((x, y)) != 0:
                continue

            # Vertical neighbours of the current row.
            up = y - 1
            down = y + 1

            # Colours around the target pixel (0 = black, 255 = white).
            up_color = img.getpixel((x, up))
            down_color = img.getpixel((x, down))
            left_color = img.getpixel((left, y))
            left_down_color = img.getpixel((left, down))
            right_color = img.getpixel((right, y))
            right_up_color = img.getpixel((right, up))
            right_down_color = img.getpixel((right, down))

            if down_color == 0:
                # Vertical interference line: black continues downwards
                # while both sides of this pixel and the next are white.
                if (left_color == 255 and left_down_color == 255
                        and right_color == 255 and right_down_color == 255):
                    img.putpixel((x, y), 255)
            elif right_color == 0:
                # Horizontal interference line: black continues to the
                # right while above and below both pixels are white.
                if (down_color == 255 and right_down_color == 255
                        and up_color == 255 and right_up_color == 255):
                    img.putpixel((x, y), 255)

            # Isolated dot or diagonal-only connection: none of the four
            # direct neighbours is black.
            if (left_color == 255 and right_color == 255
                    and up_color == 255 and down_color == 255):
                img.putpixel((x, y), 255)

    return img



def get_code():
    """Recognise the captcha stored in ``1.jpg`` with pytesseract.

    Opens the local file ``1.jpg`` (nothing is downloaded here), displays
    it, converts it to grayscale, binarises it with a threshold of 110,
    removes noise, displays the cleaned image and runs OCR on it.

    :return: the text returned by ``pytesseract.image_to_string``.
    """
    captcha = Image.open('1.jpg')
    captcha.show()  # preview of the raw captcha

    # Grayscale -> black/white -> de-noised; both helpers work in place
    # and hand back the same image object.
    cleaned = depoint(binarizing(captcha.convert("L"), 110))
    cleaned.show()  # preview of what is fed to the OCR engine

    return pytesseract.image_to_string(cleaned)

def get_xsrf():
    """Run the captcha recognition and print the recognised text.

    NOTE(review): despite the name, nothing XSRF-related happens in the
    visible code; it only prints the result of ``get_code()``.
    """
    print(get_code())


# Script entry point: run the captcha recognition demo when executed directly.
if __name__ == '__main__':
    get_xsrf()

查看全文

相关阅读:
洛谷P3128 [USACO15DEC]Max Flow P 题解树上差分（点差分）
数列分块解决区间更新+区间最值问题
 ThinkPad P1 Gen3 4K 显示器出现间歇闪黑屏情况解决
 Qt自定义弹出式菜单(Qt自定义弹窗)
软件产品易用性评价评估标准
 vue用echarts实现中国地图和世界地图
 知了业务逻辑梳理
 string.gfind string.gmatch
无法定位程序输入点在 XXXX上...
[Lua]c解析lua 嵌套table

原文地址：https://www.cnblogs.com/chaihy/p/10215697.html