zoukankan      html  css  js  c++  java
  • pytesseract 验证码识别

    以下是完整代码,如有不懂之处,可加群讨论。
    # -*- coding: utf-8 -*-
    import json
    import requests
    import pytesseract
    import time
    import datetime
    from PIL import Image
    from bs4 import BeautifulSoup
    import urllib3
    import random
    import os

    def binarizing(img, threshold):
        """Binarize a grayscale image in place.

        Every pixel whose value is strictly below ``threshold`` is set to 0
        (black); every other pixel is set to 255 (white).

        :param img: image object exposing ``load()`` and ``size``; the caller
            in this file passes a PIL image converted to mode ``"L"``.
        :param threshold: cut-off value; pixels below it become black.
        :return: the same ``img`` object, after modification.
        """
        pixdata = img.load()
        w, h = img.size
        for y in range(h):
            for x in range(w):
                pixdata[x, y] = 0 if pixdata[x, y] < threshold else 255
        return img


    def depoint(img):
        """Remove thin noise from a binarized grayscale image, in place.

        The scan visits columns ``1 .. w - 2`` and, within each column, rows
        ``1 .. h - 2``; the outermost row and column (index 0 and the last
        index) are never visited and therefore keep their values.

        For every visited pixel:

        * if it lies in the margin (``x <= 2``, ``x >= w - 2``, ``y <= 2`` or
          ``y >= h - 2``) it is set to 255 (white);
        * otherwise, if it is black (0), it is set to white when one of the
          three neighbour patterns below matches.

        Because pixels are rewritten while scanning, later decisions see the
        already-cleaned neighbours on the left and above.

        :param img: image object exposing ``load()``, ``size``,
            ``getpixel((x, y))`` and ``putpixel((x, y), value)``; the caller
            in this file passes a binarized PIL image in mode ``"L"``.
        :return: the same ``img`` object, after modification.
        """
        # Kept from the original code: trigger loading of the pixel data before
        # the scan starts (the returned access object itself is not needed).
        img.load()
        w, h = img.size

        for x in range(1, w - 1):
            left, right = x - 1, x + 1

            for y in range(1, h - 1):
                # Margin pixels are unconditionally whitened.
                if x <= 2 or x >= w - 2 or y <= 2 or y >= h - 2:
                    img.putpixel((x, y), 255)
                    continue

                # Only black pixels are candidates for removal.
                if img.getpixel((x, y)) != 0:
                    continue

                up, down = y - 1, y + 1

                # Colours around the target pixel: 0 is black, 255 is white.
                up_color = img.getpixel((x, up))
                down_color = img.getpixel((x, down))
                left_color = img.getpixel((left, y))
                left_down_color = img.getpixel((left, down))
                right_color = img.getpixel((right, y))
                right_up_color = img.getpixel((right, up))
                right_down_color = img.getpixel((right, down))

                if down_color == 0:
                    # Vertical interference line: black continues downwards
                    # while both sides of this pixel and the next are white.
                    is_noise = (
                        left_color == 255 and left_down_color == 255
                        and right_color == 255 and right_down_color == 255
                    )
                elif right_color == 0:
                    # Horizontal interference line: black continues to the
                    # right while above/below this pixel and the next are white.
                    is_noise = (
                        down_color == 255 and right_down_color == 255
                        and up_color == 255 and right_up_color == 255
                    )
                else:
                    is_noise = False

                # Diagonal line / isolated dot: all four direct neighbours
                # are white.
                if (left_color == 255 and right_color == 255
                        and up_color == 255 and down_color == 255):
                    is_noise = True

                if is_noise:
                    img.putpixel((x, y), 255)

        return img



    def get_code(code_file='1.jpg', threshold=110):
        """Recognise the captcha stored in a local image file with pytesseract.

        The image is opened and displayed, converted to grayscale, binarized
        with ``binarizing`` and cleaned with ``depoint``, displayed again, and
        finally passed to ``pytesseract.image_to_string``. Nothing is
        downloaded here; the file must already exist on disk.

        :param code_file: path of the captcha image (default ``'1.jpg'``).
        :param threshold: binarization cut-off handed to ``binarizing``
            (default ``110``).
        :return: the text returned by ``pytesseract.image_to_string``.
        """
        # Use a context manager so the underlying file handle is released as
        # soon as the grayscale copy has been made.
        with Image.open(code_file) as original:
            original.show()
            image = original.convert("L")

        binarizing(image, threshold)
        depoint(image)
        image.show()

        return pytesseract.image_to_string(image)

    def get_xsrf():
        """Run captcha recognition via ``get_code`` and print the result.

        :return: None
        """
        code = get_code()
        print(code)


    if __name__ == '__main__':
        # Script entry point: recognise the captcha and print it.
        get_xsrf()

    
    
  • 相关阅读:
    洛谷P3128 [USACO15DEC]Max Flow P 题解 树上差分(点差分)
    数列分块解决区间更新+区间最值问题
    ThinkPad P1 Gen3 4K 显示器出现间歇闪黑屏情况解决
    Qt自定义弹出式菜单(Qt自定义弹窗)
    软件产品易用性评价评估标准
    vue用echarts实现中国地图和世界地图
    知了业务逻辑梳理
    string.gfind string.gmatch
    无法定位程序输入点在 XXXX上...
    [Lua]c解析lua 嵌套table
  • 原文地址:https://www.cnblogs.com/chaihy/p/10215697.html
Copyright © 2011-2022 走看看