zoukankan      html  css  js  c++  java
  • pytesseract 验证码识别

    以下代码演示如何使用 pytesseract 识别验证码;如有不懂之处,欢迎加群讨论。
    # *-* coding:utf-8 *-* #
    import json
    import requests
    import pytesseract
    import time
    import datetime
    from PIL import Image
    from bs4 import BeautifulSoup
    import urllib3
    import random
    import os

    def binarizing(img, threshold):
    # input: gray image, get black and white images
    pixdata = img.load()
    w, h = img.size
    for y in range(h):
    for x in range(w):
    if pixdata[x, y] < threshold:
    pixdata[x, y] = 0
    else:
    pixdata[x, y] = 255
    return img


    def depoint(img):
    # input: gray image, remove the noise
    pixdata = img.load()
    w, h = img.size
    for x in range(1, w - 1):
    if x > 1 and x != w - 2:
    # 获取目标像素点左右位置
    left = x - 1
    right = x + 1

    for y in range(1, h - 1):
    # 获取目标像素点上下位置
    up = y - 1
    down = y + 1

    if x <= 2 or x >= (w - 2):
    img.putpixel((x, y), 255)

    elif y <= 2 or y >= (h - 2):
    img.putpixel((x, y), 255)

    elif img.getpixel((x, y)) == 0:
    if y > 1 and y != h - 1:

    # 以目标像素点为中心点,获取周围像素点颜色
    # 0为黑色,255为白色
    up_color = img.getpixel((x, up))
    down_color = img.getpixel((x, down))
    left_color = img.getpixel((left, y))
    left_down_color = img.getpixel((left, down))
    right_color = img.getpixel((right, y))
    right_up_color = img.getpixel((right, up))
    right_down_color = img.getpixel((right, down))
    # 去除竖线干扰线
    if down_color == 0:
    if left_color == 255 and left_down_color == 255 and
    right_color == 255 and right_down_color == 255:
    img.putpixel((x, y), 255)

    # 去除横线干扰线

    elif right_color == 0:
    if down_color == 255 and right_down_color == 255 and
    up_color == 255 and right_up_color == 255:
    img.putpixel((x, y), 255)

    # 去除斜线干扰线
    if left_color == 255 and right_color == 255
    and up_color == 255 and down_color == 255:
    img.putpixel((x, y), 255)

    return img



    def get_code():
    '''
    下载验证码并pytesseract 识别验证码
    :return:
    '''
    code_file = '1.jpg'
    image = Image.open(code_file)
    image.show()
    #text = input('请输入验证码:')
    image = image.convert("L")
    binarizing(image, 110)
    depoint(image)
    image.show()

    text = pytesseract.image_to_string(image)
    return text

    def get_xsrf():
    code = get_code()
    print (code)


    if __name__ == '__main__':
    get_xsrf()

    
    
  • 相关阅读:
    python连接redis sentinel集群(哨兵模式)
    xpath的高级使用:用xpath定位当前元素的相邻元素/兄弟元素
    获取pycharm通行证的链接
    如何实现多个爬虫循环顺序爬取
    linux查看最末几行
    django项目在linux(centos7)上配置好了,在window上想通过ip:8000访问却始终访问不了
    python 浅谈os.path路径问题
    java程序设计第二次实验报告
    实验一 Java开发环境的熟悉
    MATLAB 图片折腾4
  • 原文地址:https://www.cnblogs.com/chaihy/p/10215697.html
Copyright © 2011-2022 走看看