zoukankan html css js c++ java

Python 识别身份证号码

# !/usr/bin/python
# -*-coding:utf-8-*-
import sys

import time

# Start the wall-clock timer as soon as the module begins executing. It is
# deliberately placed before the PIL / pytesseract imports below, so the
# elapsed time printed at the end of the script includes their import cost.
time1 = time.time()
from PIL import Image
import pytesseract


def binarizing(img, threshold):
    """Binarize an image in place using a fixed threshold.

    Every pixel whose value is strictly below ``threshold`` is set to 0;
    every other pixel is set to 255.

    Args:
        img: Image-like object exposing ``load()`` (pixel access indexed
            as ``[x, y]``) and ``size`` (``(width, height)``). Pixel values
            are compared directly with ``threshold``, so a single-channel
            image is presumably expected.
        threshold: Cut-off value; pixels below it become 0.

    Returns:
        The same ``img`` object, after modification.
    """
    pixels = img.load()
    width, height = img.size
    for row in range(height):
        for col in range(width):
            # Read once, then overwrite with the pure black/white value.
            pixels[col, row] = 0 if pixels[col, row] < threshold else 255
    return img


def depoint(img):  # input: gray image
    """Whiten interior pixels that are mostly surrounded by near-white ones.

    For each pixel that is not on the image border, the four direct
    neighbours (above, below, left, right) are inspected. If at least three
    of them have a value greater than 245, the pixel is set to 255. The
    original author describes this as an interference-line removal step.

    The image is modified in place while it is scanned row by row, left to
    right, so the upper and left neighbours of a pixel may already have been
    whitened by the time that pixel is examined.

    Args:
        img: Image-like object exposing ``load()`` (pixel access indexed
            as ``[x, y]``) and ``size`` (``(width, height)``).

    Returns:
        The same ``img`` object, after modification.
    """
    pixels = img.load()
    width, height = img.size
    for row in range(1, height - 1):
        for col in range(1, width - 1):
            neighbours = (
                pixels[col, row - 1],
                pixels[col, row + 1],
                pixels[col - 1, row],
                pixels[col + 1, row],
            )
            bright = sum(1 for value in neighbours if value > 245)
            if bright >= 3:
                pixels[col, row] = 255
    return img


def identity_OCR(pic_path, region=(125, 200, 370, 250), scale=3, threshold=100):
    """Recognize the ID-card number in the image stored at ``pic_path``.

    The image is enlarged by ``scale``, the area holding the number is
    cropped out, converted to grayscale, binarized, cleaned with
    ``depoint`` and finally passed to Tesseract. The recognized text is
    printed and also returned.

    Args:
        pic_path: Path of the ID-card image to read.
        region: ``(left, upper, right, lower)`` box of the number area,
            expressed in pixels of the original (un-enlarged) image. The
            default matches the layout the script was written for.
        scale: Integer enlargement factor applied before cropping.
        threshold: Binarization cut-off passed to ``binarizing``.

    Returns:
        The raw string produced by ``pytesseract.image_to_string``.
    """
    # Open through a context manager so the file handle is released as soon
    # as resize() has produced an independent, fully loaded copy.
    with Image.open(pic_path) as card:
        width, height = card.size
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the filter it
        # used to alias and is available in old and new releases alike.
        enlarged = card.resize((width * scale, height * scale), Image.LANCZOS)

    # Scale the crop box to the enlarged image's coordinate system.
    crop_box = tuple(edge * scale for edge in region)
    number_strip = enlarged.crop(crop_box)

    # Grayscale first, then clean up in place: both helpers mutate and
    # return the very image they are given.
    gray = number_strip.convert('L')
    binarizing(gray, threshold)
    depoint(gray)

    code = pytesseract.image_to_string(gray)
    print("识别该身份证号码是:" + str(code))
    return code


if __name__ == '__main__':
    # Run the OCR on the sample image, then report the wall-clock time that
    # has passed since ``time1`` was recorded at module start-up.
    pic_path = "./1.png"
    identity_OCR(pic_path)
    elapsed = time.time() - time1
    print(u'总共耗时：' + str(elapsed) + 's')

查看全文

相关阅读:
什么是脚本语言？什么是解释性语言？什么是编译性语言？
自己写的java excel导出工具类
 httpclient模拟post请求json封装表单数据
 git 远程库命令
 git 常用命令及解析由浅入深
 图书馆借书（年，月，日）
线索二叉树
 AxB Problem（大数乘法）
简易版最长序列（map映射）
小黑跳地毯减肥（递推）

原文地址：https://www.cnblogs.com/qiandu/p/10280670.html