zoukankan      html  css  js  c++  java
  • OpenCV--文档扫描OCR识别

    scan.py:

    # 导入工具包
    import numpy as np
    import argparse
    import cv2
    
    # Command-line arguments: the path of the image to scan is mandatory.
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "-i",
        "--image",
        required=True,
        help="Path to the image to be scanned",
    )
    # Expose the parsed namespace as a plain dict, e.g. args["image"].
    args = vars(ap.parse_args())
    
    def order_points(pts):
        """Return four of the given points in a fixed corner order.

        The result rows are, in order: top-left, top-right, bottom-right,
        bottom-left (labels assume image coordinates with columns (x, y)
        and y growing downwards).

        Args:
            pts: NumPy array of 2-D points, one point per row.

        Returns:
            A (4, 2) float32 array holding the selected points.
        """
        # x + y is smallest at the top-left corner and largest at the
        # bottom-right one; y - x is smallest at the top-right corner and
        # largest at the bottom-left one.
        coord_sum = pts.sum(axis=1)
        coord_diff = np.diff(pts, axis=1)

        # Row index in `pts` for each output slot 0..3.
        picks = (
            np.argmin(coord_sum),
            np.argmin(coord_diff),
            np.argmax(coord_sum),
            np.argmax(coord_diff),
        )

        rect = np.zeros((4, 2), dtype="float32")
        for slot, row in enumerate(picks):
            rect[slot] = pts[row]
        return rect
    
    def four_point_transform(image, pts):
        """Warp the quadrilateral described by `pts` to an upright rectangle.

        Args:
            image: Image passed through to cv2.warpPerspective.
            pts: Four corner points, handed to order_points() for ordering.

        Returns:
            The warped image of size (maxWidth, maxHeight), where each
            dimension is the longer of the two opposite edges, truncated
            to an int.
        """
        def _edge_length(p, q):
            # Euclidean distance between two (x, y) points.
            return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

        # Corners in top-left, top-right, bottom-right, bottom-left order.
        rect = order_points(pts)
        tl, tr, br, bl = rect

        # Output width: longer of the bottom and top edges.
        maxWidth = max(int(_edge_length(br, bl)), int(_edge_length(tr, tl)))
        # Output height: longer of the right and left edges.
        maxHeight = max(int(_edge_length(tr, br)), int(_edge_length(tl, bl)))

        # Destination corners, in the same order as `rect`.
        right = maxWidth - 1
        bottom = maxHeight - 1
        dst = np.array(
            [[0, 0], [right, 0], [right, bottom], [0, bottom]],
            dtype="float32",
        )

        # Compute the perspective matrix and apply it.
        M = cv2.getPerspectiveTransform(rect, dst)
        return cv2.warpPerspective(image, M, (maxWidth, maxHeight))
    
    def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
        """Resize `image` to a target width or height, keeping aspect ratio.

        Args:
            image: Array whose first two shape entries are (height, width).
            width: Target width; when given it takes precedence and
                `height` is ignored.
            height: Target height, used only when `width` is None.
            inter: Interpolation flag forwarded to cv2.resize.

        Returns:
            `image` itself when neither dimension is given, otherwise the
            resized image.
        """
        src_h, src_w = image.shape[:2]

        # Nothing requested: hand back the input untouched.
        if width is None and height is None:
            return image

        if width is not None:
            scale = width / float(src_w)
            target = (width, int(src_h * scale))
        else:
            scale = height / float(src_h)
            target = (int(src_w * scale), height)

        # cv2.resize takes the size as (width, height).
        return cv2.resize(image, target, interpolation=inter)
    
    # Read the input image.
    image = cv2.imread(args["image"])
    if image is None:
        # cv2.imread signals a missing/unreadable file by returning None
        # instead of raising; fail with a clear message here.
        raise SystemExit("Could not read image: {}".format(args["image"]))

    # Detection runs on a copy resized to height 500; `ratio` scales the
    # detected coordinates back to the original image.
    ratio = image.shape[0] / 500.0
    orig = image.copy()

    image = resize(orig, height = 500)

    # Pre-processing: grayscale, Gaussian blur, Canny edge detection.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(gray, 75, 200)

    # Show the pre-processing result.
    print("STEP 1: 边缘检测")
    cv2.imshow("Image", image)
    cv2.imshow("Edged", edged)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    # Contour detection. findContours returns (contours, hierarchy) on
    # OpenCV 2.x/4.x and (image, contours, hierarchy) on 3.x, so the
    # contours are always the second-to-last element.
    cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
    # Keep only the five largest contours by area.
    cnts = sorted(cnts, key = cv2.contourArea, reverse = True)[:5]

    # Walk the candidates, largest first, looking for a quadrilateral.
    screenCnt = None
    for c in cnts:
        # Approximate the contour with a polygon.
        peri = cv2.arcLength(c, True)
        # c is the input point set; epsilon (2% of the perimeter) is the
        # maximum distance allowed between the original contour and its
        # approximation; True marks the curve as closed.
        approx = cv2.approxPolyDP(c, 0.02 * peri, True)

        # The first approximation with exactly four vertices is taken as
        # the document outline.
        if len(approx) == 4:
            screenCnt = approx
            break

    if screenCnt is None:
        # Without this guard the code below would die with a NameError.
        raise SystemExit("No 4-point contour found; cannot locate the document outline")

    # Show the detected outline.
    print("STEP 2: 获取轮廓")
    cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 2)
    cv2.imshow("Outline", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    # Perspective transform on the full-resolution image; the contour was
    # found on the resized image, so scale it by `ratio` first.
    warped = four_point_transform(orig, screenCnt.reshape(4, 2) * ratio)

    # Binarize with a fixed threshold of 100 and save the result.
    warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
    ref = cv2.threshold(warped, 100, 255, cv2.THRESH_BINARY)[1]
    cv2.imwrite('scan.jpg', ref)
    # Show the result.
    print("STEP 3: 变换")
    cv2.imshow("Original", resize(orig, height = 650))
    cv2.imshow("Scanned", resize(ref, height = 650))
    cv2.waitKey(0)

    效果:

    利用tesseract工具识别出字符:

    # https://digi.bib.uni-mannheim.de/tesseract/
    # 配置环境变量如 E:\Program Files (x86)\Tesseract-OCR
    # tesseract -v进行测试
    # tesseract XXX.png 得到结果 
    # pip install pytesseract
    # anaconda lib site-packages pytesseract pytesseract.py
    # tesseract_cmd 修改为绝对路径即可
    from PIL import Image
    import pytesseract
    import cv2
    import os
    
    # Pre-processing mode applied before OCR: 'blur' or 'thresh'.
    preprocess = 'blur' #thresh

    image = cv2.imread('scan.jpg')
    if image is None:
        # cv2.imread returns None instead of raising when the file cannot
        # be read; stop with a clear message rather than failing in cvtColor.
        raise SystemExit("Could not read image: scan.jpg")
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    if preprocess == "thresh":
        # Otsu's method picks the threshold automatically.
        gray = cv2.threshold(gray, 0, 255,cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    elif preprocess == "blur":
        # 3x3 median blur.
        gray = cv2.medianBlur(gray, 3)

    # The pre-processed image is handed to tesseract through a temporary
    # PNG named after the current process id.
    filename = "{}.png".format(os.getpid())
    try:
        cv2.imwrite(filename, gray)
        # Close the PIL file handle before the file is deleted.
        with Image.open(filename) as pil_image:
            text = pytesseract.image_to_string(pil_image)
    finally:
        # Remove the temporary file even when OCR fails.
        if os.path.exists(filename):
            os.remove(filename)
    print(text)

    cv2.imshow("Image", image)
    cv2.imshow("Output", gray)
    cv2.waitKey(0)

    效果:

    we owe oak wk ome owe ow wo Sk we %o %o %K
    
     
    
    WHOLE FOODS MARKET - WESTPORT,.CT 06880
    399 POST RD WEST - (203) 227-6858
    
    64
    365
    365
    
    365
    
    BACULN LS
    BACON LS
    BACON LS
    BACON iS
    BRO TH CHIC
    
    FLOUR ALMUNU
    CHKN BRST BNLSS SK
    HEAVY CREAM
    
    BALSMC REDUCT
    
    BEEF
    
    GRND
    JUICE COF CRSHEW
    
    85/15
    
    L.
    
    DOCS PINT QORGAK IC
    HNY ALMOND Bui TR
    
    * x ## TAX
    
    . 00
    
    BAL
    
    NP
    NP
    NP
    NP
    NP
    NP
    NP
    NP
    NP
    NP
    NP
    NP
    NP
    
    4 99
    4.99
    4.99
    1 39
    2.19
    1.99
    . 80
    . 39
    . 49
    
    tl &
    
    on
    
    8.99
    
    14.49
    
    9.99
    101.33
    
    m
    
    "Ti
    
    m n m
  • 相关阅读:
    协同过滤
    深度学习中 epoch,[batch size], iterations概念解释
    如何查看Python内置模块的实现代码
    机器学习/数据挖掘/算法岗位
    算法工程师B
    算法工程师A
    web性能测试基本性能指标
    Loadrunner11不能调用IE8解决方法大全
    抓取Android应用的log
    关于字符latin capital letter sharp s "ß"( U+1E9E)显示的问题
  • 原文地址:https://www.cnblogs.com/SCCQ/p/12296670.html
Copyright © 2011-2022 走看看