zoukankan      html  css  js  c++  java
  • OpenCV--文档扫描OCR识别

    scan.py:

    # 导入工具包
    import numpy as np
    import argparse
    import cv2
    
    # Command-line interface: the path of the image to scan is mandatory
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "-i",
        "--image",
        required=True,
        help="Path to the image to be scanned",
    )
    args = vars(ap.parse_args())
    
    def order_points(pts):
        """Order four corner points as top-left, top-right, bottom-right, bottom-left.

        ``pts`` is indexed as an array of (x, y) rows. The result is a new
        4x2 float32 array holding the selected rows in that order.
        """
        # The coordinate sum picks the top-left (minimum) and bottom-right (maximum)
        totals = pts.sum(axis=1)
        # The difference between the two coordinates picks the top-right
        # (minimum) and bottom-left (maximum)
        deltas = np.diff(pts, axis=1)

        # Row indices into pts, listed in output order: tl, tr, br, bl
        picks = (
            np.argmin(totals),
            np.argmin(deltas),
            np.argmax(totals),
            np.argmax(deltas),
        )

        ordered = np.zeros((4, 2), dtype="float32")
        for slot, row in enumerate(picks):
            ordered[slot] = pts[row]
        return ordered
    
    def four_point_transform(image, pts):
        """Warp the quadrilateral described by ``pts`` onto an upright rectangle.

        The corners are first ordered with ``order_points``. The output width
        and height are the longer of each pair of opposite edges, truncated
        to integers. Returns the result of ``cv2.warpPerspective``.
        """
        def edge_length(p, q):
            # Euclidean distance between two corner points
            return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

        corners = order_points(pts)
        top_left, top_right, bottom_right, bottom_left = corners

        # Output size: the longer of the bottom/top edges and of the right/left edges
        out_w = max(
            int(edge_length(bottom_right, bottom_left)),
            int(edge_length(top_right, top_left)),
        )
        out_h = max(
            int(edge_length(top_right, bottom_right)),
            int(edge_length(top_left, bottom_left)),
        )

        # Destination corners, in the same tl, tr, br, bl order as the source
        target = np.array(
            [
                [0, 0],
                [out_w - 1, 0],
                [out_w - 1, out_h - 1],
                [0, out_h - 1],
            ],
            dtype="float32",
        )

        # Build the perspective matrix and apply it
        matrix = cv2.getPerspectiveTransform(corners, target)
        return cv2.warpPerspective(image, matrix, (out_w, out_h))
    
    def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
        """Scale ``image`` to the given width or height, keeping its aspect ratio.

        Returns ``image`` itself when neither ``width`` nor ``height`` is given.
        When ``width`` is given it sets the scale and ``height`` is not
        consulted. ``inter`` is passed to ``cv2.resize`` as the interpolation.
        """
        src_h, src_w = image.shape[:2]

        if width is None and height is None:
            return image

        if width is not None:
            # Scale driven by the requested width
            scale = width / float(src_w)
            target = (width, int(src_h * scale))
        else:
            # Scale driven by the requested height
            scale = height / float(src_h)
            target = (int(src_w * scale), height)

        return cv2.resize(image, target, interpolation=inter)
    
    # Load the input image
    image = cv2.imread(args["image"])
    if image is None:
        # cv2.imread returns None instead of raising when the file cannot be
        # read; stop here with a clear message rather than an AttributeError
        raise SystemExit("Could not read image: {}".format(args["image"]))
    # Scale factor between the original and the 500-pixel-high working copy;
    # contour coordinates are multiplied by it later
    ratio = image.shape[0] / 500.0
    orig = image.copy()

    image = resize(orig, height = 500)

    # Preprocessing: grayscale, Gaussian blur, then Canny edge detection
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(gray, 75, 200)

    # Show the preprocessing result
    print("STEP 1: 边缘检测")
    cv2.imshow("Image", image)
    cv2.imshow("Edged", edged)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    
    # Contour detection.
    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.x and 4.x; index -2 is the contour list in
    # every case, whereas index 1 is only correct on 3.x.
    cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
    # Keep only the five largest contours by area
    cnts = sorted(cnts, key = cv2.contourArea, reverse = True)[:5]

    # Stays None when none of the candidates approximates to a quadrilateral
    screenCnt = None

    # Walk the candidates from largest to smallest
    for c in cnts:
        # Approximate the contour with a polygon
        peri = cv2.arcLength(c, True)
        # c is the input point set
        # epsilon (here 2% of the perimeter) is the maximum distance allowed
        # between the original contour and its approximation
        # True means the curve is closed
        approx = cv2.approxPolyDP(c, 0.02 * peri, True)

        # Take the first approximation that has exactly four points
        if len(approx) == 4:
            screenCnt = approx
            break

    if screenCnt is None:
        # Without this check the drawContours call below fails with a NameError
        raise SystemExit("No four-point contour found in the image")

    # Show the result
    print("STEP 2: 获取轮廓")
    cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 2)
    cv2.imshow("Outline", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    
    # Perspective transform: scale the contour back to the original image's
    # coordinates before warping
    doc_corners = screenCnt.reshape(4, 2) * ratio
    warped = four_point_transform(orig, doc_corners)

    # Binarize: grayscale, then a fixed threshold at 100
    warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
    _, ref = cv2.threshold(warped, 100, 255, cv2.THRESH_BINARY)
    cv2.imwrite('scan.jpg', ref)

    # Show the result
    print("STEP 3: 变换")
    original_preview = resize(orig, height = 650)
    cv2.imshow("Original", original_preview)
    scanned_preview = resize(ref, height = 650)
    cv2.imshow("Scanned", scanned_preview)
    cv2.waitKey(0)

    效果:

    利用tesseract工具识别出字符:

    # https://digi.bib.uni-mannheim.de/tesseract/
    # 配置环境变量如E:\Program Files (x86)\Tesseract-OCR
    # tesseract -v进行测试
    # tesseract XXX.png 得到结果 
    # pip install pytesseract
    # anaconda\lib\site-packages\pytesseract\pytesseract.py
    # tesseract_cmd 修改为绝对路径即可
    from PIL import Image
    import pytesseract
    import cv2
    import os
    
    # Preprocessing applied before OCR: 'blur' or 'thresh'
    preprocess = 'blur' #thresh

    image = cv2.imread('scan.jpg')
    if image is None:
        # cv2.imread returns None instead of raising when the file cannot be read
        raise SystemExit("Could not read image: scan.jpg")
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # The two modes are mutually exclusive
    if preprocess == "thresh":
        # Otsu's method chooses the threshold; the 0 passed here is a placeholder
        gray = cv2.threshold(gray, 0, 255,cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    elif preprocess == "blur":
        gray = cv2.medianBlur(gray, 3)

    # Write the preprocessed image to a temporary PNG named after the process id
    filename = "{}.png".format(os.getpid())
    if not cv2.imwrite(filename, gray):
        raise SystemExit("Could not write temporary image: {}".format(filename))

    try:
        # Close the PIL handle before the file is deleted
        with Image.open(filename) as ocr_input:
            text = pytesseract.image_to_string(ocr_input)
    finally:
        # Remove the temporary file even when OCR raises
        os.remove(filename)
    print(text)

    cv2.imshow("Image", image)
    cv2.imshow("Output", gray)
    cv2.waitKey(0)

    效果:

    we owe oak wk ome owe ow wo Sk we %o %o %K
    
     
    
    WHOLE FOODS MARKET - WESTPORT,.CT 06880
    399 POST RD WEST - (203) 227-6858
    
    64
    365
    365
    
    365
    
    BACULN LS
    BACON LS
    BACON LS
    BACON iS
    BRO TH CHIC
    
    FLOUR ALMUNU
    CHKN BRST BNLSS SK
    HEAVY CREAM
    
    BALSMC REDUCT
    
    BEEF
    
    GRND
    JUICE COF CRSHEW
    
    85/15
    
    L.
    
    DOCS PINT QORGAK IC
    HNY ALMOND Bui TR
    
    * x ## TAX
    
    . 00
    
    BAL
    
    NP
    NP
    NP
    NP
    NP
    NP
    NP
    NP
    NP
    NP
    NP
    NP
    NP
    
    4 99
    4.99
    4.99
    1 39
    2.19
    1.99
    . 80
    . 39
    . 49
    
    tl &
    
    on
    
    8.99
    
    14.49
    
    9.99
    101.33
    
    m
    
    "Ti
    
    m n m
  • 相关阅读:
    Redis的安装和部署
    SaltStack应用grains和jinja模板-第四篇
    SaltStack部署配置Tomcat-第三篇
    python魔法方法、构造函数、序列与映射、迭代器、生成器
    python异常
    python类
    python之函数、参数、作用域、递归
    docker+openvswitch实现主机与容器的网络通信
    Docker网络和容器的通信
    docker命名空间、控制组及联合文件系统概念
  • 原文地址:https://www.cnblogs.com/SCCQ/p/12296670.html
Copyright © 2011-2022 走看看