zoukankan      html  css  js  c++  java
  • OpenCV--文档扫描OCR识别

    scan.py:

    # 导入工具包
    import numpy as np
    import argparse
    import cv2
    
    # Command-line interface: a single required option giving the path of the
    # image to scan. The parsed namespace is exposed as a plain dict in `args`.
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "-i",
        "--image",
        required=True,
        help="Path to the image to be scanned",
    )
    args = vars(ap.parse_args())
    
    def order_points(pts):
        """Return four corner points as a (4, 2) float32 array in a fixed order.

        The output rows are: top-left, top-right, bottom-right, bottom-left.
        `pts` is indexed as an array of (x, y) rows.
        """
        # x + y is smallest at the top-left corner and largest at the bottom-right.
        coord_sums = pts.sum(axis=1)
        # y - x is smallest at the top-right corner and largest at the bottom-left.
        coord_diffs = np.diff(pts, axis=1)

        # Source row index for each output slot, in output order.
        picks = [
            np.argmin(coord_sums),
            np.argmin(coord_diffs),
            np.argmax(coord_sums),
            np.argmax(coord_diffs),
        ]

        ordered = np.zeros((4, 2), dtype="float32")
        for slot, src_row in enumerate(picks):
            ordered[slot] = pts[src_row]
        return ordered
    
    def four_point_transform(image, pts):
        """Warp the quadrilateral described by `pts` in `image` to an upright rectangle.

        The corners are first put in top-left, top-right, bottom-right,
        bottom-left order by `order_points`. The output size is taken from the
        longer of each pair of opposite edges, truncated to whole pixels.
        Returns the warped image produced by `cv2.warpPerspective`.
        """
        rect = order_points(pts)
        tl, tr, br, bl = rect

        def edge_length(p, q):
            # Euclidean distance between two (x, y) points.
            return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

        # Output width: longer of the bottom and top edges.
        maxWidth = max(int(edge_length(br, bl)), int(edge_length(tr, tl)))
        # Output height: longer of the right and left edges.
        maxHeight = max(int(edge_length(tr, br)), int(edge_length(tl, bl)))

        # Destination corners, in the same order as `rect`.
        right = maxWidth - 1
        bottom = maxHeight - 1
        dst = np.array(
            [[0, 0], [right, 0], [right, bottom], [0, bottom]],
            dtype="float32",
        )

        # Compute the perspective matrix and apply it.
        M = cv2.getPerspectiveTransform(rect, dst)
        return cv2.warpPerspective(image, M, (maxWidth, maxHeight))
    
    def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
        """Resize `image` to a target width or height, keeping the aspect ratio.

        With neither `width` nor `height` given, the image is returned as is.
        When `width` is given it determines the scale (and `height` is not
        used); otherwise `height` determines it. `inter` is passed to
        `cv2.resize` as the interpolation flag.
        """
        src_h, src_w = image.shape[:2]

        if width is None and height is None:
            return image

        if width is not None:
            # Scale from the requested width; derive the height.
            scale = width / float(src_w)
            target = (width, int(src_h * scale))
        else:
            # Scale from the requested height; derive the width.
            scale = height / float(src_h)
            target = (int(src_w * scale), height)

        return cv2.resize(image, target, interpolation=inter)
    
    # Read the input image.
    image = cv2.imread(args["image"])
    # cv2.imread does not raise on a missing or undecodable file; it returns
    # None. Fail here with a clear message instead of an AttributeError below.
    if image is None:
        raise SystemExit("Could not read image: {}".format(args["image"]))

    # Detection runs on a copy resized to 500 px high. Coordinates found on
    # that copy are multiplied by this ratio to map them back onto the original.
    ratio = image.shape[0] / 500.0
    orig = image.copy()

    image = resize(orig, height = 500)

    # Preprocessing: grayscale, Gaussian blur, then Canny edge detection.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(gray, 75, 200)

    # Show the preprocessing result; waits for a key press before continuing.
    print("STEP 1: 边缘检测")
    cv2.imshow("Image", image)
    cv2.imshow("Edged", edged)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    
    # Contour detection.
    # cv2.findContours returns (image, contours, hierarchy) in OpenCV 3.x but
    # (contours, hierarchy) in 2.x and 4.x. The contour list is always the
    # second-to-last element, so index from the end to work on every version.
    cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
    # Keep only the five largest contours by area.
    cnts = sorted(cnts, key = cv2.contourArea, reverse = True)[:5]

    # Stays None when no candidate approximates to a quadrilateral.
    screenCnt = None

    # Walk the candidates from largest to smallest.
    for c in cnts:
        # Approximate the contour with a polygon.
        peri = cv2.arcLength(c, True)
        # c is the input point set; epsilon (2% of the perimeter) is the
        # maximum distance allowed between the original contour and its
        # approximation; True marks the curve as closed.
        approx = cv2.approxPolyDP(c, 0.02 * peri, True)

        # The first approximation with exactly 4 vertices is taken as the document.
        if len(approx) == 4:
            screenCnt = approx
            break

    # Without this check a missing outline surfaces as a NameError further down.
    if screenCnt is None:
        raise SystemExit("No 4-point contour found; cannot locate the document outline")

    # Show the detected outline; waits for a key press before continuing.
    print("STEP 2: 获取轮廓")
    cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 2)
    cv2.imshow("Outline", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    
    # Perspective transform: scale the outline found on the resized copy back
    # to the coordinates of the original image, then warp the original.
    corners_full_res = screenCnt.reshape(4, 2) * ratio
    warped = four_point_transform(orig, corners_full_res)

    # Binarize: grayscale, then a fixed threshold at 100.
    warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
    _, ref = cv2.threshold(warped, 100, 255, cv2.THRESH_BINARY)
    cv2.imwrite('scan.jpg', ref)

    # Show the original and the scanned result, both resized to 650 px high.
    print("STEP 3: 变换")
    original_preview = resize(orig, height = 650)
    scanned_preview = resize(ref, height = 650)
    cv2.imshow("Original", original_preview)
    cv2.imshow("Scanned", scanned_preview)
    cv2.waitKey(0)

    效果:

    利用tesseract工具识别出字符:

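    # Tesseract installers: https://digi.bib.uni-mannheim.de/tesseract/
    # Add the install directory to the PATH environment variable, e.g. E:\Program Files (x86)\Tesseract-OCR
    # Run `tesseract -v` to check the installation
    # Run `tesseract XXX.png <outputbase>` to get the recognition result
    # pip install pytesseract
    # Open <anaconda>\Lib\site-packages\pytesseract\pytesseract.py
    # and change tesseract_cmd to the absolute path of the tesseract executable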
    from PIL import Image
    import pytesseract
    import cv2
    import os
    
    # Preprocessing mode applied before OCR: 'blur' or 'thresh'.
    preprocess = 'blur' #thresh

    image = cv2.imread('scan.jpg')
    # cv2.imread returns None rather than raising when the file is missing or
    # cannot be decoded; stop here with a clear message.
    if image is None:
        raise SystemExit("Could not read 'scan.jpg'")
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # The two modes are mutually exclusive.
    if preprocess == "thresh":
        # Otsu's method picks the threshold; the 0 passed here is a placeholder.
        gray = cv2.threshold(gray, 0, 255,cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    elif preprocess == "blur":
        gray = cv2.medianBlur(gray, 3)

    # Write the preprocessed image to a temporary PNG named after the process id.
    filename = "{}.png".format(os.getpid())
    cv2.imwrite(filename, gray)

    try:
        # The context manager closes the file handle before the file is removed.
        with Image.open(filename) as ocr_input:
            text = pytesseract.image_to_string(ocr_input)
    finally:
        # Remove the temporary file even when OCR fails.
        os.remove(filename)
    print(text)

    cv2.imshow("Image", image)
    cv2.imshow("Output", gray)
    cv2.waitKey(0)

    效果:

    we owe oak wk ome owe ow wo Sk we %o %o %K
    
     
    
    WHOLE FOODS MARKET - WESTPORT,.CT 06880
    399 POST RD WEST - (203) 227-6858
    
    64
    365
    365
    
    365
    
    BACULN LS
    BACON LS
    BACON LS
    BACON iS
    BRO TH CHIC
    
    FLOUR ALMUNU
    CHKN BRST BNLSS SK
    HEAVY CREAM
    
    BALSMC REDUCT
    
    BEEF
    
    GRND
    JUICE COF CRSHEW
    
    85/15
    
    L.
    
    DOCS PINT QORGAK IC
    HNY ALMOND Bui TR
    
    * x ## TAX
    
    . 00
    
    BAL
    
    NP
    NP
    NP
    NP
    NP
    NP
    NP
    NP
    NP
    NP
    NP
    NP
    NP
    
    4 99
    4.99
    4.99
    1 39
    2.19
    1.99
    . 80
    . 39
    . 49
    
    tl &
    
    on
    
    8.99
    
    14.49
    
    9.99
    101.33
    
    m
    
    "Ti
    
    m n m
  • 相关阅读:
    android基础开发之一setContentView用法
    setContentView( )方法
    如何使用andriod的布局标签
    也谈layout_gravity和gravity的用法
    SlidingMenu第二篇 --- SlidingMenu常用属性介绍
    SlidingMenu第一篇 --- 导入SlidingMenu库
    JS----对象的合并与克隆
    angular2 ----字符串、对象、base64 之间的转换
    WebStorm过期解决方法
    JavaScript--鼠标滚动改变图片大小
  • 原文地址:https://www.cnblogs.com/SCCQ/p/12296670.html
Copyright © 2011-2022 走看看