zoukankan html css js c++ java

ocr识别

步骤分两步：

第一步：用边缘检测找到文档的四边形轮廓，再应用透视变换将图片摆正，输出扫描版

第二步：用 pytesseract 对扫描版图片进行文字识别

main.py

import torch
import numpy as np
import cv2
import math
import test


def show(img):
    """Display ``img`` in a window titled 'name' until a key is pressed.

    All OpenCV windows are destroyed once the key press is received.
    """
    window_title = 'name'
    cv2.imshow(window_title, img)
    cv2.waitKey(0)  # a delay of 0 blocks until any key is pressed
    cv2.destroyAllWindows()


def pers_transform(img, coor):
    """Warp the quadrilateral given by ``coor`` into an upright rectangle.

    Args:
        img: Source image handed to ``cv2.warpPerspective``.
        coor: The four (x, y) corners of the region to straighten, in any
            order. Anything that can be viewed as a ``(4, 2)`` array is
            accepted, including the ``(4, 1, 2)`` output of
            ``cv2.approxPolyDP``.

    Returns:
        The warped image, ``w`` pixels wide and ``h`` pixels high, where
        ``w`` and ``h`` are the longer of the two opposite edge lengths.

    Raises:
        ValueError: If ``coor`` does not describe exactly four 2-D points.
    """
    pts = np.asarray(coor, dtype=np.float32).reshape(-1, 2)
    if len(pts) != 4:
        raise ValueError('pers_transform expects exactly 4 corner points, '
                         'got %d' % len(pts))

    # Split into the two leftmost and two rightmost points by x, then order
    # each pair by y (image y grows downwards, so the smaller y is the top).
    # Ordering each side independently keeps the source corners aligned with
    # the destination corners below; sorting on x alone leaves the top/bottom
    # order of each pair arbitrary and can twist or flip the result.
    # NOTE(review): assumes the page is not rotated so far that a "left"
    # corner has a larger x than a "right" one.
    by_x = pts[np.argsort(pts[:, 0], kind='stable')]
    left, right = by_x[:2], by_x[2:]
    top_left, bottom_left = left[np.argsort(left[:, 1], kind='stable')]
    top_right, bottom_right = right[np.argsort(right[:, 1], kind='stable')]

    # Take the longer of each pair of opposite edges: the approximated contour
    # is only a quadrilateral, not necessarily a rectangle.
    top_len = float(np.linalg.norm(top_left - top_right))
    bottom_len = float(np.linalg.norm(bottom_left - bottom_right))
    w = int(max(top_len, bottom_len))
    left_len = float(np.linalg.norm(top_left - bottom_left))
    right_len = float(np.linalg.norm(top_right - bottom_right))
    h = int(max(left_len, right_len))

    scoor = np.array([top_left, bottom_left, top_right, bottom_right],
                     dtype=np.float32)
    # Destination corners, listed in the same order as the source corners.
    dcoor = np.array([[0, 0], [0, h], [w, 0], [w, h]], dtype=np.float32)
    trans_m = cv2.getPerspectiveTransform(scoor, dcoor)
    # warpPerspective takes the output size as (width, height).
    return cv2.warpPerspective(img, trans_m, (w, h))




if __name__ == '__main__':
    # Pipeline: load the photo, find the document outline, warp it upright,
    # binarise it, save the scan and hand the saved file to the OCR step.
    src = 'C:/Users/Dell/PycharmProjects/ExtractText/images/receipt.jpg'
    dst = 'C:/Users/Dell/PycharmProjects/ExtractText/scan.jpg'

    img = cv2.imread(src)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError('cannot read image: ' + src)

    # Resize to a fixed height of 1000 px while keeping the aspect ratio.
    scale = img.shape[0] / 1000
    img_k = cv2.resize(img, (int(img.shape[1] / scale), 1000))
    img_gray = cv2.cvtColor(img_k, cv2.COLOR_BGR2GRAY)
    img_canny = cv2.Canny(img_gray, 200, 255)

    # findContours returns (image, contours, hierarchy) on OpenCV 3.x and
    # (contours, hierarchy) on 4.x; index -2 is the contour list in both.
    contours = cv2.findContours(img_canny, cv2.RETR_TREE,
                                cv2.CHAIN_APPROX_NONE)[-2]
    contours = sorted(contours, key=cv2.contourArea, reverse=True)

    # Use the largest contour whose polygon approximation has four corners;
    # the single largest contour is not guaranteed to be a quadrilateral.
    corners = None
    for contour in contours:
        peri = cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, 0.02 * peri, True)
        if len(approx) == 4:
            # approxPolyDP yields shape (4, 1, 2); flatten to (4, 2).
            corners = approx.reshape(4, 2)
            break
    if corners is None:
        raise RuntimeError('no four-cornered contour found in ' + src)

    img_pers = pers_transform(img_gray, corners)
    img_pers_bin = cv2.threshold(img_pers, 150, 255, cv2.THRESH_BINARY)[1]

    # Write to the same path the OCR step reads, so the result does not
    # depend on the current working directory.
    if not cv2.imwrite(dst, img_pers_bin):
        raise OSError('cannot write image: ' + dst)
    print('Accepted')
    test.scan(dst)

test.py

from PIL import Image
import cv2
import numpy as np
import pytesseract

def show(img):
    """Open a window named 'name' showing ``img`` and wait for a key press.

    Every OpenCV window is closed after the key press.
    """
    title = 'name'
    cv2.imshow(title, img)
    cv2.waitKey(0)  # 0 means no timeout: block until a key is pressed
    cv2.destroyAllWindows()

def scan(dst):
    """Run OCR on the image file at ``dst``, print the text and show the image.

    Args:
        dst: Path of the image file to recognise.

    Returns:
        The string produced by ``pytesseract.image_to_string``.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from ``dst``.
    """
    img = cv2.imread(dst)
    if img is None:
        # cv2.imread returns None on failure; fail here instead of after OCR
        # when the image would be passed to show().
        raise FileNotFoundError('cannot read image: ' + str(dst))

    # The context manager releases the file handle once OCR has finished.
    with Image.open(dst) as img_k:
        text = pytesseract.image_to_string(img_k)
    print(text)
    show(img)
    return text

自己选择的路，跪着也要走完。朋友们，虽然这个世界日益浮躁起来，只要能够为了当时纯粹的梦想和感动坚持努力下去，不管其它人怎么样，我们也能够保持自己的本色走下去。

查看全文

相关阅读:
关于云原生应用的思考
 动手实现 LRU 算法，以及 Caffeine 和 Redis 中的缓存淘汰策略
 Spring5-Reactor函数式编程
 架构简洁之道：从阿里开源应用架构 COLA 说起
 如何优雅地运用位运算实现产品需求？
如何优雅地运用位运算实现产品需求？
图形处理：给 Canvas 文本填充线性渐变
 深入理解EnableAutoConfiguration原理
 pwnable.tw之3x17
WebRTC之完整搭建Jitsi Meet指南

原文地址：https://www.cnblogs.com/WTSRUVF/p/15292218.html

热门文章
拓扑排序
 P2170 选学霸
 P2016 战略游戏
 无根树
 高斯消元
 分层图
 分块九讲
 SPFA浅谈
 memset无穷赋值
 无穷大