整体流程分为两步:
第一步:应用透视变换将图片摆正,输出扫描版图片
第二步:用 pytesseract 对扫描版图片进行文字识别(OCR)
main.py
import math

import cv2
import numpy as np
import torch

import test


def show(img):
    """Display *img* in a window titled 'name' and block until a key is pressed."""
    cv2.imshow('name', img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


def _order_corners(coor):
    """Return four points ordered top-left, bottom-left, top-right, bottom-right.

    The points are first split into a left and a right pair by x, then each
    pair is ordered by y (image coordinates, so smaller y is higher up).

    Raises:
        ValueError: if *coor* does not contain exactly four (x, y) points.
    """
    pts = np.asarray(coor, dtype=np.float32).reshape(-1, 2)
    if pts.shape[0] != 4:
        raise ValueError('expected 4 corner points, got %d' % pts.shape[0])

    by_x = pts[np.argsort(pts[:, 0], kind='stable')]
    left, right = by_x[:2], by_x[2:]
    left = left[np.argsort(left[:, 1], kind='stable')]
    right = right[np.argsort(right[:, 1], kind='stable')]
    return np.array([left[0], left[1], right[0], right[1]], dtype=np.float32)


def pers_transform(img, coor):
    """Warp the quadrilateral *coor* of *img* onto an upright rectangle.

    Args:
        img: source image passed to ``cv2.warpPerspective``.
        coor: four (x, y) corner points of the region, in any order.

    Returns:
        The warped image of size (w, h), where w and h are the longer of the
        two opposite edges of the quadrilateral.

    Raises:
        ValueError: if *coor* does not contain exactly four points.
    """
    scoor = _order_corners(coor)
    top_left, bottom_left, top_right, bottom_right = scoor

    def edge(p, q):
        # Euclidean length of the edge between two corners.
        return math.hypot(float(p[0] - q[0]), float(p[1] - q[1]))

    # Take the larger of each pair of opposite edges: the approximated contour
    # may be a general quadrilateral rather than a rectangle.
    w = int(max(edge(top_left, top_right), edge(bottom_left, bottom_right)))
    h = int(max(edge(top_left, bottom_left), edge(top_right, bottom_right)))

    # Destination corners, listed in the same order as the source corners.
    dcoor = np.array([[0, 0], [0, h], [w, 0], [w, h]], dtype=np.float32)
    trans_m = cv2.getPerspectiveTransform(scoor, dcoor)
    return cv2.warpPerspective(img, trans_m, (w, h))


def _find_quad(contours):
    """Return the 4-point approximation of the largest contour that has one, else None."""
    for contour in sorted(contours, key=cv2.contourArea, reverse=True):
        peri = cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, 0.02 * peri, True)
        if len(approx) == 4:
            return approx
    return None


def main():
    """Rectify the receipt photo, save the binarized scan to ``dst`` and run OCR on it."""
    dst = 'C:/Users/Dell/PycharmProjects/ExtractText/scan.jpg'
    src = 'C:/Users/Dell/PycharmProjects/ExtractText/images/receipt.jpg'

    img = cv2.imread(src)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError('cannot read image: ' + src)

    # Resize to a height of 1000 px, keeping the aspect ratio.
    img_k = cv2.resize(img, (int(img.shape[1] / (img.shape[0] / 1000)), 1000))
    img_gray = cv2.cvtColor(img_k, cv2.COLOR_BGR2GRAY)
    img_canny = cv2.Canny(img_gray, 200, 255)

    # OpenCV 3.x returns (image, contours, hierarchy) while 4.x returns
    # (contours, hierarchy); index -2 is the contour list in both.
    contours = cv2.findContours(img_canny, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2]
    approx = _find_quad(contours)
    if approx is None:
        raise RuntimeError('no four-corner contour found in ' + src)

    # Outline of the detected quadrilateral drawn on a copy; it is not
    # displayed here, pass it to show() to inspect the detection.
    img_k_copy = img_k.copy()
    cv2.drawContours(img_k_copy, [approx], -1, (0, 0, 255), 2)

    # approxPolyDP yields a 3-D array; flatten it to four (x, y) rows.
    img_pers = pers_transform(img_gray, approx.reshape(4, 2))
    img_pers_bin = cv2.threshold(img_pers, 150, 255, cv2.THRESH_BINARY)[1]

    # Write to the same path that test.scan() reads back.
    if not cv2.imwrite(dst, img_pers_bin):
        raise OSError('cannot write image: ' + dst)
    print('Accepted')
    test.scan(dst)


if __name__ == '__main__':
    main()
test.py
from PIL import Image
import cv2
import numpy as np
import pytesseract


def show(img):
    """Display *img* in a window titled 'name' and block until a key is pressed."""
    cv2.imshow('name', img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


def scan(dst):
    """Run OCR on the image file at *dst*, print the text, then display the image.

    Args:
        dst: path of the image file to recognize.

    Raises:
        OSError: if OpenCV cannot read *dst* for display. Errors raised by
            ``Image.open`` for a missing or unreadable file propagate unchanged.
    """
    # Close the PIL file handle as soon as the OCR call has consumed the image.
    with Image.open(dst) as img_k:
        text = pytesseract.image_to_string(img_k)
    print(text)

    img = cv2.imread(dst)
    if img is None:
        # cv2.imread returns None on failure; passing that to imshow would
        # fail with an opaque assertion error.
        raise OSError('cannot read image for display: ' + str(dst))
    show(img)