最近研究了一点 OCR 识别。本文介绍用 OpenCV 找出字符区域的方法,虽然还不完善,但先记录下来;后续会往 CNN+RNN+CTC 方向走,此处仅作为练手。
效果1:


效果2:


效果3:


效果4(识别效果不太好:只识别出了大框,字符分割有问题):


import cv2
import imutils
import numpy as np
from imageio import imread
import math
import matplotlib.pyplot as plt
def point_distance(p1, p2):
    """Return the Euclidean distance between two 2-D points.

    Args:
        p1: First point; only ``p1[0]`` (x) and ``p1[1]`` (y) are read.
        p2: Second point, indexed the same way.

    Returns:
        The straight-line distance as a float.
    """
    # math.hypot avoids the OverflowError that squaring large deltas with
    # math.pow raises, and is the standard-library form of this formula.
    return math.hypot(p2[0] - p1[0], p2[1] - p1[1])
def calc_height_width(box):
    """Measure two adjacent sides of a four-corner box.

    Note that, despite the function name, the result is ordered
    ``(width, height)``.

    Args:
        box: Indexable sequence of corner points; corners 0, 1 and 3 are read.

    Returns:
        A tuple ``(width, height)`` where width is the distance between
        corners 1 and 0, and height is the distance between corners 0 and 3.
    """
    anchor = box[0]
    return (point_distance(box[1], anchor), point_distance(anchor, box[3]))
# --- Stage 1: find candidate text-line boxes on the whole image -------------
fileName = 'test1'
# A forward slash is used as the separator: the original 'imgs\' literal
# escaped its own closing quote, so the file did not parse.
img = imread('imgs/' + fileName + '.jpg')
# NOTE(review): both width and height are passed; presumably imutils.resize
# keeps the aspect ratio and only one of them takes effect - confirm.
# NOTE(review): imageio presumably loads RGB while cv2.imshow expects BGR, so
# displayed colours may be swapped - confirm.
img = imutils.resize(img, width=1920, height=2080)

# Morphological close, then erode, then edge detection on the result.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
closed = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
eroded = cv2.erode(closed, kernel)
cannyImg = cv2.Canny(eroded, 200, 200)

# Blur the edge map with a large kernel and binarise it with Otsu's threshold;
# contours are then extracted from the binary mask.
blurred = cv2.GaussianBlur(cannyImg, (105, 105), 0)
_, skin = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# Taking the last two items works whether findContours returns
# (contours, hierarchy) or (image, contours, hierarchy).
contours, hierarchy = cv2.findContours(skin, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]
contours = sorted(contours, key=cv2.contourArea, reverse=True)

boxes = []
for c in contours:
    rect = cv2.minAreaRect(c)
    # np.int0 was removed in NumPy 2.0; it was an alias of np.intp.
    box = cv2.boxPoints(rect).astype(np.intp)
    (w, h) = calc_height_width(box)
    # Skip small boxes; this also guards the divisions below against zero.
    if w < 20 or h < 20:
        continue
    rate1 = h / w * 100
    rate2 = w / h * 100
    # Keep only boxes whose short side is 10%-20% of the long side.
    if (10 <= rate1 <= 20) or (10 <= rate2 <= 20):
        print((w, h), '--------', rate1, '%', rate2, '%')
        boxes.append(box)

img = img.copy()
# Counter used to give every per-box preview window a distinct name.
i = 0
def parse_chars(positions, min_thresh, min_range, max_range):
    """Split a 1-D profile into segments where values exceed a threshold.

    A segment opens at the first value strictly above ``min_thresh``. It is
    closed either when a value strictly below ``min_thresh`` is reached and
    the segment is at least ``min_range`` long, or when it has grown beyond
    ``max_range`` while still above the threshold (forced split). Values
    exactly equal to ``min_thresh`` neither open nor close a segment.

    NOTE(review): the source's indentation was lost. This reconstruction only
    resets the open segment after one has been emitted, so a fragment shorter
    than ``min_range`` stays open and merges with what follows - confirm this
    matches the intended behaviour.

    Args:
        positions: Sequence of numeric values, one per index (e.g. a
            per-column pixel count).
        min_thresh: Values above this open/extend a segment; values below it
            may close one.
        min_range: Minimum ``end - begin`` for a segment closed by a gap or by
            the end of the input.
        max_range: A segment still above the threshold is force-closed once
            ``idx - begin`` exceeds this.

    Returns:
        A list of ``{'begin': int, 'end': int}`` dicts in input order. For a
        segment closed by a gap, ``end`` is the index of the first value below
        the threshold; for a forced split it is the index at which the split
        happened; for a segment still open at the end of the input it is the
        last valid index.
    """
    char_infos = []
    # None means "no open segment". The original used 0 for this, which made
    # a segment starting at index 0 indistinguishable from no segment at all.
    begin = None
    for idx, value in enumerate(positions):
        if value > min_thresh:
            if begin is None:
                begin = idx
            elif idx - begin > max_range:
                # Segment is too long to be a single character: cut it here.
                char_infos.append({'begin': begin, 'end': idx})
                begin = None
        elif value < min_thresh and begin is not None:
            if idx - begin >= min_range:
                char_infos.append({'begin': begin, 'end': idx})
                begin = None

    # Flush a segment that runs up to the end of the profile; previously such
    # a segment was silently dropped.
    if begin is not None:
        last = len(positions) - 1
        if last - begin >= min_range:
            char_infos.append({'begin': begin, 'end': last})
    return char_infos
def process_more(windowName, imgSrc):
    """Mark character segments on one cropped image and show it in a window.

    The image is turned into a binary edge mask, the mask is projected onto
    the horizontal axis (count of white pixels per column), and the profile
    is split into segments with ``parse_chars``. Each segment is drawn as a
    green rectangle on a copy of the input, which is then resized to a width
    of 450 and displayed.

    Args:
        windowName: Title of the OpenCV window used for display.
        imgSrc: Image array to analyse; it is copied before drawing.

    Returns:
        None. The segment count is printed and the annotated image is shown
        via ``cv2.imshow``.
    """
    ori_imgSrc = imgSrc.copy()

    # Close, erode, edge-detect, thicken the edges, then binarise with Otsu.
    kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    closed = cv2.morphologyEx(imgSrc, cv2.MORPH_CLOSE, kernel)
    imgSrc = cv2.erode(closed, kernel)
    imgSrc = cv2.Canny(imgSrc, 300, 300)
    kernel = np.ones((5, 5), np.uint8)
    imgSrc = cv2.dilate(imgSrc, kernel, iterations=1)
    _, imgSrc = cv2.threshold(imgSrc, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    rows = ori_imgSrc.shape[0]
    # Number of white (255) pixels in every column, computed in one NumPy
    # reduction instead of a Python loop over each pixel.
    tags = np.sum(imgSrc == 255, axis=0)

    char_positions = parse_chars(positions=tags, min_thresh=8, min_range=25, max_range=100)
    print(len(char_positions))
    for p in char_positions:
        # Each rectangle spans from the top row down to row ``rows - 2``.
        cv2.rectangle(ori_imgSrc, (p['begin'], 0), (p['end'], rows - 2), (0, 255, 0), 2)

    ori_imgSrc = imutils.resize(ori_imgSrc, width=450)
    cv2.imshow(windowName, ori_imgSrc)
# --- Stage 2: rectify every candidate box and segment its characters --------
# NOTE(review): the corner indices below assume a particular corner order from
# cv2.boxPoints; presumably this differs between OpenCV versions - confirm.
for i, box in enumerate(boxes):
    (w, h) = calc_height_width(box)
    if w > h:
        # Source points for the output's top-left, bottom-left and top-right:
        # box corners 3, 2, 4 (1-based).
        matSrc = np.float32([
            [box[2][0], box[2][1]],
            [box[1][0], box[1][1]],
            [box[3][0], box[3][1]],
        ])
        matDst = np.float32([
            [0, 0],
            [0, h],
            [w, 0],
        ])
        matAffine = cv2.getAffineTransform(matSrc, matDst)
        dst = cv2.warpAffine(img, matAffine, (int(w), int(h)))
    else:
        # Here the output's top-left, bottom-left and top-right are taken from
        # the box's top-right, top-left and bottom-right corners:
        # box corners 4, 3, 1 (1-based). The output is h wide and w tall.
        matSrc = np.float32([
            [box[3][0], box[3][1]],
            [box[2][0], box[2][1]],
            [box[0][0], box[0][1]],
        ])
        matDst = np.float32([
            [0, 0],
            [0, w],
            [h, 0],
        ])
        matAffine = cv2.getAffineTransform(matSrc, matDst)
        dst = cv2.warpAffine(img, matAffine, (int(h), int(w)))
    # One preview window per box; the index keeps the window names distinct.
    process_more("ffff222asdfas" + str(i), dst.copy())

# Show a downsized view of the full image and wait up to 100000 ms for a key.
img = imutils.resize(img, width=600, height=600)
cv2.imshow("Frame6", img)
cv2.waitKey(100000)
cv2.destroyAllWindows()