zoukankan      html  css  js  c++  java
  • 图像ocr识别(一)

    研究了点OCR识别,本文讲下opencv方式-找出字符区域,虽然还不完善,但是记录下,后续往CNN+RNN+CTC方向走,此处就作为练手了。

    效果1:

    效果2:

    效果3:

    效果4(识别率不太好,只把大框识别了,字符的分割有问题):

    import cv2
    import imutils
    import numpy as np
    from imageio import imread
    import math
    import matplotlib.pyplot as plt
    
    
    def point_distance(p1, p2):
        """Return the Euclidean distance between two 2-D points.

        Only the first two components of each point are read
        (``p[0]`` and ``p[1]``).

        ``math.hypot`` is used instead of ``sqrt(pow(dx, 2) + pow(dy, 2))``
        because ``math.pow`` raises ``OverflowError`` when a squared delta
        does not fit in a float.
        """
        return math.hypot(p2[0] - p1[0], p2[1] - p1[1])
    
    
    def calc_height_width(box):
        """Measure the two sides of a four-corner box that meet at corner 0.

        Returns a ``(width, height)`` tuple -- note the order is the reverse
        of what the function name suggests. ``width`` is the length of the
        edge between corners 0 and 1, ``height`` the length of the edge
        between corners 0 and 3.
        """
        corner0 = box[0]
        side_01 = point_distance(box[1], corner0)
        side_03 = point_distance(corner0, box[3])
        return (side_01, side_03)
    
    
    fileName = 'test1'

    # The original literal 'imgs\' escaped its own closing quote and did not
    # parse. A forward slash is used as the directory separator instead.
    img = imread('imgs/' + fileName + '.jpg')
    img = imutils.resize(img, width=1920, height=2080)

    # Morphological close followed by an erosion with a 3x3 rectangular
    # kernel, applied before edge detection.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    closed = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
    eroded = cv2.erode(closed, kernel)

    # Edge map, then a very wide Gaussian blur (the author also tried a
    # (15, 15) kernel) so that neighbouring edges merge into blobs.
    cannyImg = cv2.Canny(eroded, 200, 200)
    blurred = cv2.GaussianBlur(cannyImg, (105, 105), 0)

    # Binarise the blurred edge map with an Otsu-selected threshold.
    _, skin = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    contours, hierarchy = cv2.findContours(skin, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    # Largest contours first.
    contours = sorted(contours, key=cv2.contourArea, reverse=True)

    # Collect the rotated bounding boxes whose shape looks like a text strip.
    boxes = []
    for c in contours:
        rect = cv2.minAreaRect(c)
        # np.intp replaces np.int0, which was removed in NumPy 2.0.
        box = np.intp(cv2.boxPoints(rect))
        (w, h) = calc_height_width(box)
        # Skip boxes with a side shorter than 20 px; this also covers the
        # zero-length sides that would break the divisions below.
        if w < 20 or h < 20:
            continue
        rate1 = h / w * 100
        rate2 = w / h * 100
        # Keep only boxes where one side is 10%-20% of the other.
        if (10 <= rate1 <= 20) or (10 <= rate2 <= 20):
            print((w, h), '--------', rate1, '%', rate2, '%')
            boxes.append(box)

    img = img.copy()
    # Running index used to give each per-box preview window a unique name.
    i = 0
    
    
    def parse_chars(positions, min_thresh, min_range, max_range):
        """Split a 1-D profile into ``{'begin': i, 'end': j}`` segments.

        ``positions`` is indexed by column; a column is "on" when its value
        is strictly greater than ``min_thresh`` and "off" when strictly less.
        A value exactly equal to ``min_thresh`` is ignored.

        A segment is opened at the first "on" column and emitted when either:

        * an "on" column is more than ``max_range`` columns past ``begin``
          (an over-long run is cut at that column), or
        * an "off" column is at least ``min_range`` columns past ``begin``.

        An "off" column that arrives sooner than ``min_range`` does NOT
        discard the open segment; it stays open and is closed by a later
        column. NOTE(review): this is preserved from the original -- confirm
        whether short runs were meant to be dropped instead.

        ``None`` marks "no segment open". The original used ``begin == 0``
        for that, so a run starting at column 0 was reported as starting at
        column 1.

        Returns the list of segments in left-to-right order.
        """
        charInfos = []
        begin = None
        for idx, value in enumerate(positions):
            if value > min_thresh:
                if begin is None:
                    begin = idx
                elif idx - begin > max_range:
                    # Run is wider than one character may be: cut it here.
                    charInfos.append({'begin': begin, 'end': idx})
                    begin = None
            elif value < min_thresh and begin is not None:
                if idx - begin >= min_range:
                    charInfos.append({'begin': begin, 'end': idx})
                    begin = None

        return charInfos
    
    
    def process_more(windowName, imgSrc):
        """Segment one cropped strip into columns and show the result.

        The strip is closed/eroded, edge-detected, dilated and binarised.
        The white pixels in each column are then counted and passed to
        ``parse_chars``. One green rectangle per returned segment is drawn on
        a copy of the input, which is resized to 450 px wide and shown in a
        window named ``windowName``. The segment count is printed.

        Returns ``None``.
        """
        ori_imgSrc = imgSrc.copy()

        kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
        closed = cv2.morphologyEx(imgSrc, cv2.MORPH_CLOSE, kernel)
        imgSrc = cv2.erode(closed, kernel)

        imgSrc = cv2.Canny(imgSrc, 300, 300)
        kernel = np.ones((5, 5), np.uint8)
        imgSrc = cv2.dilate(imgSrc, kernel, iterations=1)
        _, imgSrc = cv2.threshold(imgSrc, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        rows = ori_imgSrc.shape[0]

        # Per-column count of white (255) pixels. This replaces the original
        # nested Python loop over every pixel with a single NumPy reduction.
        tags = np.count_nonzero(imgSrc == 255, axis=0)

        char_positions = parse_chars(positions=tags, min_thresh=8, min_range=25, max_range=100)

        print(len(char_positions))
        for p in char_positions:
            leftTop = (p['begin'], 0)
            rightBottom = (p['end'], rows - 2)
            cv2.rectangle(ori_imgSrc, leftTop, rightBottom, (0, 255, 0), 2)
        # NOTE(review): the source had these statements collapsed onto one
        # line. Resizing after all rectangles are drawn is the reading that
        # keeps the rectangle coordinates valid -- confirm.
        ori_imgSrc = imutils.resize(ori_imgSrc, width=450)
        cv2.imshow(windowName, ori_imgSrc)


    # Rectify every candidate box to an axis-aligned strip and segment it.
    for box in boxes:
        (w, h) = calc_height_width(box)
        if w > h:
            # Author's note: source corners are top-left, bottom-left,
            # top-right, i.e. box corners 3, 2, 4 in 1-based numbering.
            matSrc = np.float32([
                [box[2][0], box[2][1]],
                [box[1][0], box[1][1]],
                [box[3][0], box[3][1]],
            ])
            matDst = np.float32([
                [0, 0],
                [0, h],
                [w, 0],
            ])
            matAffine = cv2.getAffineTransform(matSrc, matDst)
            dst = cv2.warpAffine(img, matAffine, (int(w), int(h)))
        else:
            # Author's note: for a tall box the corners used are top-right,
            # top-left, bottom-right, i.e. 4, 3, 1 in 1-based numbering
            # (instead of 3, 2, 4), so the output strip is still wider than
            # it is tall.
            matSrc = np.float32([
                [box[3][0], box[3][1]],
                [box[2][0], box[2][1]],
                [box[0][0], box[0][1]],
            ])
            matDst = np.float32([
                [0, 0],
                [0, w],
                [h, 0],
            ])
            matAffine = cv2.getAffineTransform(matSrc, matDst)
            dst = cv2.warpAffine(img, matAffine, (int(h), int(w)))

        process_more("ffff222asdfas" + str(i), dst.copy())
        i += 1

    img = imutils.resize(img, width=600, height=600)
    cv2.imshow("Frame6", img)
    # Keep the windows open for up to 100 s or until a key is pressed; the
    # returned key code is not used.
    cv2.waitKey(100000)
    cv2.destroyAllWindows()

      

  • 相关阅读:
    03_ if 练习 _ little2big
    uva 11275 3D Triangles
    uva 12296 Pieces and Discs
    uvalive 3218 Find the Border
    uvalive 2797 Monster Trap
    uvalive 4992 Jungle Outpost
    uva 2218 Triathlon
    uvalive 3890 Most Distant Point from the Sea
    uvalive 4728 Squares
    uva 10256 The Great Divide
  • 原文地址:https://www.cnblogs.com/aarond/p/OCR-1.html
Copyright © 2011-2022 走看看