zoukankan      html  css  js  c++  java
  • OpenCV在字符提取中进行的预处理(转)

    OCR简介
    熟悉OCR的人都了解,OCR大致分为两个部分:

    -文字提取text extractor
    -文字识别text recognition

    其中,第一部分是属于图像处理部分,涉及到图像分割的知识,而第二部分则大多数利用谷歌的Tesseract来进行字符的识别,涉及到的东西不多,当然也不难,难的是要能够做到非常准确的识别率,以及它的识别速率。

    文字提取
    这一部分工作是很关键的,因为文字提取的好坏,直接影响到最后的识别结果,相当于预处理部分,是非常重要的,其主要目的是为了分割出文字字符。
    主要涉及工作有:

    1. -灰度化
    2. -锐化
    3. -Otsu
    4. -处理0和1边界值
    5. -如果有必要,还需要进行噪声去除,这里要涉及到找连通分量的相关计算;
      1 void TextDetector::segmentText(cv::Mat &spineImage, cv::Mat &segSpine, bool removeNoise){
      2 
      3     cv::Mat spineGray;
      4     cvtColor(spineImage, spineGray, CV_BGR2GRAY);
      5     imshow("gray source" , spineGray);
      6     spineGray = spineGray - 0.5;
      7 //    WriteData("/Users/eternity/Desktop/未命名文件夹/gray1.txt", spineGray);
      8 //    waitKey();
      9     cv::Mat spineAhe;
     10     adaptiveHistEqual(spineGray, spineAhe, 0.01);
     11     imshow("ahe", spineAhe);
     12 //    WriteData("/Users/eternity/Desktop/未命名文件夹/gray2.txt", spineAhe);
     13 
     14     int window_num = 40;
     15 
     16     double window_h = (spineImage.rows / (double)window_num + 1e-3);
     17 
     18     int window_w = spineImage.cols;
     19 
     20     cv::Mat spine_th = cv::Mat::zeros(spineGray.size(), CV_8U);
     21 
     22     for (int i = 0; i < window_num; i ++) {
     23         double cut_from_r = window_h * i;
     24         double cut_to_r = window_h * (i+1);
     25         cv::Mat window_img = cv::Mat::zeros(Size(cut_to_r-cut_from_r + 1, window_w), CV_8U);
     26         cv::Rect rect = cv::Rect(0, cut_from_r, window_w-1, cut_to_r - cut_from_r + 1);
     27         window_img = cv::Mat(spineGray, rect);
     28         imshow("window section", window_img);
     29 
     30         sharpenImage(window_img, window_img);
     31         imshow("sharpen", window_img);
     32 //        waitKey();
     33 //        WriteData("/Users/eternity/Desktop/未命名文件夹/gray4.txt", window_img);
     34         double max_local,min_local;
     35         minMaxLoc(window_img, &min_local, &max_local);
     36         double color_diff = max_local - min_local;
     37         double thresh;
     38         cv::Mat window_tmp;
     39         if (color_diff > 50)
     40             thresh = threshold(window_img, window_tmp, 1, 255, THRESH_OTSU);
     41         else
     42             thresh = 0;
     43 //        cout<<thresh<<endl;
     44         cv::Mat seg_window(window_img.size(), CV_64F);
     45         imgQuantize(window_img, seg_window, thresh);
     46 //        WriteData("/Users/eternity/Desktop/未命名文件夹/quantize2.txt", seg_window);
     47         seg_window = seg_window == 1;
     48 //        seg_window = seg_window / 255;
     49         //处理0边界值
     50         vector<int> cols1,cols2,rows1,rows2;
     51         findKEdgeFirst(seg_window, 0, 5, rows1, cols1);
     52         findKEdgeLast (seg_window, 0, 5, rows2, cols2);
     53         float max_zero_dist, max_one_dist;
     54         if(cols1.empty() || cols2.empty())
     55             max_zero_dist = 0.0;
     56         else{
     57             float avg_right = (rows2[0]+rows2[1]+rows2[2]+rows2[3]+rows2[4]) / (float)sizeof(rows2);
     58             float avg_left  = (rows1[0]+rows1[1]+rows1[2]+rows1[3]+rows1[4]) / (float)sizeof(rows1);
     59             max_zero_dist = avg_right - avg_left;
     60         }
     61         cols1.clear();
     62         cols2.clear();
     63         rows1.clear();
     64         rows2.clear();
     65 
     66         //处理1边界值
     67         findKEdgeFirst(seg_window, 255, 5, rows1, cols1);
     68         findKEdgeLast (seg_window, 255, 5, rows2, cols2);
     69         if(cols1.empty() || cols2.empty())
     70             max_one_dist = 0;
     71         else{
     72             float avg_right = (rows2[0]+rows2[1]+rows2[2]+rows2[3]+rows2[4]) / (float)sizeof(rows2);
     73             float avg_left  = (rows1[0]+rows1[1]+rows1[2]+rows1[3]+rows1[4]) / (float)sizeof(rows1);
     74             max_one_dist = avg_right - avg_left;
     75         }
     76         cols1.clear();
     77         cols2.clear();
     78         rows1.clear();
     79         rows2.clear();
     80 
     81         cv::Mat idx;
     82         findNonZero(seg_window, idx);
     83         int one_count = (int)idx.total();
     84         int zero_count = (int)seg_window.total() - one_count;
     85 
     86         float one_zero_diff = max_one_dist - max_zero_dist;
     87         float  dist_limit = 5;
     88 
     89         if(one_zero_diff > dist_limit)
     90             seg_window = ~ seg_window;
     91         else{
     92             if(one_zero_diff > -dist_limit && one_count > zero_count)
     93                 seg_window = ~ seg_window;
     94         }
     95 
     96         seg_window.copyTo(cv::Mat( spine_th, rect));
     97 //        imshow("spine_th", spine_th);
     98 //        waitKey();
     99 
    100 
    101     }
    102     //去除噪声
    103     if (removeNoise) {
    104         vector<vector<cv::Point>> contours;
    105         imshow("spine_th", spine_th);
    106 //        WriteData("/Users/eternity/Desktop/未命名文件夹/quantize1.txt", spine_th);
    107 //        waitKey();
    108         findContours(spine_th, contours, RETR_EXTERNAL, CHAIN_APPROX_NONE);
    109 
    110         for (int i = 0; i < contours.size(); i ++) {
    111             //compute bounding rect
    112             cv::Rect rect = boundingRect(contours[i]);
    113             double bbox_aspect = rect.width / (double)rect.height;
    114             int bbox_area = rect.width * rect.height;
    115             //compute solidity
    116             vector<vector<Point>> hull(1);
    117             convexHull( contours[i], hull[0] );
    118             double convex_area = contourArea(hull[0]);
    119             double solidity = bbox_area / convex_area;
    120 
    121             for (int j = 0; j < contours[i].size(); j ++) {
    122                 if ( (rect.width > spineImage.cols / 1.001)
    123                     || (rect.width > spineImage.cols / 1.4 && bbox_aspect > 5.0)
    124                     || (rect.height > spineImage.cols / 1.1)
    125                     || (bbox_area < pow(spineImage.cols/30, 2))
    126                     || (bbox_aspect > 0.5 && bbox_aspect < 1.7 && solidity > 0.9) )
    127 
    128                     spine_th.at<int>(contours[i][j].x, contours[i][j].y) = 0;
    129 //                WriteData("/Users/eternity/Desktop/未命名文件夹/quantize2.txt", spine_th);
    130             }
    131 
    132 
    133         }
    134 
    135     }
    136     segSpine = spine_th;
    137 //    transpose(segSpine, segSpine);
    138 //    flip(segSpine, segSpine, 0);
    139     imshow("segspine", segSpine);
    140 //    waitKey();
    141     spine_th.release();
    142 
    143 
    144 
    145 }
    146 //对图片进行level量化
    147 void TextDetector::imgQuantize(cv::Mat &src, cv::Mat &dst, double level){
    148     dst = cv::Mat::zeros(src.rows, src.cols, CV_8U);
    149     for (int i = 0; i < src.rows; i ++) {
    150         uchar *data = src.ptr<uchar>(i);
    151         uchar *data2 = dst.ptr<uchar>(i);
    152         for (int j = 0; j < src.cols; j ++) {
    153             if(data[j] <= level)
    154                 data2[j] = 1;
    155             else
    156                 data2[j] = 2;
    157 
    158         }
    159     }
    160 
    161 }
    162 //找出最左边界处,前edgeValue个值为k的边界值
    163 void TextDetector::findKEdgeFirst(cv::Mat &data, int edgeValue,int k,vector<int> &rows,vector<int> &cols){
    164     int count = 0;
    165     for (int i = 0; i < data.cols; i ++) {
    166         uchar *u = data.ptr<uchar>(i);
    167         for (int j = 0; j < data.rows; j ++) {
    168             if(edgeValue == (int)u[j]){
    169                 if(count < k){
    170                     count ++;
    171                     cols.push_back(i);
    172                     rows.push_back(j);
    173                 }
    174 
    175             }
    176 
    177         }
    178     }
    179 
    180 }
    181 //找出最右边界处,倒数edgeValue个值为k的边界值
    182 void TextDetector::findKEdgeLast(cv::Mat &data, int edgeValue,int k,vector<int> &rows, vector<int> &cols){
    183     int count = 0;
    184     for (int i = data.cols - 1; i >= 0; i --) {
    185         uchar *u = data.ptr<uchar>(i);
    186         for (int j = data.rows - 1; j >= 0; j --) {
    187             if(edgeValue == (int)u[j]){
    188                 if(count < k){
    189                     count ++;
    190                     cols.push_back(i);
    191                     rows.push_back(j);
    192                 }
    193 
    194             }
    195         }
    196 
    197     }
    198 
    199 }
    200 //直方图均衡
    201 void TextDetector::adaptiveHistEqual(cv::Mat &src,cv::Mat &dst,double clipLimit)
    202 {
    203     Ptr<cv::CLAHE> clahe = createCLAHE();
    204     clahe->setClipLimit(clipLimit);
    205     clahe->apply(src, dst);
    206 }
    

    ---------------------
    作者:eternity1118_
    来源:CSDN
    原文:https://blog.csdn.net/eternity1118_/article/details/52575374
    版权声明:本文为博主原创文章,转载请附上博文链接!

  • 相关阅读:
    flask-admin章节二:wtforms的使用以及在数据库场景中使用QuerySelectField代替SelectField
    flask-admin章节一:使用chartkick画报表
    flask-admin众博客概述
    python smtplib发送邮件遇到的认证问题
    python logging模块可能会令人困惑的地方
    Markdown
    SpringBoot-启动过程
    SpringBoot-目录
    AbstractQueuedSynchronizer
    ThreadLocal
  • 原文地址:https://www.cnblogs.com/abella/p/9964260.html
Copyright © 2011-2022 走看看