人工智能是当下很热门的话题,手写识别是一个典型的应用。为了进一步了解这个领域,我阅读了大量的论文,并借助opencv完成了对28x28的数字图片(预处理后的二值图像)的识别任务。
预处理一张图片:
首先采用opencv读取图片的构造函数读取灰度的图片,再采用大津法求出图片的二值化的阈值,并且将图片二值化。
1 int otsu(const IplImage* src_image) { 2 double sum = 0.0; 3 double w0 = 0.0; 4 double w1 = 0.0; 5 double u0_temp = 0.0; 6 double u1_temp = 0.0; 7 double u0 = 0.0; 8 double u1 = 0.0; 9 double delta_temp = 0.0; 10 double delta_max = 0.0; 11 12 int pixel_count[256] = { 0 }; 13 float pixel_pro[256] = { 0 }; 14 int threshold = 0; 15 uchar* data = (uchar*)src_image->imageData; 16 for (int i = 0; i < src_image->height; i++) { 17 for (int j = 0; j < src_image->width; j++) { 18 pixel_count[(int)data[i * src_image->width + j]]++; 19 sum += (int)data[i * src_image->width + j]; 20 } 21 } 22 for (int i = 0; i < 256; i++) { 23 pixel_pro[i] = (float)pixel_count[i] / (src_image->height * src_image->width); 24 } 25 for (int i = 0; i < 256; i++) { 26 w0 = w1 = u0_temp = u1_temp = u0 = u1 = delta_temp = 0; 27 for (int j = 0; j < 256; j++) { 28 if (j <= i) { 29 w0 += pixel_pro[j]; 30 u0_temp += j * pixel_pro[j]; 31 } 32 else { 33 w1 += pixel_pro[j]; 34 u1_temp += j * pixel_pro[j]; 35 } 36 } 37 u0 = u0_temp / w0; 38 u1 = u1_temp / w1; 39 delta_temp = (float)(w0 *w1* pow((u0 - u1), 2)); 40 if (delta_temp > delta_max) { 41 delta_max = delta_temp; 42 threshold = i; 43 } 44 } 45 return threshold; 46 }
1 void imageBinarization(IplImage* src_image) { 2 IplImage* binImg = cvCreateImage(cvGetSize(src_image), src_image->depth, src_image->nChannels); 3 CvScalar s; 4 int ave = 0; 5 int binThreshold = otsu(src_image); 6 7 for (int i = 0; i < src_image->height; i++) { 8 for (int j = 0; j < src_image->width; j++) { 9 s = cvGet2D(src_image, i, j); 10 ave = (s.val[0] + s.val[1] + s.val[2]) / 3; 11 if (ave < binThreshold) { 12 s.val[0] = s.val[1] = s.val[2] = 0xff; 13 cvSet2D(src_image, i, j, s); 14 } 15 else { 16 s.val[0] = s.val[1] = s.val[2] = 0x00; 17 cvSet2D(src_image, i, j, s); 18 } 19 } 20 } 21 cvCopy(src_image, binImg); 22 cvSaveImage(bined, binImg); 23 //cvShowImage("binarization", binImg); 24 //waitKey(0); 25 }
由于只是进行简单的识别模拟,因此没有做像素断点的处理。获取MNIST提供的数据集,提取每张图片的HOG特征,参数如下:
1 HOGDescriptor *hog = new HOGDescriptor( 2 cvSize(ImgWidht, ImgHeight), cvSize(14, 14), cvSize(7, 7), cvSize(7, 7), 9);
(9个方向换成18个可能会取得更准确的结果,这取决于对图片本身的复杂程度的分析。)
之后即可训练knn分类器,进行分类了。
1 void knnTrain() { 2 #ifdef SAVETRAINED 3 //knn training; 4 samples.clear(); 5 dat_mat = Mat::zeros(10 * nImgNum, 324, CV_32FC1); 6 res_mat = Mat::zeros(10 * nImgNum, 1, CV_32FC1); 7 for (int i = 0; i != 10; i++) { 8 getFile(dirNames[i], i); 9 } 10 preTrain(); 11 cout << "------ Training finished. -----" << endl << endl; 12 knn.train(dat_mat, res_mat, Mat(), false, 2); 13 14 #ifdef SAVEASXML 15 knn.save("./trained/knnTrained.xml"); 16 #endif 17 18 #else 19 knn.load("./trained/knnTrained.xml"); 20 #endif 21 22 //knn test 23 cout << endl << "--- KNN test mode : ---" << endl; 24 int tCnt = 10000; 25 int tAc = 0; 26 selfknnTest(tCnt, tAc); 27 28 cout << endl << endl << "Total number of test samples : " << tCnt << endl; 29 30 cout << "Accuracy : " << float(float(tAc) / float(tCnt)) * 100 << "%" << endl; 31 }
训练结果如下,准确率还是很令人满意的。