import numpy as np # bmp 图片后缀 import matplotlib.pyplot as plt %matplotlib inline from sklearn.neighbors import KNeighborsClassifier
提炼样本数据
# Read one sample file from the digit-3 folder and display it as a quick
# visual check of what the raw data looks like.
sample_path = './data/3/3_100.bmp'
img_arr = plt.imread(sample_path)
plt.imshow(img_arr)
读出所有的数据
# Read every sample image into memory.
# Files are addressed as ./data/<digit>/<digit>_<index>.bmp for digits 0..9.
feature = []  # one image array per file
target = []   # label of each image (the digit its folder is named after)
for digit in range(10):
    for index in range(1, 501):
        # Use the index unchanged. The earlier `index + 1` skipped
        # <digit>_1.bmp and requested <digit>_501.bmp instead.
        # NOTE(review): assumes files are numbered 1..500 per digit — confirm
        # against the data folder.
        img_path = f'./data/{digit}/{digit}_{index}.bmp'
        img_arr = plt.imread(img_path)
        feature.append(img_arr)
        target.append(digit)
将样本数据转换为 NumPy 数组并展平
# Turn the collected Python lists into NumPy arrays so they can be reshaped
# and passed to scikit-learn.
feature = np.array(feature)  # was `featrue`, which raised NameError
target = np.array(target)
feature.shape
target.shape
# feature is a 3-D array (one 2-D image per sample). Flatten every image into
# a single row of 28*28 pixel values. The sample count is taken from the data
# instead of being hard-coded, while 28 * 28 stays explicit so that images of
# an unexpected size fail loudly here.
feature = feature.reshape(len(feature), 28 * 28)
feature.shape
将样本数据打乱
# Shuffle samples and labels in place. The global RNG is re-seeded with the
# same value before each shuffle so that both arrays are reordered the same
# way and every row keeps its label.
for _arr in (feature, target):
    np.random.seed(3)
    np.random.shuffle(_arr)
获取训练数据和测试数据
# Train on the first 4950 shuffled samples and hold out the last 50 for testing.
x_train, y_train = feature[:4950], target[:4950]
x_test, y_test = feature[-50:], target[-50:]
实例化模型对象,训练
# Build a k-nearest-neighbours classifier that votes among the 30 closest
# training samples, then fit it on the training split.
knn = KNeighborsClassifier(n_neighbors=30)
knn.fit(x_train, y_train)
# Report accuracy on the held-out split. Scoring on the data the model was
# fitted on (as before) does not measure how well it generalizes.
knn.score(x_test, y_test)
# Show predictions next to the true labels for a side-by-side comparison.
print('预测分类:', knn.predict(x_test))
print('真实数据:', y_test)
模型的保存
# Persist the fitted classifier to disk.
# `sklearn.externals.joblib` was removed from recent scikit-learn releases, so
# prefer the standalone `joblib` package and fall back to the old location
# only on legacy installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
joblib.dump(knn, "./knn.m")  # the closing parenthesis was missing before
读取模型
# Restore the classifier previously written to ./knn.m.
saved_model_path = "./knn.m"
knn = joblib.load(saved_model_path)
让模型进行外部图片的识别
# Load the external picture that the model should recognise and display it.
external_path = './数字.jpg'
img_arr = plt.imread(external_path)
plt.imshow(img_arr)
利用切片取值
# Crop the region of the picture that contains the digit to classify.
# NOTE(review): the column stop 1305 looks like a typo for a much smaller
# value; NumPy clamps out-of-range stops, so the crop may extend to the right
# edge of the image — confirm the intended bounds.
five_arr = img_arr[95:150, 85:1305]
plt.imshow(five_arr)  # was `new_arr`, which is undefined
# The crop is 3-D; average over the third (colour) axis to drop it and get a
# 2-D array.
five_arr = five_arr.mean(axis=2)
five_arr.shape
import scipy.ndimage as ndimage
# Derive the zoom factors from the crop's actual size so the result is always
# 28x28. The earlier hard-coded (28/65, 28/55) was in the wrong axis order for
# a 55-row crop and would not yield the 784 values needed below.
crop_rows, crop_cols = five_arr.shape
five = ndimage.zoom(five_arr, zoom=(28 / crop_rows, 28 / crop_cols))
# The classifier expects one flattened 784-pixel row per sample.
knn.predict(five.reshape(1, 784))