kNN.py
#!/usr/bin/python
# -*- coding:utf8 -*-
"""Minimal k-nearest-neighbours (kNN) classifier with a toy data set."""

# NOTE: the star import is kept so the module namespace stays unchanged;
# only `array` and `tile` are used below.
from numpy import *
import operator


def createDataSet():
    """Create the toy training set.

    Returns:
        A tuple ``(group, labels)``: ``group`` is a 4x2 array holding one
        sample per row and ``labels`` is the list of class labels, where
        ``labels[i]`` belongs to ``group[i]``.
    """
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels


def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest samples.

    Args:
        inX: The point to classify, with one value per column of ``dataSet``.
        dataSet: 2-D array of training samples, one sample per row.
        labels: Class labels, where ``labels[i]`` belongs to ``dataSet[i]``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that occurs most often among the ``k`` nearest samples.

    Raises:
        IndexError: If ``k`` is smaller than 1 or larger than the number of
            training samples.
    """
    dataSetSize = dataSet.shape[0]  # number of training samples (rows)

    # tile(inX, (m, 1)) stacks inX m times row-wise, giving it the same number
    # of rows as dataSet so the two can be subtracted element-wise.
    diffMat = tile(inX, (dataSetSize, 1)) - dataSet
    sqDistances = (diffMat ** 2).sum(axis=1)
    distances = sqDistances ** 0.5  # Euclidean distance to every sample

    # argsort() yields the sample indices ordered by ascending distance.
    sortedDistIndicies = distances.argsort()

    # Count how often each label occurs among the k nearest samples.
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # Order the (label, votes) pairs by vote count, highest first,
    # e.g. [('A', 2), ('B', 1)]. dict.items() is used instead of the
    # Python 2-only iteritems() so this runs on Python 2 and Python 3.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]
kNN_test.py
import kNN

# Fetch the bundled training samples with their class labels and show both.
samples, sample_labels = kNN.createDataSet()
print(samples)
print(sample_labels)

# Classify the point (2, 1) by a vote among its 3 nearest training samples
# and print the winning label.
query_point = [2, 1]
predicted_label = kNN.classify0(query_point, samples, sample_labels, 3)
print(predicted_label)
result:
[[ 1. 1.1]
 [ 1. 1. ]
 [ 0. 0. ]
 [ 0. 0.1]]
['A', 'A', 'B', 'B']
A