- knn流程
- 数据读取
- 数据归一化
- knn实现
- 数据分析
以数据的前两项特征为横、纵坐标绘制散点图，结果如下：
- 具体实现
import numpy as np
def read_data(path):
    """Parse the data set into a feature matrix and a label vector.

    Every non-blank line is split on whitespace: the first three fields are
    converted to floats and used as features, the last field is the class
    name.

    :param path: despite its name, an already opened text file (or any other
        iterable of text lines), not a file-system path
    :return: tuple ``(data, label)`` of numpy arrays; ``data`` has one row of
        three floats per sample, ``label`` holds the integer class codes
        (3 for 'didntLike', 2 for 'largeDoses', 1 for anything else)
    """
    data = []
    label = []
    for line in path:
        fields = line.split()
        if not fields:
            # A blank line (e.g. a trailing newline at the end of the file)
            # carries no sample; indexing fields[-1] would raise IndexError.
            continue
        data.append([float(value) for value in fields[0:3]])
        if fields[-1] == 'didntLike':
            label.append(3)
        elif fields[-1] == 'largeDoses':
            label.append(2)
        else:
            label.append(1)
    return np.array(data), np.array(label)
def normalized(data):
    """Compute the column-wise minimum of ``data`` and discard it.

    NOTE(review): this looks like an unfinished stub - the minimum is
    computed but never used, and the function returns ``None``.

    :param data: object exposing ``min`` along axis 0, e.g. a numpy array
    :return: None
    """
    column_minimums = data.min(axis=0)
    return None
def standdata(traindata):
    """Standardize every feature column to zero mean and unit variance.

    :param traindata: numpy array with one sample per row (expected to be
        2-D: samples x features)
    :return: tuple ``(standardized, mean, std)`` - the standardized copy of
        ``traindata`` plus the per-column mean and standard deviation that
        were computed from it
    """
    meandata0 = np.mean(traindata, axis=0)
    stddata0 = np.std(traindata, axis=0)
    # A constant column has zero standard deviation; dividing by it would
    # produce NaN (0 / 0). Divide by 1 instead so the column becomes zeros.
    safe_std = np.where(stddata0 == 0, 1, stddata0)
    # Broadcasting lines the per-column statistics up with every row, so no
    # explicit tiling of the mean/std vectors is required.
    standardized = (traindata - meandata0) / safe_std
    # The true (possibly zero) standard deviation is returned unchanged.
    return standardized, meandata0, stddata0
def autoNorm(x):
    """
    Min-max normalization: rescale every feature column to the range [0, 1].

    :param x: feature array to normalize, one sample per row
    :return: the normalized array, the per-column range (max - min) and the
        per-column minimum
    """
    minVals = x.min(axis=0)
    maxVals = x.max(axis=0)
    ranges = maxVals - minVals
    # A constant column has a zero range; dividing by it would produce NaN
    # (0 / 0). Divide by 1 instead so the column is normalized to zeros.
    safe_ranges = np.where(ranges == 0, 1, ranges)
    x_new = (x - minVals) / safe_ranges  # broadcast over the rows
    # The true (possibly zero) range is returned unchanged.
    return x_new, ranges, minVals
def knn(traindata, testdata, label, k):
    """Classify one sample by majority vote among its k nearest neighbours.

    Neighbours are ranked by Euclidean distance to ``testdata``.

    :param traindata: array of training samples, one sample per row
    :param testdata: feature vector of the sample to classify
    :param label: class label of every training row; any set of sortable
        labels is accepted, not only the codes 1..3
    :param k: number of neighbours that vote; if it exceeds the number of
        training samples, every training sample votes
    :return: the most frequent label among the neighbours; on a tie the
        smallest label wins
    :raises ValueError: if ``k`` is smaller than 1 or there is no training
        sample to vote
    """
    if k < 1:
        raise ValueError("k must be at least 1")
    distance = np.sqrt(np.sum((traindata - testdata) ** 2, axis=1))
    nearest = distance.argsort()[:k]
    if nearest.size == 0:
        raise ValueError("traindata must contain at least one sample")
    # np.unique returns the labels sorted, and np.argmax picks the first
    # maximum, so ties resolve to the smallest label.
    candidates, votes = np.unique(np.asarray(label)[nearest], return_counts=True)
    return candidates[np.argmax(votes)].item()
def testknn(data, label, k, train_ratio=0.9):
    """Estimate the accuracy of ``knn`` with a single hold-out split.

    The leading ``train_ratio`` share of the rows is used as training data
    and the remaining rows are classified one by one. No shuffling is done,
    so the split follows the row order of ``data``. The predicted and the
    expected label of every test sample are printed.

    :param data: feature array, one sample per row
    :param label: class label of every row of ``data``
    :param k: number of neighbours passed on to ``knn``
    :param train_ratio: fraction of the rows used for training (default 0.9)
    :return: fraction of test samples that were classified correctly
    :raises ValueError: if the split leaves no sample for testing
    """
    # Compute the split position once instead of once per slice.
    split = int(np.shape(data)[0] * train_ratio)
    train_data, test_data = data[:split], data[split:]
    train_label, test_label = label[:split], label[split:]
    if len(test_data) == 0:
        # Without this guard the accuracy below divides by zero.
        raise ValueError("the split leaves no samples for testing")
    true_label = 0
    for sample, expected in zip(test_data, test_label):
        result_a = knn(train_data, sample, train_label, k)
        if result_a == expected:
            true_label = true_label + 1
        print(result_a, expected)
    acc = float(true_label) / len(test_data)
    return acc
if __name__ == '__main__':
    path = 'Knn_Helen'
    # Class names indexed by (label code - 1), matching the integer codes
    # assigned in read_data: 1 -> smallDoses, 2 -> largeDoses, 3 -> didntLike.
    true_label = ["smallDoses", 'largeDoses', 'didntLike']
    # The context manager closes the data file even if parsing fails.
    with open(path, 'r') as file:
        print('=======')
        data, label = read_data(file)
    # a: normalized features, b: per-column ranges, c: per-column minimums.
    a, b, c = autoNorm(data)
    acc = testknn(a, label, 25)
    print(acc)
运行结果：准确率能达到 95% 以上。