zoukankan      html  css  js  c++  java
  • knn 分类Helen数据集

    • knn流程
    1. 数据读取
    2. 数据归一化
    3. knn实现
    • 数据分析

    以数据的前两项特征对数据进行划分得到以下散点图:

    • 具体实现
    import numpy as np
    def read_data(path):
        """Parse the whitespace-separated data set into feature/label arrays.

        Every non-blank line is expected to hold three numeric features
        followed by a textual class name as its last field.

        :param path: despite the name, an already opened text file (or any
            other iterable of text lines), not a filesystem path.
        :return: ``(data, label)`` -- an array with one row of three floats
            per sample, and an array of integer class codes where
            ``'didntLike'`` is 3, ``'largeDoses'`` is 2 and every other
            class name is 1.
        """
        # Any class name missing from this table falls back to code 1.
        class_codes = {'didntLike': 3, 'largeDoses': 2}
        data = []
        label = []
        for line in path:
            fields = line.split()
            if not fields:
                # Blank lines (typically a trailing newline) carry no sample;
                # without this guard fields[-1] raises IndexError.
                continue
            data.append([float(value) for value in fields[0:3]])
            label.append(class_codes.get(fields[-1], 1))
        return np.array(data), np.array(label)
    
    def normalized(data):
        """Unfinished normalisation helper.

        Only the column-wise minimum of ``data`` is computed; the value is
        never used and the function implicitly returns ``None``.
        NOTE(review): this looks like an abandoned first draft of a min-max
        normalisation -- confirm before relying on it.

        :param data: object exposing ``min(axis)`` (e.g. a NumPy array)
        :return: None
        """
        column_minimum = data.min(axis=0)  # computed, then discarded
        return None
    
    def standdata(traindata):
        """Standardise every feature column to zero mean and unit variance.

        :param traindata: array-like of samples with one row per sample
            (a 1-D sequence is treated as a single feature)
        :return: ``(standardised, mean, std)`` -- the z-scored data plus the
            per-column mean and (population) standard deviation used. The
            returned ``std`` is the true value, so it is 0 for a constant
            column.
        """
        traindata = np.asarray(traindata)
        meandata0 = np.mean(traindata, axis=0)
        stddata0 = np.std(traindata, axis=0)
        # A constant column has std 0; dividing by 1 instead maps it to all
        # zeros rather than NaN (0/0).
        safe_std = np.where(stddata0 == 0, 1, stddata0)
        # Broadcasting aligns the per-column statistics with every row, so no
        # tiled copies are needed and 1-D input keeps its shape.
        standardised = (traindata - meandata0) / safe_std
        return standardised, meandata0, stddata0
    def autoNorm(x):
        """
        Min-max normalisation: rescale every feature column to [0, 1].

        :param x: feature array to normalise, one row per sample
        :return: the normalised array, the per-column range (max - min) and
            the per-column minimum. The returned range is the true value, so
            it is 0 for a constant column.
        """
        x = np.asarray(x)
        minVals = x.min(axis=0)
        maxVals = x.max(axis=0)
        ranges = maxVals - minVals

        # A constant column has range 0; dividing by 1 instead maps it to all
        # zeros rather than NaN (0/0).
        safe_ranges = np.where(ranges == 0, 1, ranges)
        x_new = (x - minVals) / safe_ranges  # broadcast across rows

        return x_new, ranges, minVals
    
    def knn(traindata,testdata,label,k):
        """Classify one sample by majority vote of its k nearest neighbours.

        Distances are Euclidean. When several classes receive the same number
        of votes, the smallest label wins.

        :param traindata: 2-D array of training samples, one row per sample
        :param testdata: 1-D feature vector of the sample to classify
        :param label: non-negative integer class labels, one per training row
        :param k: number of neighbours to consult; values larger than the
            training set are clamped to the training-set size
        :return: the winning class label as a Python int
        :raises ValueError: if ``k`` is smaller than 1 or ``traindata`` is empty
        """
        if k < 1:
            raise ValueError("k must be at least 1")
        distance = np.sqrt(np.sum((traindata - testdata) ** 2, axis=1))
        if distance.size == 0:
            raise ValueError("traindata must contain at least one sample")
        # Asking for more neighbours than exist would index past the end of
        # the sorted distances.
        k = min(k, distance.size)
        nearest = distance.argsort()[:k]
        # bincount sizes the tally from the labels themselves, so the set of
        # classes is no longer fixed to 1..3.
        votes = np.bincount(np.asarray(label)[nearest])
        # argmax returns the first maximum, i.e. the smallest label on a tie.
        return int(np.argmax(votes))
    
    def testknn(data,label,k):
        """Hold-out evaluation of ``knn`` with a fixed 90% / 10% split.

        The first 90% of the rows are used as the training set and the rest
        as the test set. The rows are taken in their given order; nothing is
        shuffled. Each prediction is printed next to the expected label.

        :param data: feature array, one row per sample
        :param label: class labels aligned with ``data``
        :param k: number of neighbours passed on to ``knn``
        :return: fraction of test samples that were classified correctly
        """
        split = int(np.shape(data)[0] * 0.9)
        train_data, test_data = data[0:split], data[split:]
        train_label, test_label = label[0:split], label[split:]

        hits = 0
        for i, expected in enumerate(test_label):
            predicted = knn(train_data, test_data[i], train_label, k)
            if predicted == expected:
                hits += 1
            print(predicted, expected)

        acc = float(hits) / len(test_data)
        return acc
    
    if __name__ == '__main__':
        # Script entry point: load the data set, min-max normalise it and
        # print the hold-out accuracy of kNN with k=25.
        path='Knn_Helen'
        # Class names; not used below.
        true_label=["smallDoses",'largeDoses','didntLike']
        # The context manager closes the file once it has been parsed; the
        # handle was previously left open.
        with open(path,'r') as file:
            print('=======')
            data,label=read_data(file)
        a,b,c=autoNorm(data)
        acc=testknn(a,label,25)
        print(acc)
    

    准确率能达到95%以上

  • 相关阅读:
    [原]Eclipse 安装SVN、Maven插件(补充)
    [原]几个云笔记的简单比较
    [原]Unity3d中奇怪的编译错误
    [原]unity中WWW isDone方法只能在主线程中调用
    C语言。格式化符号
    Unity iOS Guideline 1.3
    AR增强现实 之Metaio For Unity 开发 之HelloWorld
    Unity 4.6 beta 20 or 4.5.5 +vuforia3.0.9 发布到真机错误 解决
    服务器端Ajax异步分页类,基本通用分页类
    从数据库中读取数据并写入到Excel电子表格之2
  • 原文地址:https://www.cnblogs.com/peng-yuan/p/14703275.html
Copyright © 2011-2022 走看看