zoukankan      html  css  js  c++  java
  • KNN算法实现

    import csv
    import random
    import math
    import operator
    import numpy as np

    def loadDataset(filename,split,trainingSet=[],testSet=[]): #加载数据集,并随机分为训练集和测试集
    with open(filename,'rt') as csvfile:
    lines=csv.reader(csvfile) #读取所有行
    dataset=list(lines) #把数据转为list数据结构
    for x in range(len(dataset)-1):
    for y in range(4):
    dataset[x][y] = float(dataset[x][y])
    if random.random() <split:
    trainingSet.append(dataset[x])
    else:
    testSet.append(dataset[x])
    #print(dataset)

    def euDistance(instance1,instance2,length): #计算欧式距离
    distance =0
    instance11=[]
    instance21 = []
    instance11.append(instance1[:4])
    instance21.append(instance2[:4])
    for y in range(4):
    distance += pow((instance11[0][y] - instance21[0][y]), 2)
    #print(math.sqrt(distance)," ",type(math.sqrt(distance)))
    return math.sqrt(distance)

    def getNeighbors(trainingSet,testInstance,k): #对一个测试实例得到训练集中K个邻居
    distances=[]
    length=len(testInstance)-1
    for x in range(len(trainingSet)):
    dist = euDistance(testInstance,trainingSet[x],length) #计算一个测试实例样本到训练集中所有样本的距离
    distances.append((trainingSet[x],dist)) #拷贝训练集,并加上每一个样本到测试实例的距离,存放在distance中
    distances.sort(key=operator.itemgetter(1)) #排序,小到大
    neighbors=[]
    for x in range(k):
    neighbors.append(distances[x][0]) #返回最近的n个邻居:内容为:([样本],距离)
    return neighbors

    def getResponse(neighbors): #投票决定k个邻居中最近的那个
    #print(neighbors)
    classVotes={}
    for x in range(len(neighbors)):
    responses = neighbors[x][-1] #-1代表最后一个
    if responses in classVotes:
    classVotes[responses]+=1
    else:
    classVotes[responses]=1
    sorteVotes=sorted(classVotes.items(),key=operator.itemgetter(1),reverse=True)
    #print(sorteVotes)
    return sorteVotes[0][0] #所预测的此次测试样本归属的分类

    def getAccuracy(testSet,prediction):
    correct = 0
    for x in range(len(testSet)):
    if testSet[x][-1]==prediction[x]:
    correct += 1
    return (correct/float(len(testSet)))*100.

    def main():
    trainSet=[]
    testSet=[]
    split=0.67
    loadDataset(r"irisdata.txt",split,trainSet,testSet)
    print("Train Set:",repr(len(trainSet))) #训练集
    print("Test ",repr(len(testSet))) #测试集
    predictions=[]
    k=3
    for x in range(len(testSet)):
    neighbors=getNeighbors(trainSet,testSet[x],k) #测试集中每一个样本的(测试集中的)最近K个邻居,内容为:([样本],距离)
    result=getResponse(neighbors) #投票得到最近的那个归属的分类
    predictions.append(result)
    print("predicted="+repr(result)+", actual="+repr(testSet[x][-1]))
    accuracy = getAccuracy(testSet,predictions)
    print("accuracy:"+repr(accuracy)+"%")

    main()



    #################################以下为irisdata.txt源数据,拷贝放置在python执行文件目录中即可食用
    5.1,3.5,1.4,0.2,Iris-setosa
    4.9,3.0,1.4,0.2,Iris-setosa
    4.7,3.2,1.3,0.2,Iris-setosa
    4.6,3.1,1.5,0.2,Iris-setosa
    5.0,3.6,1.4,0.2,Iris-setosa
    5.4,3.9,1.7,0.4,Iris-setosa
    4.6,3.4,1.4,0.3,Iris-setosa
    5.0,3.4,1.5,0.2,Iris-setosa
    4.4,2.9,1.4,0.2,Iris-setosa
    4.9,3.1,1.5,0.1,Iris-setosa
    5.4,3.7,1.5,0.2,Iris-setosa
    4.8,3.4,1.6,0.2,Iris-setosa
    4.8,3.0,1.4,0.1,Iris-setosa
    4.3,3.0,1.1,0.1,Iris-setosa
    5.8,4.0,1.2,0.2,Iris-setosa
    5.7,4.4,1.5,0.4,Iris-setosa
    5.4,3.9,1.3,0.4,Iris-setosa
    5.1,3.5,1.4,0.3,Iris-setosa
    5.7,3.8,1.7,0.3,Iris-setosa
    5.1,3.8,1.5,0.3,Iris-setosa
    5.4,3.4,1.7,0.2,Iris-setosa
    5.1,3.7,1.5,0.4,Iris-setosa
    4.6,3.6,1.0,0.2,Iris-setosa
    5.1,3.3,1.7,0.5,Iris-setosa
    4.8,3.4,1.9,0.2,Iris-setosa
    5.0,3.0,1.6,0.2,Iris-setosa
    5.0,3.4,1.6,0.4,Iris-setosa
    5.2,3.5,1.5,0.2,Iris-setosa
    5.2,3.4,1.4,0.2,Iris-setosa
    4.7,3.2,1.6,0.2,Iris-setosa
    4.8,3.1,1.6,0.2,Iris-setosa
    5.4,3.4,1.5,0.4,Iris-setosa
    5.2,4.1,1.5,0.1,Iris-setosa
    5.5,4.2,1.4,0.2,Iris-setosa
    4.9,3.1,1.5,0.1,Iris-setosa
    5.0,3.2,1.2,0.2,Iris-setosa
    5.5,3.5,1.3,0.2,Iris-setosa
    4.9,3.1,1.5,0.1,Iris-setosa
    4.4,3.0,1.3,0.2,Iris-setosa
    5.1,3.4,1.5,0.2,Iris-setosa
    5.0,3.5,1.3,0.3,Iris-setosa
    4.5,2.3,1.3,0.3,Iris-setosa
    4.4,3.2,1.3,0.2,Iris-setosa
    5.0,3.5,1.6,0.6,Iris-setosa
    5.1,3.8,1.9,0.4,Iris-setosa
    4.8,3.0,1.4,0.3,Iris-setosa
    5.1,3.8,1.6,0.2,Iris-setosa
    4.6,3.2,1.4,0.2,Iris-setosa
    5.3,3.7,1.5,0.2,Iris-setosa
    5.0,3.3,1.4,0.2,Iris-setosa
    7.0,3.2,4.7,1.4,Iris-versicolor
    6.4,3.2,4.5,1.5,Iris-versicolor
    6.9,3.1,4.9,1.5,Iris-versicolor
    5.5,2.3,4.0,1.3,Iris-versicolor
    6.5,2.8,4.6,1.5,Iris-versicolor
    5.7,2.8,4.5,1.3,Iris-versicolor
    6.3,3.3,4.7,1.6,Iris-versicolor
    4.9,2.4,3.3,1.0,Iris-versicolor
    6.6,2.9,4.6,1.3,Iris-versicolor
    5.2,2.7,3.9,1.4,Iris-versicolor
    5.0,2.0,3.5,1.0,Iris-versicolor
    5.9,3.0,4.2,1.5,Iris-versicolor
    6.0,2.2,4.0,1.0,Iris-versicolor
    6.1,2.9,4.7,1.4,Iris-versicolor
    5.6,2.9,3.6,1.3,Iris-versicolor
    6.7,3.1,4.4,1.4,Iris-versicolor
    5.6,3.0,4.5,1.5,Iris-versicolor
    5.8,2.7,4.1,1.0,Iris-versicolor
    6.2,2.2,4.5,1.5,Iris-versicolor
    5.6,2.5,3.9,1.1,Iris-versicolor
    5.9,3.2,4.8,1.8,Iris-versicolor
    6.1,2.8,4.0,1.3,Iris-versicolor
    6.3,2.5,4.9,1.5,Iris-versicolor
    6.1,2.8,4.7,1.2,Iris-versicolor
    6.4,2.9,4.3,1.3,Iris-versicolor
    6.6,3.0,4.4,1.4,Iris-versicolor
    6.8,2.8,4.8,1.4,Iris-versicolor
    6.7,3.0,5.0,1.7,Iris-versicolor
    6.0,2.9,4.5,1.5,Iris-versicolor
    5.7,2.6,3.5,1.0,Iris-versicolor
    5.5,2.4,3.8,1.1,Iris-versicolor
    5.5,2.4,3.7,1.0,Iris-versicolor
    5.8,2.7,3.9,1.2,Iris-versicolor
    6.0,2.7,5.1,1.6,Iris-versicolor
    5.4,3.0,4.5,1.5,Iris-versicolor
    6.0,3.4,4.5,1.6,Iris-versicolor
    6.7,3.1,4.7,1.5,Iris-versicolor
    6.3,2.3,4.4,1.3,Iris-versicolor
    5.6,3.0,4.1,1.3,Iris-versicolor
    5.5,2.5,4.0,1.3,Iris-versicolor
    5.5,2.6,4.4,1.2,Iris-versicolor
    6.1,3.0,4.6,1.4,Iris-versicolor
    5.8,2.6,4.0,1.2,Iris-versicolor
    5.0,2.3,3.3,1.0,Iris-versicolor
    5.6,2.7,4.2,1.3,Iris-versicolor
    5.7,3.0,4.2,1.2,Iris-versicolor
    5.7,2.9,4.2,1.3,Iris-versicolor
    6.2,2.9,4.3,1.3,Iris-versicolor
    5.1,2.5,3.0,1.1,Iris-versicolor
    5.7,2.8,4.1,1.3,Iris-versicolor
    6.3,3.3,6.0,2.5,Iris-virginica
    5.8,2.7,5.1,1.9,Iris-virginica
    7.1,3.0,5.9,2.1,Iris-virginica
    6.3,2.9,5.6,1.8,Iris-virginica
    6.5,3.0,5.8,2.2,Iris-virginica
    7.6,3.0,6.6,2.1,Iris-virginica
    4.9,2.5,4.5,1.7,Iris-virginica
    7.3,2.9,6.3,1.8,Iris-virginica
    6.7,2.5,5.8,1.8,Iris-virginica
    7.2,3.6,6.1,2.5,Iris-virginica
    6.5,3.2,5.1,2.0,Iris-virginica
    6.4,2.7,5.3,1.9,Iris-virginica
    6.8,3.0,5.5,2.1,Iris-virginica
    5.7,2.5,5.0,2.0,Iris-virginica
    5.8,2.8,5.1,2.4,Iris-virginica
    6.4,3.2,5.3,2.3,Iris-virginica
    6.5,3.0,5.5,1.8,Iris-virginica
    7.7,3.8,6.7,2.2,Iris-virginica
    7.7,2.6,6.9,2.3,Iris-virginica
    6.0,2.2,5.0,1.5,Iris-virginica
    6.9,3.2,5.7,2.3,Iris-virginica
    5.6,2.8,4.9,2.0,Iris-virginica
    7.7,2.8,6.7,2.0,Iris-virginica
    6.3,2.7,4.9,1.8,Iris-virginica
    6.7,3.3,5.7,2.1,Iris-virginica
    7.2,3.2,6.0,1.8,Iris-virginica
    6.2,2.8,4.8,1.8,Iris-virginica
    6.1,3.0,4.9,1.8,Iris-virginica
    6.4,2.8,5.6,2.1,Iris-virginica
    7.2,3.0,5.8,1.6,Iris-virginica
    7.4,2.8,6.1,1.9,Iris-virginica
    7.9,3.8,6.4,2.0,Iris-virginica
    6.4,2.8,5.6,2.2,Iris-virginica
    6.3,2.8,5.1,1.5,Iris-virginica
    6.1,2.6,5.6,1.4,Iris-virginica
    7.7,3.0,6.1,2.3,Iris-virginica
    6.3,3.4,5.6,2.4,Iris-virginica
    6.4,3.1,5.5,1.8,Iris-virginica
    6.0,3.0,4.8,1.8,Iris-virginica
    6.9,3.1,5.4,2.1,Iris-virginica
    6.7,3.1,5.6,2.4,Iris-virginica
    6.9,3.1,5.1,2.3,Iris-virginica
    5.8,2.7,5.1,1.9,Iris-virginica
    6.8,3.2,5.9,2.3,Iris-virginica
    6.7,3.3,5.7,2.5,Iris-virginica
    6.7,3.0,5.2,2.3,Iris-virginica
    6.3,2.5,5.0,1.9,Iris-virginica
    6.5,3.0,5.2,2.0,Iris-virginica
    6.2,3.4,5.4,2.3,Iris-virginica
    5.9,3.0,5.1,1.8,Iris-virginica
  • 相关阅读:
    rabbitmq集群几个比较好的文章
    rabbitmq集群步骤
    rabbitmq安装
    查找出系统中大于50k 且小于100k 的文件并删除。
    现将文件a.txt 中的所有abc 替换成def
    统计/var/log/下有多少文件
    压缩解压目录结构不能改变
    chkconfig命令
    linux运维必须掌握
    三剑客
  • 原文地址:https://www.cnblogs.com/yrm1160029237/p/9904435.html
Copyright © 2011-2022 走看看