zoukankan      html  css  js  c++  java
  • python 实现简单的KNN算法

    from numpy import *
    import operator
    
    def createDataSet():
        group = array([[3,104],[2,100],[1,81],[101,10],[99,5],[98,2]])
        labels = ['爱情片','爱情片','爱情片','动作片','动作片','动作片']
        return group, labels
    
    def classify0(inX, dataSet, labels, k):
        dataSetSize = dataSet.shape[0]
        diffMat = tile(inX, (dataSetSize,1)) - dataSet
        sqDiffMat = diffMat ** 2
        sqDistances = sqDiffMat.sum(axis=1)
        distances = sqDistances ** 0.5
        sortedDistIndicies = distances.argsort()
        classCount = {}
        for i in range(k):
            voteIlabel = labels[sortedDistIndicies[i]]
            classCount[voteIlabel] = classCount.get(voteIlabel,0) + 1
        sortedClassCount = sorted(classCount.items(),key=operator.itemgetter(1),reverse=True)
        return sortedClassCount[0][0]
    
    group,labels = createDataSet()
    print(classify0([500,90],group,labels,3))

     使用错误率来检验算法 

    from numpy import *
    
    import matplotlib
    import matplotlib.pyplot as plt 
    import operator
    
    def file2matrix(filename):
        fr = open(filename)
        arrayOLines = fr.readlines()
        numberOfLines = len(arrayOLines)
        returnMat = zeros((numberOfLines,3))
        classLabelVector = []
        index = 0
        for line in arrayOLines:
            line = line.strip()
            listFromLine = line.split('	')
            returnMat[index,:] = listFromLine[0:3]
            classLabelVector.append(int(listFromLine[-1]))
            index += 1
        return returnMat,classLabelVector
    
    def autoNorm(dataSet):
        minVals = dataSet.min(0)
        maxVals = dataSet.max(0)
        ranges = maxVals - minVals
        normDataSet = zeros(shape(dataSet))
        # print(shape(dataSet))
        # print(normDataSet)
        m = dataSet.shape[0]
        normDataSet = dataSet - tile(minVals,(m,1))
        normDataSet = normDataSet / tile(ranges,(m,1))
        return normDataSet, ranges, minVals
    
    
    def classify0(inX, dataSet, labels, k):
        dataSetSize = dataSet.shape[0]
        diffMat = tile(inX, (dataSetSize,1)) - dataSet
        sqDiffMat = diffMat ** 2
        sqDistances = sqDiffMat.sum(axis=1)
        distances = sqDistances ** 0.5
        sortedDistIndicies = distances.argsort()
        classCount = {}
        for i in range(k):
            voteIlabel = labels[sortedDistIndicies[i]]
            classCount[voteIlabel] = classCount.get(voteIlabel,0) + 1
        sortedClassCount = sorted(classCount.items(),key=operator.itemgetter(1),reverse=True)
        return sortedClassCount[0][0]
    
    
    def datingClassTest():
        hoRatio = 0.10
        datingDataMat,datingLabels = file2matrix('datingTestSet2.txt')
        normMat,ranges,minVals = autoNorm(datingDataMat)
        m = normMat.shape[0]
        # print(m)
        numTestVecs = int(m*hoRatio)
        errorCount = 0.0
        for i in range(numTestVecs):
            classifierResult = classify0(normMat[i,:],normMat[numTestVecs:m,:],datingLabels[numTestVecs:m],3)
            print("the classifier came back with: %d,the real answer is: %d" % (classifierResult, datingLabels[i]))
            if (classifierResult != datingLabels[i]):
                errorCount += 1.0
        print("the total error rate is: %f" % (errorCount/float(numTestVecs)))
    datingClassTest()

     数据集下载:https://i.cnblogs.com/Files.aspx  

    datingTestSet2.rar
  • 相关阅读:
    MiniUI表单验证实践
    MiniUI官方表单验证示例
    MiniUI表单验证总结
    Js-事件分发与DOM事件流
    Windows远程桌面连接的利器-mRemote
    Git 以分支的方式同时管理多个项目
    GIT 如何合并另一个远程Git仓库的文件到本地仓库里某个指定子文件夹并不丢失远程提交记录?
    如何导入另一个 Git库到现有的Git库并保留提交记录
    Total Commander如何设置自定义快捷键在当前目录打开ConEmu
    PHP ECSHOP中 诡异的问题:expects parameter 1 to be double
  • 原文地址:https://www.cnblogs.com/ncuhwxiong/p/9456943.html
Copyright © 2011-2022 走看看