zoukankan      html  css  js  c++  java
  • python 实现简单的KNN算法

    from numpy import *
    import operator
    
    def createDataSet():
        """Build the six-sample toy training set used by the k-NN demo.

        Returns:
            A ``(group, labels)`` pair: ``group`` is a 6x2 NumPy array of
            samples and ``labels`` is the list of class labels, where
            ``labels[i]`` belongs to row ``i`` of ``group``.
        """
        # The first three rows carry the first label, the last three the second.
        first_class_rows = [[3, 104], [2, 100], [1, 81]]
        second_class_rows = [[101, 10], [99, 5], [98, 2]]
        samples = array(first_class_rows + second_class_rows)
        sample_labels = ['爱情片'] * 3 + ['动作片'] * 3
        return samples, sample_labels
    
    def classify0(inX, dataSet, labels, k):
        """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

        Distances are Euclidean, computed between ``inX`` and every row of
        ``dataSet``.

        Args:
            inX: Feature vector to classify; its length must equal the number
                of columns of ``dataSet``.
            dataSet: 2-D NumPy array of training samples, one row per sample.
            labels: Sequence of class labels; ``labels[i]`` belongs to row
                ``i`` of ``dataSet``.
            k: Number of nearest neighbours that take part in the vote.

        Returns:
            The label with the most votes among the ``k`` nearest rows. On a
            tie, the label that received its first vote from the closer
            neighbour wins.

        Raises:
            ValueError: If ``k`` is smaller than 1 or larger than the number
                of rows in ``dataSet``.
        """
        dataSetSize = dataSet.shape[0]
        # Fail early with a clear message instead of an IndexError deep in the
        # voting loop (k too large) or on an empty vote table (k < 1).
        if not 1 <= k <= dataSetSize:
            raise ValueError(
                "k must be between 1 and %d (the number of training samples), got %r"
                % (dataSetSize, k)
            )

        # Broadcasting subtracts every training row from inX without building
        # a tiled copy of inX first.
        diffMat = asarray(inX) - dataSet
        distances = (diffMat ** 2).sum(axis=1) ** 0.5
        nearestIndices = distances.argsort()[:k]

        classCount = {}
        for neighbourIndex in nearestIndices:
            voteIlabel = labels[neighbourIndex]
            classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

        # max() returns the first maximal key in insertion order, i.e. the
        # same winner a stable descending sort of the vote counts would put first.
        return max(classCount, key=classCount.get)
    
    # Build the toy training set, then classify a single query point using
    # its 3 nearest neighbours and print the predicted label.
    group, labels = createDataSet()
    queryPoint = [500, 90]
    predictedLabel = classify0(queryPoint, group, labels, 3)
    print(predictedLabel)

     使用错误率来检验算法 

    from numpy import *
    
    import matplotlib
    import matplotlib.pyplot as plt 
    import operator
    
    def file2matrix(filename):
        """Parse a tab-separated data file into a feature matrix and label list.

        Every non-blank line must contain at least three numeric feature
        columns, with an integer class label in the last column.

        Args:
            filename: Path of the text file to read.

        Returns:
            A ``(returnMat, classLabelVector)`` pair: ``returnMat`` is an
            ``(n, 3)`` float array holding the first three columns of each
            data line, and ``classLabelVector`` is the list of the ``n``
            integer labels taken from the last column.

        Raises:
            ValueError: If a feature or label field cannot be converted to a
                number, or a line has fewer than three feature columns.
        """
        # `with` closes the file even when parsing fails part-way through.
        with open(filename) as fr:
            # Blank lines (typically a trailing newline at end of file) carry
            # no sample, so they are skipped instead of crashing the parser.
            rows = [line.strip().split('\t') for line in fr if line.strip()]

        returnMat = zeros((len(rows), 3))
        classLabelVector = []
        for index, listFromLine in enumerate(rows):
            returnMat[index, :] = [float(value) for value in listFromLine[0:3]]
            classLabelVector.append(int(listFromLine[-1]))
        return returnMat, classLabelVector
    
    def autoNorm(dataSet):
        """Rescale every feature column of ``dataSet`` linearly onto [0, 1].

        Each value is mapped to ``(value - column_min) / (column_max - column_min)``.

        Args:
            dataSet: 2-D NumPy array, one row per sample and one column per
                feature.

        Returns:
            A ``(normDataSet, ranges, minVals)`` triple: the normalised array,
            the per-column ranges (max - min) and the per-column minima.
        """
        minVals = dataSet.min(0)
        maxVals = dataSet.max(0)
        ranges = maxVals - minVals
        # A constant column has range 0; dividing by it would yield NaN and
        # poison every distance computed later. Use 1 as divisor there so the
        # column normalises to 0. The returned `ranges` keeps the true values.
        divisors = where(ranges == 0, 1, ranges)
        # Broadcasting applies the per-column minima and divisors to every row.
        normDataSet = (dataSet - minVals) / divisors
        return normDataSet, ranges, minVals
    
    
    def classify0(inX, dataSet, labels, k):
        """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

        Distances are Euclidean, computed between ``inX`` and every row of
        ``dataSet``.

        Args:
            inX: Feature vector to classify; its length must equal the number
                of columns of ``dataSet``.
            dataSet: 2-D NumPy array of training samples, one row per sample.
            labels: Sequence of class labels; ``labels[i]`` belongs to row
                ``i`` of ``dataSet``.
            k: Number of nearest neighbours that take part in the vote.

        Returns:
            The label with the most votes among the ``k`` nearest rows. On a
            tie, the label that received its first vote from the closer
            neighbour wins.

        Raises:
            ValueError: If ``k`` is smaller than 1 or larger than the number
                of rows in ``dataSet``.
        """
        dataSetSize = dataSet.shape[0]
        # Fail early with a clear message instead of an IndexError deep in the
        # voting loop (k too large) or on an empty vote table (k < 1).
        if not 1 <= k <= dataSetSize:
            raise ValueError(
                "k must be between 1 and %d (the number of training samples), got %r"
                % (dataSetSize, k)
            )

        # Broadcasting subtracts every training row from inX without building
        # a tiled copy of inX first.
        diffMat = asarray(inX) - dataSet
        distances = (diffMat ** 2).sum(axis=1) ** 0.5
        nearestIndices = distances.argsort()[:k]

        classCount = {}
        for neighbourIndex in nearestIndices:
            voteIlabel = labels[neighbourIndex]
            classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

        # max() returns the first maximal key in insertion order, i.e. the
        # same winner a stable descending sort of the vote counts would put first.
        return max(classCount, key=classCount.get)
    
    
    def datingClassTest(hoRatio=0.10, filename='datingTestSet2.txt', k=3):
        """Print the k-NN hold-out error rate for a dating data file.

        The file is loaded with ``file2matrix`` and normalised with
        ``autoNorm``. The first ``int(m * hoRatio)`` rows (``m`` = number of
        samples) are then classified with ``classify0`` against the remaining
        rows. Every prediction and the final error rate are printed.

        Args:
            hoRatio: Fraction of the samples, taken from the start of the
                file, that is held out as test vectors.
            filename: Path of the data file to evaluate.
            k: Number of neighbours passed to ``classify0``.

        Raises:
            ValueError: If the split leaves no test vectors or no training
                rows.
        """
        datingDataMat, datingLabels = file2matrix(filename)
        normMat = autoNorm(datingDataMat)[0]
        m = normMat.shape[0]
        numTestVecs = int(m * hoRatio)
        # Without test vectors the error rate would be a division by zero, and
        # without training rows there is nothing to vote; reject both up front.
        if not 0 < numTestVecs < m:
            raise ValueError(
                "hoRatio=%r yields %d test vectors out of %d samples; "
                "need at least one test vector and one training row"
                % (hoRatio, numTestVecs, m)
            )

        # The training slice is identical for every test vector, so it is
        # taken once outside the loop.
        trainMat = normMat[numTestVecs:m, :]
        trainLabels = datingLabels[numTestVecs:m]

        errorCount = 0.0
        for i in range(numTestVecs):
            classifierResult = classify0(normMat[i, :], trainMat, trainLabels, k)
            print("the classifier came back with: %d,the real answer is: %d" % (classifierResult, datingLabels[i]))
            if classifierResult != datingLabels[i]:
                errorCount += 1.0
        print("the total error rate is: %f" % (errorCount/float(numTestVecs)))
    # Run the hold-out evaluation and print its results when the script executes.
    datingClassTest()

     数据集下载:https://i.cnblogs.com/Files.aspx  

    datingTestSet2.rar
  • 相关阅读:
    Xshell配置ssh免密码登录-密钥公钥(Public key)与私钥(Private Key)登录
    bypass disable_function的方法及蚁剑插件bypass-php-function使用
    cisco-GNS3-pix防火墙基本配置实操(持续更新)
    "锁定文件失败 打不开磁盘或它所依赖的某个快照磁盘。模块启动失败。未能启动虚拟机"--解决方法
    python实现图片转字符画
    GNS3--cisco路由器NAT配置
    python虚拟环境----virtualenv
    防火墙----思科路由器基本配置1
    XXE漏洞学习1
    vc程序设计-----位图
  • 原文地址:https://www.cnblogs.com/ncuhwxiong/p/9456943.html
Copyright © 2011-2022 走看看