zoukankan      html  css  js  c++  java
  • kNN算法

    import numpy as np
    import operator
    import os
    def createDataset():
        """Build the tiny hard-coded training set used to demo the classifier.

        Returns:
            A ``(samples, labels)`` pair: a 4x2 array of points and a list
            holding the class label ('A' or 'B') of each row.
        """
        labels = ['A', 'A', 'B', 'B']
        samples = np.array([
            [1.0, 1.1],
            [1.0, 1.0],
            [0, 0],
            [0, 0.1],
        ])
        return samples, labels
    
    def classify0(inX,dataSet,labels,k):
        """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

        Args:
            inX: the feature vector to classify.
            dataSet: 2-D array with one training sample per row.
            labels: sequence of class labels, indexed like the rows of
                ``dataSet``.
            k: number of nearest neighbours that take part in the vote.

        Returns:
            The label that collected the most votes. When labels tie, the one
            seen first while walking the neighbours from nearest to farthest
            wins, because the ranking sort is stable.
        """
        rowCount = dataSet.shape[0]

        # Euclidean distance from inX to every training row.
        offsets = np.tile(inX, (rowCount, 1)) - dataSet
        distances = (offsets ** 2).sum(axis=1) ** 0.5
        nearestFirst = distances.argsort()

        # Tally the labels of the k closest rows.
        votes = {}
        for rank in range(k):
            label = labels[nearestFirst[rank]]
            if label in votes:
                votes[label] += 1
            else:
                votes[label] = 1

        ranked = sorted(votes.items(), key=lambda item: item[1], reverse=True)
        return ranked[0][0]
    def filematrix(filename):
        """Parse a tab-separated data file into a feature matrix and label list.

        Each non-blank line must hold at least three numeric fields followed by
        an integer class label in the last field, separated by tab characters.

        Args:
            filename: path of the text file to read.

        Returns:
            A ``(returnMat, classLabelVector)`` pair: an ``(n, 3)`` float array
            built from the first three fields of every record, and a list with
            the integer label of every record.

        Raises:
            ValueError: if a field cannot be converted to a number or a record
                has fewer than three feature fields.
        """
        # The context manager closes the file even when parsing fails.
        with open(filename) as fr:
            # Blank lines (e.g. a trailing newline at EOF) carry no record, so
            # drop them before sizing the matrix.
            records = [line.strip().split('\t') for line in fr if line.strip()]

        returnMat = np.zeros((len(records), 3))
        classLabelVector = []
        for index, fields in enumerate(records):
            returnMat[index, :] = [float(value) for value in fields[0:3]]
            classLabelVector.append(int(fields[-1]))
        return returnMat, classLabelVector
    
    def autoNorm(dataSet):
        """Min-max scale every column of ``dataSet``.

        Each value becomes ``(value - column_min) / (column_max - column_min)``.
        A column whose values are all equal has a zero range; such a column is
        mapped to 0 instead of producing NaN from a 0/0 division.

        Args:
            dataSet: 2-D numeric array, one sample per row and one feature per
                column.

        Returns:
            A tuple ``(normalized, ranges, minVals)``: the scaled array with the
            same shape as ``dataSet``, the per-column ``max - min`` (left at 0
            for constant columns) and the per-column minimum.
        """
        minVals = dataSet.min(0)
        maxVals = dataSet.max(0)
        ranges = maxVals - minVals
        # Divide constant columns by 1 so they normalize to 0 rather than NaN.
        divisors = np.where(ranges == 0, 1, ranges)
        # Broadcasting applies the per-column vectors to every row.
        normalized = (dataSet - minVals) / divisors
        return normalized, ranges, minVals
    def datingClassTest():
        """Run a hold-out evaluation of the kNN classifier on 'dts.txt'.

        The file is loaded with ``filematrix`` and scaled with ``autoNorm``.
        The first 10% of the rows serve as test samples; each one is classified
        with ``classify0`` (k=3) against the remaining rows. Every prediction is
        printed next to the true label, and the overall error rate is printed
        once after all test samples have been processed.
        """
        hoRatio = 0.10
        errorCount = 0.0
        datingMat, datingLabels = filematrix('dts.txt')
        normMat, _, _ = autoNorm(datingMat)
        dataRows = normMat.shape[0]
        testDataRows = int(dataRows * hoRatio)
        for i in range(testDataRows):
            classfileterResult = classify0(
                normMat[i, :],
                normMat[testDataRows:dataRows, :],
                datingLabels[testDataRows:dataRows],
                3,
            )
            print("这次分类结果是: %d,这个真实的结果为:%d" % (classfileterResult, datingLabels[i]))
            if classfileterResult != datingLabels[i]:
                errorCount += 1.0
        # Report the rate once, and only when there was something to test, so a
        # very small data set does not trigger a division by zero.
        if testDataRows:
            print("这次分类的总错误率为:%f" % (errorCount / float(testDataRows)))
    
    
    def classifyPerson():
        """Ask for one person's three feature values and print the predicted class.

        The answers are read from standard input, scaled with the minimum and
        range that ``autoNorm`` computes for 'dts.txt', and classified with
        ``classify0`` using k=3. The predicted label is treated as a 1-based
        index into the list of result descriptions.
        """
        verdicts = ['没有魅力', '魅力一般', '很有魅力']

        gameShare = float(input("每天所玩电子游戏的占比?"))
        milesPerYear = float(input("每年的飞行里程数?"))
        iceCreamLitres = float(input("每周吃多少冰淇淋(升)?"))

        samples, sampleLabels = filematrix('dts.txt')
        scaled, spans, lows = autoNorm(samples)

        # Scale the query exactly like the training data before classifying.
        rawQuery = np.array([milesPerYear, gameShare, iceCreamLitres])
        scaledQuery = (rawQuery - lows) / spans
        predicted = classify0(scaledQuery, scaled, sampleLabels, 3)

        print('这个人让人感觉: ', verdicts[predicted - 1])
        
    # 2: Handwriting recognition system
    # Convert a 32*32 binary image matrix into a 1*1024 vector
    
    def img2vector(filename):
        """Flatten a 32x32 text image of digit characters into a 1x1024 vector.

        The first 32 characters of each of the first 32 lines are converted
        with ``int`` and stored row after row.

        Args:
            filename: path of the text file holding the image.

        Returns:
            A float array of shape ``(1, 1024)``; element ``32*i + j`` holds the
            value of character ``j`` on line ``i``.

        Raises:
            IndexError: if one of the first 32 lines has fewer than 32
                characters.
            ValueError: if a character is not a decimal digit.
        """
        returnVect = np.zeros((1, 1024))
        # The context manager closes the file even if a line is malformed.
        with open(filename) as fr:
            for i in range(32):
                lineStr = fr.readline()
                for j in range(32):
                    returnVect[0, 32 * i + j] = int(lineStr[j])
        return returnVect
    
    
    # Test code for the handwriting recognition system
    def handwritingClassTest():
        """Evaluate the kNN digit classifier on the files under 'testDigits'.

        Every file in 'trainingDigits' becomes one training row via
        ``img2vector``; its class is the integer that precedes the first '_' in
        the file name. Each file in 'testDigits' is then classified with
        ``classify0`` (k=3) and compared with the label parsed from its own
        name. Prints every prediction, the total number of errors and the
        error rate.
        """
        # Build the training matrix; labels are parsed from the file names.
        trainingFileList = os.listdir('trainingDigits')
        m = len(trainingFileList)
        hwLabels = []
        trainingMat = np.zeros((m, 1024))
        for i, fileNameStr in enumerate(trainingFileList):
            fileStr = fileNameStr.split('.')[0]
            hwLabels.append(int(fileStr.split('_')[0]))
            trainingMat[i, :] = img2vector('trainingDigits/%s' % fileNameStr)

        # Classify every test file and count the mismatches.
        testFileList = os.listdir('testDigits')
        mTest = len(testFileList)
        errorCount = 0.0
        for fileNameStr in testFileList:
            fileStr = fileNameStr.split('.')[0]
            classStr = int(fileStr.split('_')[0])
            vectorUnderTest = img2vector('testDigits/%s' % fileNameStr)
            classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
            print('the classifier came back with: %d, the real answer is: %d' % (classifierResult, classStr))
            if classifierResult != classStr:
                errorCount += 1.0

        print("\nthe total numbers of errors is : %d" % errorCount)
        # An empty test directory would otherwise divide by zero.
        errorRate = errorCount / float(mTest) if mTest else 0.0
        print("\nthe total error rate is: %f" % errorRate)
  • 相关阅读:
    牛客网 二叉树的镜像 JAVA
    牛客网 反转链表 JAVA
    牛客网 调整数组顺序使奇数位于偶数前面 JAVA
    Integer to Roman LeetCode Java
    Valid Number leetcode java
    Longest Common Prefix
    Wildcard Matching leetcode java
    Regular Expression Matching
    Longest Palindromic Substring
    Add Binary LeetCode Java
  • 原文地址:https://www.cnblogs.com/daxiongblog/p/5538498.html
Copyright © 2011-2022 走看看