zoukankan      html  css  js  c++  java
  • kNN算法

    import numpy as np
    import operator
    import os
    def createDataset():
        """Return a tiny hard-coded training set for trying out the classifier.

        Returns:
            A ``(group, labels)`` pair: ``group`` is a 4x2 NumPy array of
            points and ``labels`` is the list of class names, one per row.
        """
        points = [
            [1.0, 1.1],
            [1.0, 1.0],
            [0, 0],
            [0, 0.1],
        ]
        tags = list('AABB')
        return np.array(points), tags
    
    def classify0(inX, dataSet, labels, k):
        """Classify ``inX`` by majority vote among its ``k`` nearest rows.

        Args:
            inX: The query vector to classify.
            dataSet: 2-D NumPy array with one training sample per row.
            labels: Sequence of class labels, indexed like the rows of
                ``dataSet``.
            k: Number of nearest neighbours that get a vote.

        Returns:
            The label with the most votes; on a tie, the label that was
            encountered first among the nearest neighbours wins.
        """
        sample_count = dataSet.shape[0]
        # Repeat the query once per training row so the subtraction lines up.
        offsets = np.tile(inX, (sample_count, 1)) - dataSet
        # Euclidean distance from the query to every training row.
        dist = ((offsets ** 2).sum(axis=1)) ** 0.5
        nearest_first = dist.argsort()

        votes = {}
        for rank in range(k):
            neighbour_label = labels[nearest_first[rank]]
            if neighbour_label in votes:
                votes[neighbour_label] += 1
            else:
                votes[neighbour_label] = 1

        # Stable descending sort: equal counts keep their insertion order.
        ranking = sorted(votes.items(), key=lambda pair: pair[1], reverse=True)
        winner = ranking[0]
        return winner[0]
    def filematrix(filename):
        """Load a tab-separated data file into a feature matrix and label list.

        Each non-blank line is split on tab characters; the first three
        fields become one row of the feature matrix and the last field is
        parsed as the integer class label.

        Args:
            filename: Path of the text file to read.

        Returns:
            A ``(returnMat, classLableVector)`` pair: an ``(n, 3)`` float
            NumPy array and a list of ``n`` integer labels.

        Raises:
            ValueError: If a field cannot be converted to a number.
        """
        # The context manager guarantees the handle is closed even on errors.
        with open(filename) as fr:
            stripped = [line.strip() for line in fr]
        # Blank lines (e.g. an empty line at end of file) carry no sample and
        # would otherwise fail numeric conversion, so they are skipped.
        arrayOfLines = [line for line in stripped if line]

        returnMat = np.zeros((len(arrayOfLines), 3))
        classLableVector = []
        for index, line in enumerate(arrayOfLines):
            listFromLine = line.split('\t')
            returnMat[index, :] = [float(value) for value in listFromLine[0:3]]
            classLableVector.append(int(listFromLine[-1]))
        return returnMat, classLableVector
    
    def autoNorm(dataSet):
        """Rescale every column of ``dataSet`` to the range [0, 1].

        Each value is mapped to ``(value - column_min) / (column_max -
        column_min)``. A column whose values are all equal has a range of
        zero; such a column is normalised to all zeros instead of producing
        NaN from a division by zero.

        Args:
            dataSet: 2-D NumPy array with one sample per row.

        Returns:
            A ``(resultDataset, rangs, minVals)`` triple: the normalised
            array, the per-column ranges (max - min, possibly zero) and the
            per-column minimums.
        """
        minVals = dataSet.min(0)
        maxVals = dataSet.max(0)
        rangs = maxVals - minVals
        # Divide by 1 where the range is zero; the numerator is 0 there anyway.
        divisors = np.where(rangs == 0, 1, rangs)
        # Broadcasting applies the per-column vectors to every row.
        resultDataset = (dataSet - minVals) / divisors
        return resultDataset, rangs, minVals
    def datingClassTest():
        """Evaluate the kNN classifier on a hold-out slice of 'dts.txt'.

        The data is loaded with ``filematrix`` and normalised with
        ``autoNorm``. The first 10% of the rows are used as test samples and
        the remaining rows as the training set, with k = 3. Each prediction
        is printed next to the true label, followed by the overall error
        rate once all test samples have been classified.
        """
        hoRatio = 0.10
        errorCount = 0.0
        datingMat, datingLabels = filematrix('dts.txt')
        normMat, _, _ = autoNorm(datingMat)
        dataRows = normMat.shape[0]
        testDataRows = int(dataRows * hoRatio)
        # Rows after the hold-out slice form the training set.
        trainMat = normMat[testDataRows:dataRows, :]
        trainLabels = datingLabels[testDataRows:dataRows]
        for i in range(testDataRows):
            classfileterResult = classify0(normMat[i, :], trainMat, trainLabels, 3)
            print("这次分类结果是: %d,这个真实的结果为:%d" % (classfileterResult, datingLabels[i]))
            if classfileterResult != datingLabels[i]:
                errorCount += 1.0
        # Report the rate once, after every test row has been counted; skip it
        # when there are no test rows to avoid dividing by zero.
        if testDataRows:
            print("这次分类的总错误率为:%f" % (errorCount / float(testDataRows)))
    
    
    def classifyPerson():
        """Prompt for three feature values and print the predicted category.

        Reads three numbers from standard input, normalises them with the
        minimums and ranges computed from 'dts.txt', classifies the result
        with k = 3 and prints the matching entry of the result list (labels
        are treated as 1-based indices into that list).
        """
        verdicts = ['没有魅力', '魅力一般', '很有魅力']
        game_share = float(input("每天所玩电子游戏的占比?"))
        flight_miles = float(input("每年的飞行里程数?"))
        ice_cream_litres = float(input("每周吃多少冰淇淋(升)?"))

        samples, sample_labels = filematrix('dts.txt')
        scaled_samples, spans, floors = autoNorm(samples)

        query = np.array([flight_miles, game_share, ice_cream_litres])
        # Scale the query with the same minimums and ranges as the samples.
        scaled_query = (query - floors) / spans
        predicted = classify0(scaled_query, scaled_samples, sample_labels, 3)

        verdict = verdicts[predicted - 1]
        print('这个人让人感觉: ', verdict)
        
    # 2:手写识别系统
    #将一个32*32的二进制图像矩阵转换成1*1024的向量
    
    def img2vector(filename):
        """Flatten a 32x32 text image of digits into a 1x1024 row vector.

        The first 32 characters of each of the first 32 lines are converted
        with ``int`` and stored row by row.

        Args:
            filename: Path of the text file holding the image.

        Returns:
            A NumPy array of shape ``(1, 1024)``.

        Raises:
            IndexError: If a line has fewer than 32 characters.
            ValueError: If a character is not a digit.
        """
        returnVect = np.zeros((1, 1024))
        # The context manager closes the file even if parsing fails part-way.
        with open(filename) as fr:
            for i in range(32):
                lineStr = fr.readline()
                for j in range(32):
                    returnVect[0, 32 * i + j] = int(lineStr[j])
        return returnVect
    
    
    #手写识别系统测试代码
    def handwritingClassTest():
        """Train on 'trainingDigits' and report kNN errors on 'testDigits'.

        Every file in each directory is converted with ``img2vector``. The
        true label of a file is the integer before the first underscore in
        its name (after dropping everything from the first dot). Each test
        file is classified with k = 3; the prediction and the true label are
        printed, followed by the total error count and the error rate.
        """
        hwLabels = []
        trainingFileList = os.listdir('trainingDigits')
        m = len(trainingFileList)
        trainingMat = np.zeros((m, 1024))
        for i, fileNameStr in enumerate(trainingFileList):
            # Parse the class label out of the file name.
            fileStr = fileNameStr.split('.')[0]
            classStr = int(fileStr.split('_')[0])
            hwLabels.append(classStr)
            trainingMat[i, :] = img2vector(os.path.join('trainingDigits', fileNameStr))

        testFileList = os.listdir('testDigits')
        errorCount = 0.0
        mTest = len(testFileList)
        for fileNameStr in testFileList:
            fileStr = fileNameStr.split('.')[0]
            classStr = int(fileStr.split('_')[0])
            vectorUnderTest = img2vector(os.path.join('testDigits', fileNameStr))
            classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
            print('the classifier came back with: %d, the real answer is: %d' % (classifierResult, classStr))
            if classifierResult != classStr:
                errorCount += 1.0

        print("\nthe total numbers of errors is : %d" % errorCount)
        # An empty test directory would otherwise divide by zero.
        errorRate = errorCount / float(mTest) if mTest else 0.0
        print("\nthe total error rate is: %f" % errorRate)
  • 相关阅读:
    2019-2020-1 20199310《Linux内核原理与分析》第九周作业
    2019-2020-1 20199310《Linux内核原理与分析》第八周作业
    Android开发笔记(十七)——Fragment详解
    Android开发笔记(十六)——Activity的4种启动模式
    Android开发笔记(十五)——Activity的跳转和数据传递
    Android开发笔记(十四)——Activity的生命周期
    Android开发笔记(十三)——Activity的创建三部曲
    Android实战开发——News
    Android开发笔记(十二)——WebView
    Android开发笔记(十一)——ScrollView滚动视图
  • 原文地址:https://www.cnblogs.com/daxiongblog/p/5538498.html
Copyright © 2011-2022 走看看