zoukankan      html  css  js  c++  java
  • 机器学习K近邻算法

    from numpy import *
    import operator
    from os import listdir
    def classify0(inX, dataSet, labels, k):
        """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

        The distance between ``inX`` and every row of ``dataSet`` is the
        Euclidean distance.

        Args:
            inX: Feature vector to classify; same length as one row of
                ``dataSet``.
            dataSet: 2-D NumPy array of training samples, one sample per row.
            labels: Sequence of class labels, where ``labels[i]`` is the class
                of ``dataSet[i]``.
            k: Number of nearest neighbours that take part in the vote.

        Returns:
            The label that received the most votes among the ``k`` closest rows.

        Raises:
            IndexError: If ``k`` is larger than the number of rows in
                ``dataSet`` or ``k`` is less than 1 (no votes are cast).
        """
        dataSetSize = dataSet.shape[0]
        # Repeat inX once per training row so the subtraction is element-wise.
        diffMat = tile(inX, (dataSetSize, 1)) - dataSet
        sqDistances = (diffMat ** 2).sum(axis=1)
        distances = sqDistances ** 0.5
        # Row indices ordered from the closest sample to the farthest one.
        sortedDistIndicies = distances.argsort()
        classCount = {}
        for i in range(k):
            voteIlabel = labels[sortedDistIndicies[i]]
            classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
        # dict.items() exists on both Python 2 and 3; iteritems() is Python 2
        # only and raised AttributeError on Python 3.
        sortedClassCount = sorted(classCount.items(),
                                  key=operator.itemgetter(1),
                                  reverse=True)
        return sortedClassCount[0][0]
    
    def autoNorm(dataSet):
        """Rescale every feature column of ``dataSet`` to the range [0, 1].

        Each value is mapped to ``(value - column_min) / (column_max - column_min)``.
        A column whose values are all equal has a zero range; such a column is
        mapped to all zeros instead of dividing by zero.

        Args:
            dataSet: 2-D NumPy array, one sample per row and one feature per
                column.

        Returns:
            A tuple ``(normDataSet, ranges, minVals)`` where ``normDataSet`` is
            the rescaled array, ``ranges`` is the per-column ``max - min`` and
            ``minVals`` is the per-column minimum.
        """
        minVals = dataSet.min(0)
        maxVals = dataSet.max(0)
        ranges = maxVals - minVals
        # Divide constant columns by 1 rather than 0 so they normalise to 0
        # instead of NaN; the returned ranges keep their true (zero) value.
        safeRanges = where(ranges == 0, 1, ranges)
        # Broadcasting applies the per-column offset and scale to every row.
        normDataSet = (dataSet - minVals) / safeRanges
        return normDataSet, ranges, minVals
    
    def file2matrix(filename):
        """Load a tab-separated data file into a feature matrix and label list.

        Every non-blank line must hold at least three numeric fields followed
        by an integer class label in the last field. Blank lines are skipped.

        Args:
            filename: Path of the text file to read.

        Returns:
            A tuple ``(returnMat, classlabelvector)``: ``returnMat`` is an
            ``(n, 3)`` float array built from the first three fields of each
            line, and ``classlabelvector`` is the list of ``n`` integer labels.

        Raises:
            ValueError: If a field cannot be converted to a number or a line
                has fewer than three feature fields.
        """
        # The with-block closes the file even if parsing fails later on.
        with open(filename) as fr:
            records = [line.strip() for line in fr]
        # A trailing newline or empty line carries no record; skipping it keeps
        # the matrix free of unfilled rows and avoids a conversion error.
        records = [record for record in records if record]

        returnMat = zeros((len(records), 3))
        classlabelvector = []
        for index, record in enumerate(records):
            listFromLine = record.split('\t')
            returnMat[index, :] = [float(value) for value in listFromLine[0:3]]
            classlabelvector.append(int(listFromLine[-1]))
        return returnMat, classlabelvector
    
    def datingClassTest():
        """Measure the k-NN error rate on a hold-out slice of the dating data.

        Loads 'datingTestSet.txt', normalises it, then classifies the first
        10% of the rows using the remaining rows as training data with k=3.
        Prints each prediction next to the true label, then the overall error
        rate. Returns nothing.
        """
        hoRatio = 0.10
        datingDataMat, datingLabels = file2matrix('datingTestSet.txt')
        normMat, ranges, minVals = autoNorm(datingDataMat)
        m = normMat.shape[0]
        numTestVecs = int(m * hoRatio)
        errorCount = 0.0
        for i in range(numTestVecs):
            # Rows [0, numTestVecs) are test samples; the rest are training data.
            classiferResult = classify0(normMat[i, :],
                                        normMat[numTestVecs:m, :],
                                        datingLabels[numTestVecs:m],
                                        3)
            # Single-argument print(...) emits the same text on Python 2 and 3.
            print("the classifier came back with %d,the real answer is %d" % (classiferResult, datingLabels[i]))
            if classiferResult != datingLabels[i]:
                errorCount += 1.0
        print("the total error rate is %f" % (errorCount / float(numTestVecs)))
    
    def classifyPerson():
        """Ask for three features of a person and print the predicted category.

        Reads three numbers from standard input, normalises them with the
        minimum and range of the 'datingTestSet2.txt' data, classifies them
        with k=3 and prints the matching entry of the result list. Returns
        nothing.
        """
        resultList = ['not at all', 'in small doses', 'in large doses']
        # raw_input only exists on Python 2; Python 3 renamed it to input.
        try:
            readLine = raw_input
        except NameError:
            readLine = input
        percentTats = float(readLine("percentage of time spent playing video games?"))
        ffMiles = float(readLine("frequent flier miles earned per year?"))
        iceCream = float(readLine("liters of icecream cosumed per year?"))
        datingDataMat, datingLabels = file2matrix('datingTestSet2.txt')
        normMat, ranges, minVals = autoNorm(datingDataMat)
        inArr = array([ffMiles, percentTats, iceCream])
        # Scale the query with the same offset and range as the training data.
        classifierResult = classify0((inArr - minVals) / ranges, normMat, datingLabels, 3)
        # Class labels start at 1, hence the -1 to index resultList.
        print("You will probably like this person %s" % resultList[classifierResult - 1])
  • 相关阅读:
    张建(北京工业大学软件学院副教授)_百度百科
    孤独患者马天宇:独自生存我也会适应得很好_红人访_腾讯娱乐_腾讯网
    zz
    java~springboot~ibatis Invalid bound statement (not found)原因
    数据库~dotnetcore连接Mysql插入中文失败
    k8s~术语解释
    springboot~让我习惯了TDD的开发模式
    POJ 2498 Martian Mining
    Java中的DeskTop类
    我和ip_conntrack不得不说的一些事
  • 原文地址:https://www.cnblogs.com/cherryMJY/p/8525151.html
Copyright © 2011-2022 走看看