"""k-nearest-neighbour classification helpers and a dating-data demo."""
from numpy import *  # noqa: F401,F403 - zeros/where/array below are used unqualified
import operator
from os import listdir


def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Distances are Euclidean, computed between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: Feature vector to classify (one value per column of ``dataSet``).
        dataSet: 2-D numpy array with one training sample per row.
        labels: Sequence of class labels, ``labels[i]`` belonging to row ``i``.
        k: Number of nearest neighbours that vote.  Values larger than the
            number of training rows are clamped to that number.

    Returns:
        The label with the most votes among the nearest neighbours.

    Raises:
        IndexError: If no neighbour votes (``k < 1`` or an empty ``dataSet``).
    """
    dataSetSize = dataSet.shape[0]
    # Broadcasting subtracts inX from every row; the sign of the difference
    # is irrelevant because it is squared immediately.
    diffMat = dataSet - inX
    sqDistances = (diffMat ** 2).sum(axis=1)
    distances = sqDistances ** 0.5
    sortedDistIndicies = distances.argsort()

    classCount = {}
    # Never ask for more neighbours than there are training samples.
    for i in range(min(k, dataSetSize)):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # items() exists on both Python 2 and 3 (iteritems() is Python 2 only).
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]


def autoNorm(dataSet):
    """Rescale every column of ``dataSet`` with min-max normalisation.

    Args:
        dataSet: 2-D numpy array with one sample per row.

    Returns:
        A tuple ``(normDataSet, ranges, minVals)`` where ``normDataSet`` is
        ``(dataSet - minVals) / ranges``, ``minVals`` holds the per-column
        minima and ``ranges`` the per-column ``max - min``.  For a constant
        column the range is reported as 1 instead of 0, so that both this
        function and callers dividing by ``ranges`` avoid a division by zero;
        such a column normalises to all zeros.
    """
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    # A zero range would turn the whole column into NaN/inf.
    ranges = where(ranges == 0, 1, ranges)
    normDataSet = (dataSet - minVals) / ranges
    return normDataSet, ranges, minVals


def file2matrix(filename):
    """Load a whitespace-separated data file into a feature matrix and labels.

    Each non-blank line supplies one sample: its first three fields are the
    features and its last field is an integer class label.  Fields may be
    separated by any run of spaces or tabs; blank lines are ignored.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(returnMat, classlabelvector)``: an ``(n, 3)`` float array
        and a list of ``n`` integer labels.

    Raises:
        ValueError: If a feature is not a number or a label is not an integer.
    """
    # The context manager closes the file even if parsing fails.
    with open(filename) as fr:
        stripped = [line.strip() for line in fr]
    records = [line.split() for line in stripped if line]

    returnMat = zeros((len(records), 3))
    classlabelvector = []
    for index, listFromLine in enumerate(records):
        returnMat[index, :] = [float(value) for value in listFromLine[0:3]]
        classlabelvector.append(int(listFromLine[-1]))
    return returnMat, classlabelvector


def datingClassTest():
    """Print the hold-out error rate of ``classify0`` on 'datingTestSet.txt'.

    The first 10% of the normalised rows are classified with k=3 against the
    remaining rows; every prediction and the final error rate are printed.

    Raises:
        ZeroDivisionError: If the file has fewer than 10 rows, because the
            hold-out set is then empty.
    """
    hoRatio = 0.10
    datingDataMat, datingLabels = file2matrix('datingTestSet.txt')
    normMat, ranges, minVals = autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    errorCount = 0.0
    for i in range(numTestVecs):
        classiferResult = classify0(normMat[i, :], normMat[numTestVecs:m, :],
                                    datingLabels[numTestVecs:m], 3)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("the classifier came back with %d,the real answer is %d"
              % (classiferResult, datingLabels[i]))
        if classiferResult != datingLabels[i]:
            errorCount += 1.0
    print("the total error rate is %f" % (errorCount / float(numTestVecs)))


def classifyPerson():
    """Prompt for three features and print the predicted preference.

    Reads three numbers from standard input, normalises them with the
    statistics of 'datingTestSet2.txt', classifies them with k=3 and prints
    the matching entry of the result list (labels 1..3 map to its entries).
    """
    try:
        readLine = raw_input  # noqa: F821 - Python 2 builtin
    except NameError:
        readLine = input  # Python 3: input() already returns the raw string

    resultList = ['not at all', 'in small doses', 'in large doses']
    percentTats = float(readLine("percentage of time spent playing video games?"))
    ffMiles = float(readLine("frequent flier miles earned per year?"))
    iceCream = float(readLine("liters of icecream cosumed per year?"))

    datingDataMat, datingLabels = file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = autoNorm(datingDataMat)
    inArr = array([ffMiles, percentTats, iceCream])
    # The query must be scaled exactly like the training data.
    classifierResult = classify0((inArr - minVals) / ranges, normMat,
                                 datingLabels, 3)
    print("You will probably like this person %s"
          % resultList[classifierResult - 1])