"""K-means clustering demo.

Loads whitespace-separated numeric rows from ``testSet.txt``, groups them
into four clusters with a plain k-means loop, and plots every cluster, the
centroids, and one circle per cluster whose radius is the distance from the
centroid to its farthest member.
"""

import numpy as np

# Scatter keyword arguments for the first four clusters, in cluster order.
# Clusters beyond the table are drawn with matplotlib's defaults.
_CLUSTER_STYLES = (
    {"c": "blue", "s": 10, "marker": "s"},
    {"c": "green", "marker": "*"},
    {"c": "gray", "marker": "h"},
    {},
)


def loadData(filename):
    """Read a text file of numbers into a list of rows.

    Each non-blank line is split on any run of whitespace (spaces or tabs)
    and every field is converted with ``float``.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list with one ``list`` of floats per non-blank line.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field is not a valid float literal.
    """
    data = []
    with open(filename) as handle:
        for line in handle:
            fields = line.split()
            if not fields:
                continue  # a blank line carries no data point
            # Build a real list: on Python 3 ``map`` is a lazy iterator and
            # would end up as an opaque object inside the matrix.
            data.append([float(field) for field in fields])
    return data


def randcent(data, k):
    """Draw ``k`` random centroids inside the bounding box of ``data``.

    Args:
        data: 2-D array-like (matrix, ndarray or nested list), one point per
            row.
        k: Number of centroids to generate.

    Returns:
        A ``k`` x ``n`` ``np.matrix`` (``n`` = number of columns of ``data``)
        whose column ``i`` lies between the minimum and maximum of column
        ``i`` of ``data``.

    Raises:
        ValueError: If ``data`` has no rows (no minimum exists).
    """
    points = np.asarray(data, dtype=float)
    n = points.shape[1]
    lows = points.min(axis=0)
    spans = points.max(axis=0) - lows
    # rand(n, k).T consumes the random stream column by column, i.e. in the
    # same order as filling one coordinate column at a time.
    return np.asmatrix(lows + spans * np.random.rand(n, k).T)


def calcdist(A, B):
    """Return the Euclidean distance between two equally shaped vectors."""
    diff = np.asarray(A, dtype=float) - np.asarray(B, dtype=float)
    return np.sqrt(np.sum(np.square(diff)))


def kmeans(data, k):
    """Cluster the rows of ``data`` into ``k`` groups with k-means.

    Centroids start at random positions (see ``randcent``). Assignment and
    centroid update alternate until a full pass changes no assignment.

    Args:
        data: 2-D array-like, one point per row.
        k: Number of clusters.

    Returns:
        A tuple ``(cent, mark)`` of ``np.matrix`` objects. ``cent`` is
        ``k`` x ``n`` and holds the final centroids. ``mark`` is ``m`` x 2;
        column 0 is the cluster index of each point and column 1 its
        distance to that cluster's centroid.
    """
    data = np.asmatrix(data, dtype=float)
    m = data.shape[0]
    mark = np.asmatrix(np.zeros((m, 2)))
    # Start from an impossible label so the first pass always registers as a
    # change, even when every point lands in cluster 0. Otherwise the loop
    # could stop with distances measured against the random initial centroid.
    mark[:, 0] = -1
    cent = randcent(data, k)

    centerchanged = True
    while centerchanged:
        centerchanged = False

        # Assignment step: attach every point to its nearest centroid.
        for i in range(m):
            index = -1
            mindata = np.inf  # no finite cap, so large-scale data works too
            for j in range(k):
                dist = calcdist(cent[j, :], data[i, :])
                if dist < mindata:
                    mindata = dist
                    index = j
            if mark[i, 0] != index:
                centerchanged = True
            # Refresh the distance on every pass: the centroid may have moved
            # even when the label did not.
            mark[i, :] = index, mindata

        # Update step: move every centroid to the mean of its members.
        for t in range(k):
            rows = np.nonzero(mark[:, 0].A1 == t)[0]
            if rows.size == 0:
                # An empty cluster has no mean; keep the previous centroid
                # instead of turning it into NaN.
                continue
            cent[t, :] = np.mean(data[rows], axis=0)

    return cent, mark


def _plot_clusters(datamat, centdata, cluster):
    """Plot the points of each cluster, the centroids and bounding circles.

    Only the first two coordinates of the points and centroids are drawn.
    """
    # Imported here so the clustering functions above can be imported and
    # used on machines without matplotlib or without a display.
    import matplotlib.pyplot as plt

    points = np.asarray(datamat)
    centarr = np.asarray(centdata)
    assignment = np.asarray(cluster)
    labels = assignment[:, 0]
    distances = assignment[:, 1]
    theta = np.arange(0, 2 * np.pi, 0.01)

    fig = plt.figure()
    ax = fig.add_subplot(111)

    for t in range(len(centarr)):
        style = _CLUSTER_STYLES[t] if t < len(_CLUSTER_STYLES) else {}
        members = points[labels == t]
        ax.scatter(members[:, 0], members[:, 1], **style)

    ax.scatter(centarr[:, 0], centarr[:, 1], c="red")

    for t, (cx, cy) in enumerate(centarr[:, :2]):
        member_dists = distances[labels == t]
        if member_dists.size == 0:
            continue  # an empty cluster has no radius to draw
        radius = member_dists.max()
        ax.plot(cx + radius * np.cos(theta), cy + radius * np.sin(theta))

    plt.show()


def main():
    """Cluster the points of ``testSet.txt`` into four groups and plot them."""
    datamat = np.asmatrix(loadData("testSet.txt"))
    centdata, cluster = kmeans(datamat, 4)
    _plot_clusters(datamat, centdata, cluster)


if __name__ == "__main__":
    main()