zoukankan      html  css  js  c++  java
  • k均值算法

    import matplotlib.pyplot as plt
    import numpy as np
    import time
    from django.template.defaultfilters import center
    def loadDataSet(fileName):
        """Load a tab-separated text file of numbers into a list of rows.

        Every non-blank line is stripped of surrounding whitespace, split on
        tab characters, and each field is converted with ``float``.

        Args:
            fileName: Path of the text file to read.

        Returns:
            A list holding one list of floats per non-blank line, in file order.

        Raises:
            OSError: If the file cannot be opened.
            ValueError: If a field cannot be parsed as a float.
        """
        dataMat = []
        # The context manager closes the handle even if float() raises midway.
        with open(fileName) as fr:
            for line in fr:
                stripped = line.strip()
                if not stripped:
                    # Blank lines (e.g. a trailing newline) carry no data and
                    # would otherwise fail in float('').
                    continue
                dataMat.append([float(field) for field in stripped.split('\t')])
        return dataMat
    
    def distEclud(vecA,vecB):
        """Return the Euclidean distance between ``vecA`` and ``vecB``.

        The element-wise difference is squared, summed over all elements and
        the square root of that sum is returned.
        """
        delta = vecA - vecB
        squared = np.power(delta, 2)
        return np.sqrt(np.sum(squared))
    
    def randCent(dataSet,k):
        """Create ``k`` random centroids inside the bounding box of ``dataSet``.

        For every column of ``dataSet`` the minimum and the value range are
        computed, and ``k`` uniform random numbers from ``np.random.rand`` are
        scaled into ``[min, min + range)`` for that column.

        Args:
            dataSet: 2-D array (or matrix) indexable as ``dataSet[:, j]``.
            k: Number of centroids to generate.

        Returns:
            A ``k x n`` ``np.matrix`` of centroids, where ``n`` is the number
            of columns of ``dataSet``.
        """
        n = np.shape(dataSet)[1]
        # np.asmatrix is the function the ``np.mat`` alias pointed to; the
        # alias itself no longer exists in NumPy 2.0.
        centroids = np.asmatrix(np.zeros((k, n)))
        # Random numbers are still drawn column by column so that results for
        # a fixed np.random seed stay the same.
        for j in range(n):
            column = dataSet[:, j]
            minJ = np.min(column)
            rangeJ = float(np.max(column) - minJ)
            centroids[:, j] = minJ + rangeJ * np.random.rand(k, 1)
        return centroids
    
    def kMeans(dataSet,k):
        """Cluster the rows of ``dataSet`` into ``k`` groups with k-means.

        Starting from random centroids produced by ``randCent``, every row is
        assigned to its nearest centroid (distance from ``distEclud``), then
        each centroid is moved to the mean of its assigned rows. This repeats
        until a full pass changes no assignment.

        Args:
            dataSet: 2-D array with one sample per row.
            k: Number of clusters.

        Returns:
            A tuple ``(centroids, clusterAssment)``. ``centroids`` is the
            ``k x n`` matrix of final centroids. ``clusterAssment`` is an
            ``m x 2`` matrix whose first column is the assigned cluster index
            and whose second column is the squared distance to that centroid.
        """
        m = np.shape(dataSet)[0]
        # np.asmatrix replaces the ``np.mat`` alias, which NumPy 2.0 removed.
        clusterAssment = np.asmatrix(np.zeros((m, 2)))
        # -1 marks "not assigned yet". Starting at 0 would look like a valid
        # assignment to cluster 0 and could end the loop after a single pass.
        clusterAssment[:, 0] = -1
        centroids = randCent(dataSet, k)
        clusterChanged = True
        while clusterChanged:
            clusterChanged = False
            # Assignment step: find the nearest centroid of every sample.
            for i in range(m):
                minDist = np.inf
                minIndex = -1
                for j in range(k):
                    distJI = distEclud(centroids[j, :], dataSet[i, :])
                    if distJI < minDist:
                        minDist = distJI
                        minIndex = j
                if clusterAssment[i, 0] != minIndex:
                    clusterChanged = True
                clusterAssment[i, :] = minIndex, minDist ** 2
            # Update step: move every centroid to the mean of its members.
            for cent in range(k):
                memberRows = np.nonzero(clusterAssment[:, 0].A == cent)[0]
                # An empty cluster keeps its previous centroid; the mean of an
                # empty selection is NaN and would poison all later distances.
                if memberRows.size:
                    centroids[cent, :] = np.mean(dataSet[memberRows], axis=0)
        return centroids, clusterAssment
    def showImage(dataSet,center,label):
        """Scatter-plot every cluster in its own colour, plus the centroids.

        Args:
            dataSet: 2-D array of samples; columns 0 and 1 are used as x and y.
            center: Centroids, one per row (``np.matrix`` or ndarray). The
                number of rows determines how many clusters are drawn.
            label: Cluster index of every row of ``dataSet``; it is flattened,
                so an ``m x 1`` matrix column works as well as a 1-D array.
        """
        c = ['r', 'g', 'w', 'b']
        points = np.asarray(dataSet)
        labels = np.asarray(label).ravel()
        centers = np.asarray(center)
        for i in range(centers.shape[0]):
            mask = labels == i
            # Colours are cycled so more than len(c) clusters can be drawn;
            # the black edge keeps the white ('w') markers visible on a white
            # axes background.
            plt.scatter(points[mask, 0], points[mask, 1], s=40,
                        c=c[i % len(c)], edgecolors='k')
        plt.scatter(centers[:, 0], centers[:, 1], c='m', marker='p', s=200)
        plt.show()


    if __name__ == '__main__':
        # time.clock() no longer exists since Python 3.8; perf_counter() is
        # the documented replacement for measuring elapsed time.
        startTime = time.perf_counter()
        dataSet = loadDataSet("testSet.txt")
        dataSet = np.array(dataSet)
        print(dataSet)
        center, cluster = kMeans(dataSet, 4)
        print(center)
        endTime = time.perf_counter()
        print(endTime - startTime)
        showImage(dataSet, center, cluster[:, 0])

    figure_1

  • 相关阅读:
    gvim在windows下的一些小技巧
    解决eclipse在ubuntu下无法找到jdk方法
    ubuntu 12.04 下安装wireshark
    使用坚果云同步数据
    Windows WMIC命令详解 (Windows Management Instrumentation Commandline)
    ubuntu 12.04 配置指南
    CHROME自定义样式扩展 —— STYLISH
    地漏
    卫生间装修,想要坐便改成蹲便,地面需要加高多少?
    不锈钢橱柜
  • 原文地址:https://www.cnblogs.com/sklww/p/3737003.html
Copyright © 2011-2022 走看看