zoukankan      html  css  js  c++  java
  • python实现K-means

    import pandas as pd
    import numpy as np
    # Load the samples; the first four columns are the features to cluster.
    data = pd.read_csv(r'data.csv')
    # Take an explicit copy: the clustering code adds a column to this frame,
    # and doing that on a slice of ``data`` is ambiguous (view vs. copy).
    train = data.iloc[:,0:4].copy()
    
    # Squared Euclidean distance between one sample and every centre.
    # If the features are on different scales, normalise the data first.
    def nearest(traini,center):
        """Return the position of the row of ``center`` closest to ``traini``.

        Closeness is the squared Euclidean distance; the square root is
        skipped because it does not change which centre is the minimum.

        Args:
            traini: One sample as a Series whose index matches the columns
                of ``center``.
            center: DataFrame with one centre per row.

        Returns:
            The 0-based row position (not the index label) of the closest
            centre. Ties resolve to the first such row.

        Raises:
            ValueError: If ``center`` has no rows.
        """
        # Subtract the sample from every centre at once, aligned on column labels.
        diff = center.sub(traini, axis=1)
        # skipna=False keeps a missing feature visible as a NaN distance
        # instead of silently dropping it from the sum.
        squared = (diff ** 2).sum(axis=1, skipna=False)
        return squared.to_numpy().argmin()
    
    def zhidian(x):
        """Return the centroid of ``x``: its ``sum()`` divided by ``len(x)``."""
        total = x.sum()
        return total / len(x)
    
    # Convergence measure
    def shoulian(train,center):
        """Return the total squared distance from each sample to its centre.

        Args:
            train: DataFrame of samples with a ``near`` column holding, for
                each row, the index label of its centre in ``center``. Every
                other column is treated as a feature.
            center: DataFrame of centres, indexed by the labels used in
                ``near`` and containing at least the feature columns of
                ``train``. A ``near`` column in ``center`` is ignored.

        Returns:
            The sum over all samples of the squared Euclidean distance to
            the assigned centre, as a float (0.0 for an empty ``train``).

        Raises:
            KeyError: If ``near`` names a label that ``center`` does not have.
        """
        # Use every feature column, selected by name, so the measure covers
        # the same columns that the nearest-centre search compares.
        features = [col for col in train.columns if col != 'near']
        labels = train['near'].to_numpy()
        # One centre row per sample, in sample order.
        assigned = center.loc[labels, features].to_numpy()
        diff = train[features].to_numpy() - assigned
        return float((diff ** 2).sum())
    
    def kmeans(train,center,julihe):
        """Iterate k-means until the total squared distance stops shrinking.

        Each pass assigns every sample to its closest centre, recomputes the
        centres as the per-cluster centroids, and measures the total squared
        distance to those new centres. The new centres are accepted only if
        that total is strictly below the best value so far; otherwise the
        loop stops and the last accepted centres are returned.

        Args:
            train: DataFrame of samples, one per row. Every column except an
                existing ``near`` column is treated as a feature. The frame
                passed in is not modified.
            center: DataFrame of starting centres with the same feature
                columns as ``train``.
            julihe: Total squared distance that the first pass must beat.
                Pass ``float('inf')`` to always accept the first pass.

        Returns:
            ``(labelled, center)``: a copy of the samples with an integer
            ``near`` column giving the row position of each sample's centre
            in the returned ``center``, and the last accepted centres.
        """
        # ``near`` is reserved for the cluster label; drop any leftover from a
        # previous run so it is never used as a feature.
        train = train.drop(columns='near', errors='ignore')
        center = center.drop(columns='near', errors='ignore')

        # A loop rather than recursion, so long runs cannot hit the
        # interpreter's recursion limit.
        while True:
            # Assignment step: position of the closest centre for each sample.
            near = np.array(
                [nearest(row, center) for _, row in train.iterrows()],
                dtype=int,
            )
            labelled = train.assign(near=near)

            # Update step. Grouping by the label array (not a column of the
            # frame) keeps the label out of the per-group centroid.
            newcenter = train.groupby(near).apply(zhidian)

            # Convergence check against the best total seen so far.
            newjulihe = shoulian(labelled, newcenter)
            if not newjulihe < julihe:
                print(center)
                return labelled, center

            # Renumber rows 0..k-1 so labels and row positions agree even if
            # a cluster came out empty and its label is missing.
            center = newcenter.reset_index(drop=True)
            julihe = newjulihe
    
    def sdasd(train,julihe,k=4):
        """Seed k-means with the first ``k`` samples and run it.

        Args:
            train: DataFrame of samples, one per row.
            julihe: Initial total squared distance passed through to
                ``kmeans``.
            k: Number of clusters. The default of 4 matches the four rows
                that the earlier label-based slice ``0:3`` selected on a
                default integer index; pass ``k=3`` for three clusters.

        Returns:
            The ``(train, center)`` pair returned by ``kmeans``.

        Raises:
            ValueError: If ``k`` is smaller than 1.
        """
        if k < 1:
            raise ValueError('k must be at least 1')
        # Positional slice, renumbered from 0, so that centre labels always
        # equal their row positions regardless of how ``train`` is indexed.
        center = train.iloc[:k, :].reset_index(drop=True)
        return kmeans(train, center, julihe)
    
    # Start from +inf so the first pass is always accepted; a fixed number
    # would end the run on the seed centres whenever the data's total
    # squared distance is at least that large.
    julihe = float('inf')
    train,center = sdasd(train,julihe)
  • 相关阅读:
    linxu 网络管理
    Linux 命令2
    crontab命令
    Linux 命令
    Linux 命令
    Git命令
    GIT
    Python 修改文件内容
    logging模块
    CSS 选择器
  • 原文地址:https://www.cnblogs.com/chenyaling/p/7262111.html
Copyright © 2011-2022 走看看