zoukankan      html  css  js  c++  java
  • k-Means聚类算法

    聚类算法是ML中一个重要分支,一般采用unsupervised learning进行学习,聚类算法分为K-Means, K-Medoids, GMM, Spectral clustering,Ncut五个算法;本文将实现K-eans算法。

    K-Means算法:

           1. 将数据分为k个非空子集

           2. 计算每个类中心点(k-means<centroid>中心点是所有点的average),记为seed point

           3. 将每个object聚类到最近seed point

           4. 返回2,当聚类结果不再变化的时候stop

    复杂度:

           O(kndt)

           -计算两点间距离:d

           -指定类:O(kn)   ,k是类数

           -迭代次数上限:t

    KMeans.h

      1 #include "stdafx.h"
      2 #include <iostream>
      3 using namespace std;
      4 template <typename Type>
      5 class KMeans{
      6 public:
      7     KMeans(const size_t nd =0,const int nk=0,const float precision = 0.0001):m_ndataNumbers(nd),
      8         m_nkNumbers(nk),m_iterations(0),m_datas(NULL),m_center(NULL),m_precision(precision){
      9         m_datas = new Type[m_ndataNumbers];
     10         m_center = new Type[m_nkNumbers];
     11     }
     12     KMeans(Type[],Type[],const size_t ,const int,const float);
     13     ~KMeans(){
     14         delete[]m_datas;
     15         delete[]m_center;
     16     }
     17     
     18     Type* getDatas()const;              // get the datas
     19     Type* getCenter()const;         // get the centers
     20     int iterationTimes()const;      // iteration times 
     21     void kmeans();        // carry out k-means 
     22     void printCenter(); // cout center
     23 private:
     24     float dataDivide(Type* , size_t*);   // data divide
     25     void changeCenters(Type* , size_t*);   //  change centers 
     26 private:
     27     Type *m_datas;
     28     Type *m_center;
     29     const size_t m_ndataNumbers;   //data numbers
     30     const  int m_nkNumbers;   // center numbers
     31     const float m_precision;    // end iteration precision 
     32     int m_iterations;   //    carry out times
     33 };
     34  // initialize the datas and the center
     35 template <typename Type>
     36 KMeans<Type>::KMeans( Type datas[] ,Type center[],const size_t nd,const int nk,const float precision):
     37     m_ndataNumbers(nd),m_nkNumbers(nk),m_iterations(0),m_center(NULL),m_datas(NULL),m_precision(precision){
     38     m_datas = new Type[m_ndataNumbers];
     39     m_center=new Type[m_nkNumbers];
     40     for(size_t i = 0 ; i<m_ndataNumbers ;i++){
     41         m_datas[i] = datas[i];
     42     }
     43     for(int i = 0 ; i<m_nkNumbers ; i++){
     44         m_center[i] = center[i];
     45     }
     46 }
     47  template <typename Type>
     48  Type* KMeans<Type> ::getDatas()const{
     49      return this->m_datas;
     50  }
     51  // get the center
     52  template <typename Type>
     53  Type* KMeans<Type> ::getCenter()const{
     54      return this->m_center;
     55  }
     56  // get iteration times
     57  template<typename Type>
     58  int KMeans<Type>::iterationTimes()const{
     59      return this->m_iterations;
     60  }
     61  // carry out kmeans
     62  template<typename Type>
     63  void KMeans<Type>::kmeans(){
     64      float previous  = 0;   //  
     65      float current = 1;    
     66      size_t *numbers = new size_t[m_nkNumbers];   // record every cluster datas
     67      Type *sumvalues = new Type[m_nkNumbers];   // record every cluster values
     68      while((current-previous)>m_precision){ 
     69          // initialize zero 
     70          for(int i = 0 ; i<m_nkNumbers ; i++){
     71              numbers[i] = 0;
     72              sumvalues[i] =0;
     73          }
     74          previous = current;
     75          current = dataDivide(sumvalues,numbers);
     76          changeCenters(sumvalues,numbers);
     77          m_iterations++;
     78         
     79      }
     80      delete[] numbers;
     81      delete[] sumvalues;  
     82  }
     83  // data divide
     84 template <typename Type>
     85 float KMeans<Type>::dataDivide(Type*sumvalues , size_t*numbers){
     86     float dist = 0.0;
     87     for(size_t i = 0 ; i<m_ndataNumbers ; i++){
     88         float d = sqrt(float(m_datas[i]-m_center[0]));
     89         int pos = 0;
     90         for(int j =1 ; j <m_nkNumbers ; j++){
     91             if(d > sqrt(float(m_datas[i]-m_center[j]))){
     92                 d = sqrt(float(m_datas[i]-m_center[j]));
     93                 pos = j;
     94             }
     95         dist+=d;
     96         sumvalues[pos]+=m_datas[i];
     97         numbers[pos]++;
     98         }
     99     }
    100     return dist;
    101 }
    102     // change the center
    103 template<typename Type>
    104 void KMeans<Type>::changeCenters(Type*sumvalues , size_t*numbers){
    105     for(int i=0 ; i<m_nkNumbers ; i++){
    106         if(numbers[i]==0)continue;
    107         m_center[i] = sumvalues[i]/numbers[i];
    108     }
    109 }
    110 template<typename Type>
    111 void KMeans<Type>::printCenter(){
    112     for(int i = 0 ; i<m_nkNumbers ;i++){
    113         cout << "center " << i <<":  "<< m_center[i] <<endl;
    114     }
    115     cout <<endl;
    116 }
  • 相关阅读:
    gitignore 过滤文件
    lua语言入门之Sublime Text设置lua的Build System
    进程间通信
    临界区 事件 互斥对象等多线程编程基础
    Delphi通过Map文件查找内存地址出错代码所在行
    Delphi/C++ Builder Map文件格式解析
    深入理解计算机系统----读书笔记
    TCP/IP——内网IP
    Python——import与reload模块的区别
    Linux——grep binary file
  • 原文地址:https://www.cnblogs.com/bobo0892/p/4002988.html
Copyright © 2011-2022 走看看