zoukankan      html  css  js  c++  java
  • K-means算法[聚类算法]

    聚类算法k-Means的实现

      1 <?php
      2 /*
      3 *Kmeans法(聚类算法的实现)
      4 */
      5 
      6 
      7 /*
      8 
      9 *求误差平方和J
     10 
     11 */
     12 
     13 //--------------------------------------------------------------------
     /**
      * Compute the within-cluster sum of squared errors (the k-means objective J).
      *
      * Every member row stores its label at index 0 and its feature values at
      * indexes 1..D. Every center stores only the D feature values. D is taken
      * from the center itself, so the function is not tied to three features.
      *
      * @param array $center       Clusters: $center[c] is the list of rows assigned to cluster c.
      * @param array $array_center Centers: $array_center[c] is the feature vector of cluster c.
      *
      * @return int|float Sum, over every member row, of the squared Euclidean
      *                   distance between the row and the center of its cluster.
      */
     function JI($center,$array_center)
     {
         $total_sum = 0;
         foreach ($center as $cluster_index => $members) {
             // Re-index so feature offsets are 0-based regardless of the caller's keys.
             $centroid = array_values($array_center[$cluster_index]);
             foreach ($members as $row) {
                 $row_sum = 0;
                 foreach ($centroid as $feature => $value) {
                     // +1 skips the label kept in column 0 of every row.
                     $delta = $row[$feature + 1] - $value;
                     $row_sum += $delta * $delta;
                 }
                 $total_sum += $row_sum;
             }
         }
         return $total_sum;
     }
     33 //--------------------------------------------------------------------
     34 
     35 /*
     36 
     37 *K-means(聚类算法的实现)
     38 
     39 */
     40 
     41 //--------------------------------------------------------------------
     /**
      * Partition the data rows of $train into $k clusters with k-means.
      *
      * Row 0 of $train is treated as a header and is never clustered. Every other
      * row holds a label at index 0 followed by its numeric feature values. The
      * number of features is taken from the first initial center.
      *
      * Each pass assigns every row to its nearest center (ties go to the
      * lowest-numbered cluster), moves each center to the mean of its members and
      * measures the sum of squared errors (SSE). Iteration stops once the SSE
      * changes by no more than 0.000002 between two consecutive passes.
      *
      * Side effect: prints the labels of each final cluster, two spaces after
      * each label and "<br>" after each cluster.
      *
      * @param array $train        Header row followed by data rows (label, feature 1, ..., feature D).
      * @param int   $k            Number of clusters.
      * @param array $array_center Initial centers; the first $k entries are used, each a list of D values.
      *
      * @return array List of $k clusters, each a list of the labels assigned to it
      *               (an empty list for a cluster that attracted no rows).
      *
      * @throws InvalidArgumentException If $k < 1 or fewer than $k initial centers are given.
      */
     function Kmeans($train,$k,$array_center)
     {
         $k = (int) $k;

         // Work on 0-based copies of the first $k seeds.
         $centroids = array();
         foreach (array_slice(array_values($array_center), 0, max($k, 0)) as $seed) {
             $centroids[] = array_values($seed);
         }
         if ($k < 1 || count($centroids) < $k) {
             throw new InvalidArgumentException('Kmeans() needs $k >= 1 and at least $k initial centers.');
         }
         $dimensions = count($centroids[0]);

         // Row 0 is the header line; only the remaining rows are clustered.
         $rows = array_slice($train, 1);

         $previous_sse = null;
         do {
             // Assignment step: every row joins the cluster of its nearest center.
             $clusters = array_fill(0, $k, array());
             foreach ($rows as $row) {
                 $nearest = 0;
                 // Squared distances order the centers exactly like true
                 // distances do, so the square root is not needed here.
                 $nearest_distance = kmeans_squared_distance($row, $centroids[0]);
                 for ($c = 1; $c < $k; $c++) {
                     $distance = kmeans_squared_distance($row, $centroids[$c]);
                     if ($distance < $nearest_distance) {
                         $nearest = $c;
                         $nearest_distance = $distance;
                     }
                 }
                 $clusters[$nearest][] = $row;
             }

             // Update step: move each center to the mean of its members.
             foreach ($clusters as $c => $members) {
                 $member_count = count($members);
                 if ($member_count === 0) {
                     // An empty cluster keeps its previous center instead of
                     // dividing by zero.
                     continue;
                 }
                 $totals = array_fill(0, $dimensions, 0);
                 foreach ($members as $row) {
                     for ($d = 0; $d < $dimensions; $d++) {
                         $totals[$d] += $row[$d + 1];
                     }
                 }
                 for ($d = 0; $d < $dimensions; $d++) {
                     $centroids[$c][$d] = $totals[$d] / $member_count;
                 }
             }

             // Objective: SSE of the current assignment against the moved centers.
             $sse = 0;
             foreach ($clusters as $c => $members) {
                 foreach ($members as $row) {
                     $sse += kmeans_squared_distance($row, $centroids[$c]);
                 }
             }

             // The first pass has nothing to compare with, so it never stops the loop.
             $keep_going = $previous_sse === null || abs($sse - $previous_sse) > 0.000002;
             $previous_sse = $sse;
         } while ($keep_going);

         // Report and return the labels (column 0) of every cluster.
         $result = array();
         foreach ($clusters as $members) {
             $labels = array();
             foreach ($members as $row) {
                 $labels[] = $row[0];
                 print_r($row[0]."  ");
             }
             $result[] = $labels;
             echo "<br>";
         }
         return $result;
     }

     /**
      * Squared Euclidean distance between a labelled data row and a center.
      *
      * @param array $row      Data row: label at index 0, feature values from index 1.
      * @param array $centroid Center: feature values at indexes 0..D-1.
      *
      * @return int|float Sum of squared per-feature differences.
      */
     function kmeans_squared_distance($row, $centroid)
     {
         $total = 0;
         foreach ($centroid as $feature => $value) {
             // +1 skips the label kept in column 0 of the row.
             $delta = $row[$feature + 1] - $value;
             $total += $delta * $delta;
         }
         return $total;
     }
    171 //--------------------------------------------------------------------
    172 
    173 /*
    174 *数据[0,1]规格化
    175 */
    176 //--------------------------------------------------------------------
    /**
     * Rescale every feature column of a table to the range [0, 1] (min-max scaling).
     *
     * Row 0 (the header) and column 0 (the label) are left untouched. The number
     * of columns is taken from the header row. Each numeric cell becomes
     * round((value - min) / (max - min), 2), where min and max are taken over
     * the numeric cells of the same column.
     *
     * A column whose numeric values are all equal has no spread to scale by, so
     * its cells are set to 0.0 instead of dividing by zero. Cells that are
     * missing or not numeric are left as they are.
     *
     * @param array $train Header row followed by data rows (label, feature values...).
     *
     * @return array The table with its feature columns rescaled.
     */
    function normalization($train)
    {
        $row_count = count($train);
        if ($row_count < 2) {
            // Header only (or nothing at all): there is no data to rescale.
            return $train;
        }

        $column_count = count($train[0]);
        for ($column = 1; $column < $column_count; $column++) {
            // Gather the numeric cells of this column, keyed by row index.
            $values = array();
            for ($row = 1; $row < $row_count; $row++) {
                if (isset($train[$row][$column]) && is_numeric($train[$row][$column])) {
                    // "+ 0" turns numeric strings read from a file into numbers.
                    $values[$row] = $train[$row][$column] + 0;
                }
            }
            if (count($values) === 0) {
                continue;
            }

            $min = min($values);
            $range = max($values) - $min;
            foreach ($values as $row => $value) {
                $train[$row][$column] = $range == 0
                    ? 0.0
                    : round(($value - $min) / $range, 2);
            }
        }
        return $train;
    }
    202 //--------------------------------------------------------------------
    203 
    204 
    205 /*
    206 
    207 *把.txt中的内容读到数组中保存
    208 *$filename:文件名称
    209 
    210 */
    211 
    212 //--------------------------------------------------------------------
    /**
     * Read a whitespace-delimited text table into an array of rows.
     *
     * The file is split into lines on "\r\n", "\r" or "\n". Blank lines are
     * skipped, and each remaining line is split into cells on runs of spaces
     * or tabs. Cells are returned as strings.
     *
     * @param string $filename Path of the text file to read.
     *
     * @return array List of rows, each a list of string cells.
     *
     * @throws RuntimeException If the file cannot be read.
     */
    function  getFileContent($filename)
    {
        // Turn the warning PHP raises on a failed read into an exception.
        set_error_handler(function ($severity, $message) use ($filename) {
            throw new RuntimeException("Cannot read {$filename}: {$message}");
        });
        try {
            $content = file_get_contents($filename);
        } finally {
            restore_error_handler();
        }
        if ($content === false) {
            throw new RuntimeException("Cannot read {$filename}");
        }

        $rows = array();
        foreach (preg_split('/\r\n|\r|\n/', $content) as $line) {
            $line = trim($line);
            if ($line === '') {
                // A trailing newline or an empty line is not a data row.
                continue;
            }
            $rows[] = preg_split('/\s+/', $line);
        }
        return $rows;
    }
    228 //--------------------------------------------------------------------
    229 
    230 
    231 /*
    232 
    233 *把数组中内容写到.txt中保存
    234 *$result:要存储的数组内容
    235 *$filename:文件名称
    236 
    237 */
    238 
    239 //--------------------------------------------------------------------
    /**
     * Write a two-dimensional array to a text file, one row per line.
     *
     * Every value is followed by a tab character (so each non-empty line ends
     * with a tab) and every row is terminated by "\n". An existing file is
     * overwritten.
     *
     * @param array  $result   Rows to store; each row is a list of scalar values.
     * @param string $filename Path of the file to write.
     *
     * @return void
     *
     * @throws RuntimeException If the file cannot be written.
     */
    function Array_Totxt($result,$filename)
    {
        $text = '';
        foreach ($result as $row) {
            foreach ($row as $value) {
                $text .= $value . "\t";
            }
            $text .= "\n";
        }

        // One write call; a false return means nothing usable was stored.
        if (file_put_contents($filename, $text) === false) {
            throw new RuntimeException("Cannot write {$filename}");
        }
    }
    255 //--------------------------------------------------------------------
    // Load the raw table, then rescale its feature columns.
    $train = getFileContent("train.txt");
    $train_normalization = normalization($train);

    /*
     * Use k = 3, i.e. split these 15 teams into three groups. The values of
     * Japan, Bahrain and Thailand are taken as the seeds of the three clusters.
     */
    $array_center = array();
    foreach (array(2, 13, 10) as $seed_row) {
        // Drop column 0 of the seed row so only the remaining values form the center.
        $array1 = array_slice($train_normalization[$seed_row], 1);
        $array_center[] = $array1;
    }

    // Cluster the normalized rows, then store the clusters and the normalized table.
    $result = Kmeans($train_normalization, 3, $array_center);
    Array_Totxt($result, 'result.txt');
    Array_Totxt($train_normalization, 'normalization_train.txt');
    279 
    280 ?>

    原始数据:

    原始数据进行[0,1]规格化后的数据:

    结果:每行是一个类别

  • 相关阅读:
    Educational Codeforces Round 85 D. Minimum Euler Cycle(模拟/数学/图)
    Educational Codeforces Round 85 C. Circle of Monsters(贪心)
    NOIP 2017 提高组 DAY1 T1小凯的疑惑(二元一次不定方程)
    Educational Codeforces Round 85 B. Middle Class(排序/贪心/水题)
    Educational Codeforces Round 85 A. Level Statistics(水题)
    IOS中的三大事件
    用Quartz 2D画小黄人
    strong、weak、copy、assign 在命名属性时候怎么用
    用代码生成UINavigationController 与UITabBarController相结合的简单QQ框架(部分)
    Attempting to badge the application icon but haven't received permission from the user to badge the application错误解决办法
  • 原文地址:https://www.cnblogs.com/minmsy/p/4974829.html
Copyright © 2011-2022 走看看