zoukankan      html  css  js  c++  java
  • 数据挖掘聚类算法--Kmeans

    算法采用的数据集为 iris(鸢尾花),可以在 UCI 上下载:http://archive.ics.uci.edu/ml/datasets/Iris

    数据集介绍:

    1. sepal length in cm
    2. sepal width in cm
    3. petal length in cm
    4. petal width in cm
    5. class:
    -- Iris Setosa
    -- Iris Versicolour
    -- Iris Virginica
    View Code

    直接上代码:

     1 package neugle.kmeans;
     2 
     3 public class IrisModel {
     4     public double Sep_len = 0;
     5     public double Sep_wid = 0;
     6     public double Pet_len = 0;
     7     public double Pet_wid = 0;
     8     public String Iris_type = "";
     9 
    10     public boolean equals(Object obj) {
    11         IrisModel iris = (IrisModel) obj;
    12         return this.Sep_len == iris.Sep_len && this.Sep_wid == iris.Sep_wid
    13                 && this.Pet_len == iris.Pet_len && this.Pet_wid == iris.Pet_wid;
    14     }
    15 }
      1 package neugle.kmeans;
      2 
      3 import java.io.BufferedReader;
      4 import java.io.FileNotFoundException;
      5 import java.io.FileReader;
      6 import java.io.IOException;
      7 import java.util.ArrayList;
      8 import java.util.Iterator;
      9 
     10 public class Kmeans {
     11     private static int k = 3;// 划分簇数目
     12     private static int dataCount = 150;// 文本数量
     13     private static int n = 0;// 迭代次数
     14 
     15     public static void main(String[] args) {
     16         ArrayList<IrisModel> irisList = ReadFile();// 取得文本中数据
     17         ArrayList<IrisModel> beforeRandomPot = new ArrayList<IrisModel>();// 记录上一次质心位置
     18         ArrayList<IrisModel> randomPot = RandomPot(irisList);// 获得随机数据
     19         ArrayList<ArrayList<IrisModel>> kMeansList = null;
     20         while (!CompareRandomPot(beforeRandomPot, randomPot)) {
     21             kMeansList = KMeans(irisList, randomPot);// 进行n次聚类
     22             n++;
     23         }
     24         Print(kMeansList);
     25         System.out.println("迭代了" + n + "次");
     26     }
     27 
     28     // 读取文件中数据
     29     private static ArrayList<IrisModel> ReadFile() {
     30         FileReader read = null;
     31         BufferedReader br = null;
     32         ArrayList<IrisModel> irisList = new ArrayList<IrisModel>();
     33         try {
     34             read = new FileReader("D:\iris.data");
     35             br = new BufferedReader(read);
     36             String readLine = null;
     37             while ((readLine = br.readLine()) != null) {
     38                 IrisModel iris = new IrisModel();
     39                 String[] agrs = readLine.split(",");
     40                 iris.Sep_len = Double.parseDouble(agrs[0]);
     41                 iris.Sep_wid = Double.parseDouble(agrs[1]);
     42                 iris.Pet_len = Double.parseDouble(agrs[2]);
     43                 iris.Pet_wid = Double.parseDouble(agrs[3]);
     44                 iris.Iris_type = agrs[4];
     45                 irisList.add(iris);
     46             }
     47         } catch (FileNotFoundException e) {
     48             System.out.println("读取文件异常");
     49             irisList = null;
     50         } catch (IOException e) {
     51             System.out.println("读取文件异常");
     52             irisList = null;
     53         } finally {
     54             try {
     55                 br.close();
     56             } catch (IOException e) {
     57                 System.out.println("关闭文件异常");
     58             }
     59         }
     60         return irisList;
     61     }
     62 
     63     // 随机生成初始k个点
     64     private static ArrayList<IrisModel> RandomPot(ArrayList<IrisModel> irisList) {
     65         ArrayList<Integer> initCenter = new ArrayList<Integer>();
     66         ArrayList<IrisModel> randomPot = new ArrayList<IrisModel>();
     67         for (int i = 0; i < k; i++) {
     68             int num = (int) (Math.random() * dataCount);
     69             if (!initCenter.contains(num))
     70                 initCenter.add(num);
     71             else
     72                 i--;
     73         }
     74         Iterator<Integer> i = initCenter.iterator();
     75         while (i.hasNext()) {
     76             randomPot.add(irisList.get(i.next()));
     77         }
     78         return randomPot;
     79     }
     80 
     81     // KMeans主程序
     82     private static ArrayList<ArrayList<IrisModel>> KMeans(
     83             ArrayList<IrisModel> irisList, ArrayList<IrisModel> randomPot) {
     84         ArrayList<ArrayList<IrisModel>> groupNum = new ArrayList<ArrayList<IrisModel>>();
     85         for (int i = 0; i < randomPot.size(); i++) {
     86             ArrayList<IrisModel> list = new ArrayList<IrisModel>();
     87             list.add(randomPot.get(i));
     88             groupNum.add(list);
     89         }
     90         for (int i = 0; i < irisList.size(); i++) {
     91             double temp = Double.MAX_VALUE;
     92             int flag = -1;
     93             for (int j = 0; j < randomPot.size(); j++) {
     94                 double distance = DistanceOfTwoPoint(irisList.get(i),
     95                         randomPot.get(j));
     96                 if (distance < temp) {
     97                     temp = distance;
     98                     flag = j;
     99                 }
    100             }
    101             groupNum.get(flag).add(irisList.get(i));
    102         }
    103         // 重新计算质心
    104         ArrayList<IrisModel> tempList = CalcCenter(groupNum);
    105         randomPot.clear();
    106         for (int i = 0; i < tempList.size(); i++) {
    107             randomPot.add(tempList.get(i));
    108         }
    109         return groupNum;
    110     }
    111 
    112     // 计算两点欧氏距离
    113     private static double DistanceOfTwoPoint(IrisModel d1, IrisModel d2) {
    114         double sum = Math.sqrt(Math.pow((d1.Sep_len - d2.Sep_len), 2)
    115                 + Math.pow((d1.Sep_wid - d2.Sep_wid), 2)
    116                 + Math.pow((d1.Pet_len - d2.Pet_len), 2)
    117                 + Math.pow((d1.Pet_wid - d2.Pet_wid), 2));
    118         return sum;
    119     }
    120 
    121     // 重新计算k个簇的质心
    122     private static ArrayList<IrisModel> CalcCenter(
    123             ArrayList<ArrayList<IrisModel>> c) {
    124         ArrayList<IrisModel> cIris = new ArrayList<IrisModel>();
    125         Iterator<ArrayList<IrisModel>> i = c.iterator();
    126         while (i.hasNext()) {
    127             ArrayList<IrisModel> irisList = i.next();
    128             IrisModel eIris = new IrisModel();
    129             for (int k = 0; k < irisList.size(); k++) {
    130                 eIris.Sep_len += irisList.get(k).Sep_len;
    131                 eIris.Sep_wid += irisList.get(k).Sep_wid;
    132                 eIris.Pet_len += irisList.get(k).Pet_len;
    133                 eIris.Pet_wid += irisList.get(k).Pet_wid;
    134             }
    135             eIris.Sep_len = eIris.Sep_len / irisList.size();
    136             eIris.Sep_wid = eIris.Sep_wid / irisList.size();
    137             eIris.Pet_len = eIris.Pet_len / irisList.size();
    138             eIris.Pet_wid = eIris.Pet_wid / irisList.size();
    139             cIris.add(eIris);
    140         }
    141 
    142         return cIris;
    143     }
    144 
    145     // 比较前后两次的质心,以确定是否结束
    146     private static Boolean CompareRandomPot(
    147             ArrayList<IrisModel> beforeRandomPot, ArrayList<IrisModel> randomPot) {
    148         boolean flag = true;
    149         for (int i = 0; i < randomPot.size(); i++) {
    150             if (beforeRandomPot.size() <= 0
    151                     || !beforeRandomPot.contains(randomPot.get(i))) {
    152                 flag = false;
    153                 break;
    154             }
    155         }
    156         if (flag == false) {
    157             if (beforeRandomPot.size() > 0) {
    158                 beforeRandomPot.clear();
    159             }
    160             for (int i = 0; i < randomPot.size(); i++) {
    161                 beforeRandomPot.add(randomPot.get(i));
    162             }
    163         }
    164         return flag;
    165     }
    166 
    167     // 打印
    168     private static void Print(ArrayList<ArrayList<IrisModel>> kmeansList) {
    169         System.out.println("------------------------------------");
    170         Iterator<ArrayList<IrisModel>> i = kmeansList.iterator();
    171         while (i.hasNext()) {
    172             Iterator<IrisModel> ii = i.next().iterator();
    173             int n = 0;
    174             while (ii.hasNext()) {
    175                 n++;
    176                 IrisModel irisModel = ii.next();
    177                 if (n == 1)
    178                     continue;
    179                 System.out.println(irisModel.Sep_len + " " + irisModel.Sep_wid
    180                         + " " + irisModel.Pet_len + " " + irisModel.Pet_wid
    181                         + " " + irisModel.Iris_type);
    182             }
    183             System.out.println(n - 1);
    184             System.out.println("------------------------------------");
    185         }
    186     }
    187 }

    实验结果:

    ------------------------------------
    7.0 3.2 4.7 1.4 Iris-versicolor
    6.4 3.2 4.5 1.5 Iris-versicolor
    5.5 2.3 4.0 1.3 Iris-versicolor
    6.5 2.8 4.6 1.5 Iris-versicolor
    5.7 2.8 4.5 1.3 Iris-versicolor
    6.3 3.3 4.7 1.6 Iris-versicolor
    4.9 2.4 3.3 1.0 Iris-versicolor
    6.6 2.9 4.6 1.3 Iris-versicolor
    5.2 2.7 3.9 1.4 Iris-versicolor
    5.0 2.0 3.5 1.0 Iris-versicolor
    5.9 3.0 4.2 1.5 Iris-versicolor
    6.0 2.2 4.0 1.0 Iris-versicolor
    6.1 2.9 4.7 1.4 Iris-versicolor
    5.6 2.9 3.6 1.3 Iris-versicolor
    6.7 3.1 4.4 1.4 Iris-versicolor
    5.6 3.0 4.5 1.5 Iris-versicolor
    5.8 2.7 4.1 1.0 Iris-versicolor
    6.2 2.2 4.5 1.5 Iris-versicolor
    5.6 2.5 3.9 1.1 Iris-versicolor
    5.9 3.2 4.8 1.8 Iris-versicolor
    6.1 2.8 4.0 1.3 Iris-versicolor
    6.3 2.5 4.9 1.5 Iris-versicolor
    6.1 2.8 4.7 1.2 Iris-versicolor
    6.4 2.9 4.3 1.3 Iris-versicolor
    6.6 3.0 4.4 1.4 Iris-versicolor
    6.8 2.8 4.8 1.4 Iris-versicolor
    6.0 2.9 4.5 1.5 Iris-versicolor
    5.7 2.6 3.5 1.0 Iris-versicolor
    5.5 2.4 3.8 1.1 Iris-versicolor
    5.5 2.4 3.7 1.0 Iris-versicolor
    5.8 2.7 3.9 1.2 Iris-versicolor
    6.0 2.7 5.1 1.6 Iris-versicolor
    5.4 3.0 4.5 1.5 Iris-versicolor
    6.0 3.4 4.5 1.6 Iris-versicolor
    6.7 3.1 4.7 1.5 Iris-versicolor
    6.3 2.3 4.4 1.3 Iris-versicolor
    5.6 3.0 4.1 1.3 Iris-versicolor
    5.5 2.5 4.0 1.3 Iris-versicolor
    5.5 2.6 4.4 1.2 Iris-versicolor
    6.1 3.0 4.6 1.4 Iris-versicolor
    5.8 2.6 4.0 1.2 Iris-versicolor
    5.0 2.3 3.3 1.0 Iris-versicolor
    5.6 2.7 4.2 1.3 Iris-versicolor
    5.7 3.0 4.2 1.2 Iris-versicolor
    5.7 2.9 4.2 1.3 Iris-versicolor
    6.2 2.9 4.3 1.3 Iris-versicolor
    5.1 2.5 3.0 1.1 Iris-versicolor
    5.7 2.8 4.1 1.3 Iris-versicolor
    5.8 2.7 5.1 1.9 Iris-virginica
    4.9 2.5 4.5 1.7 Iris-virginica
    5.7 2.5 5.0 2.0 Iris-virginica
    5.8 2.8 5.1 2.4 Iris-virginica
    6.0 2.2 5.0 1.5 Iris-virginica
    5.6 2.8 4.9 2.0 Iris-virginica
    6.3 2.7 4.9 1.8 Iris-virginica
    6.2 2.8 4.8 1.8 Iris-virginica
    6.1 3.0 4.9 1.8 Iris-virginica
    6.3 2.8 5.1 1.5 Iris-virginica
    6.0 3.0 4.8 1.8 Iris-virginica
    5.8 2.7 5.1 1.9 Iris-virginica
    6.3 2.5 5.0 1.9 Iris-virginica
    5.9 3.0 5.1 1.8 Iris-virginica
    62
    ------------------------------------
    5.1 3.5 1.4 0.2 Iris-setosa
    4.9 3.0 1.4 0.2 Iris-setosa
    4.7 3.2 1.3 0.2 Iris-setosa
    4.6 3.1 1.5 0.2 Iris-setosa
    5.0 3.6 1.4 0.2 Iris-setosa
    5.4 3.9 1.7 0.4 Iris-setosa
    4.6 3.4 1.4 0.3 Iris-setosa
    5.0 3.4 1.5 0.2 Iris-setosa
    4.4 2.9 1.4 0.2 Iris-setosa
    4.9 3.1 1.5 0.1 Iris-setosa
    5.4 3.7 1.5 0.2 Iris-setosa
    4.8 3.4 1.6 0.2 Iris-setosa
    4.8 3.0 1.4 0.1 Iris-setosa
    4.3 3.0 1.1 0.1 Iris-setosa
    5.8 4.0 1.2 0.2 Iris-setosa
    5.7 4.4 1.5 0.4 Iris-setosa
    5.4 3.9 1.3 0.4 Iris-setosa
    5.1 3.5 1.4 0.3 Iris-setosa
    5.7 3.8 1.7 0.3 Iris-setosa
    5.1 3.8 1.5 0.3 Iris-setosa
    5.4 3.4 1.7 0.2 Iris-setosa
    5.1 3.7 1.5 0.4 Iris-setosa
    4.6 3.6 1.0 0.2 Iris-setosa
    5.1 3.3 1.7 0.5 Iris-setosa
    4.8 3.4 1.9 0.2 Iris-setosa
    5.0 3.0 1.6 0.2 Iris-setosa
    5.0 3.4 1.6 0.4 Iris-setosa
    5.2 3.5 1.5 0.2 Iris-setosa
    5.2 3.4 1.4 0.2 Iris-setosa
    4.7 3.2 1.6 0.2 Iris-setosa
    4.8 3.1 1.6 0.2 Iris-setosa
    5.4 3.4 1.5 0.4 Iris-setosa
    5.2 4.1 1.5 0.1 Iris-setosa
    5.5 4.2 1.4 0.2 Iris-setosa
    4.9 3.1 1.5 0.1 Iris-setosa
    5.0 3.2 1.2 0.2 Iris-setosa
    5.5 3.5 1.3 0.2 Iris-setosa
    4.9 3.1 1.5 0.1 Iris-setosa
    4.4 3.0 1.3 0.2 Iris-setosa
    5.1 3.4 1.5 0.2 Iris-setosa
    5.0 3.5 1.3 0.3 Iris-setosa
    4.5 2.3 1.3 0.3 Iris-setosa
    4.4 3.2 1.3 0.2 Iris-setosa
    5.0 3.5 1.6 0.6 Iris-setosa
    5.1 3.8 1.9 0.4 Iris-setosa
    4.8 3.0 1.4 0.3 Iris-setosa
    5.1 3.8 1.6 0.2 Iris-setosa
    4.6 3.2 1.4 0.2 Iris-setosa
    5.3 3.7 1.5 0.2 Iris-setosa
    5.0 3.3 1.4 0.2 Iris-setosa
    50
    ------------------------------------
    6.9 3.1 4.9 1.5 Iris-versicolor
    6.7 3.0 5.0 1.7 Iris-versicolor
    6.3 3.3 6.0 2.5 Iris-virginica
    7.1 3.0 5.9 2.1 Iris-virginica
    6.3 2.9 5.6 1.8 Iris-virginica
    6.5 3.0 5.8 2.2 Iris-virginica
    7.6 3.0 6.6 2.1 Iris-virginica
    7.3 2.9 6.3 1.8 Iris-virginica
    6.7 2.5 5.8 1.8 Iris-virginica
    7.2 3.6 6.1 2.5 Iris-virginica
    6.5 3.2 5.1 2.0 Iris-virginica
    6.4 2.7 5.3 1.9 Iris-virginica
    6.8 3.0 5.5 2.1 Iris-virginica
    6.4 3.2 5.3 2.3 Iris-virginica
    6.5 3.0 5.5 1.8 Iris-virginica
    7.7 3.8 6.7 2.2 Iris-virginica
    7.7 2.6 6.9 2.3 Iris-virginica
    6.9 3.2 5.7 2.3 Iris-virginica
    7.7 2.8 6.7 2.0 Iris-virginica
    6.7 3.3 5.7 2.1 Iris-virginica
    7.2 3.2 6.0 1.8 Iris-virginica
    6.4 2.8 5.6 2.1 Iris-virginica
    7.2 3.0 5.8 1.6 Iris-virginica
    7.4 2.8 6.1 1.9 Iris-virginica
    7.9 3.8 6.4 2.0 Iris-virginica
    6.4 2.8 5.6 2.2 Iris-virginica
    6.1 2.6 5.6 1.4 Iris-virginica
    7.7 3.0 6.1 2.3 Iris-virginica
    6.3 3.4 5.6 2.4 Iris-virginica
    6.4 3.1 5.5 1.8 Iris-virginica
    6.9 3.1 5.4 2.1 Iris-virginica
    6.7 3.1 5.6 2.4 Iris-virginica
    6.9 3.1 5.1 2.3 Iris-virginica
    6.8 3.2 5.9 2.3 Iris-virginica
    6.7 3.3 5.7 2.5 Iris-virginica
    6.7 3.0 5.2 2.3 Iris-virginica
    6.5 3.0 5.2 2.0 Iris-virginica
    6.2 3.4 5.4 2.3 Iris-virginica
    38
    ------------------------------------
    迭代了16次
    View Code
  • 相关阅读:
    Spark开发-SparkUDAF(二)
    Spark开发-Spark UDAF(一)
    Spark开发-Spark中类型安全UDAF开发示例
    Spark开发_构建TypeSafe的Dataset
    布隆过滤器(Bloom Filter)
    一个 Spark 应用程序的完整执行流程
    Spark的RPC
    Spark调优
    Hbase系列文章
    Flink怎么做到精确一次的?
  • 原文地址:https://www.cnblogs.com/niuxiaoha/p/4645989.html
Copyright © 2011-2022 走看看