zoukankan      html  css  js  c++  java
  • K-Medoids聚类算法原理及其Java代码实现

    K-中心点(K-Medoids)算法是对K-Means均值算法的改进。由于样本数据中可能存在取值极端的对象,这些数据会严重地扭曲数据的分布,而平方误差和的使用可能会进一步恶化这一影响。K-Medoids算法不是选取簇中对象的均值作为质心,而是在每一个簇内选出一个实际的对象来代表该簇,这个对象就称为簇的中心点。

    算法实现步骤:

    1.任意选择k个对象作为k个中心点

    2.计算每个非中心点的对象到每个中心点之间的距离

    3.把每一个非中心点对象分配到距离它最近的中心点所代表的簇中

    4.在每一个聚簇中按照顺序依次选取点,计算该点到当前聚簇中所有点的距离之和,最终距离之和最小的点则视为新的中心点

    5.重复步骤2-4,直到各个聚簇的中心点不再发生改变。

    Java代码如下:

    package com.kmedoids;

    import java.awt.font.TextHitInfo;
    import java.util.ArrayList;

    /**
     * A named cluster: holds the cluster's current medoid and the list of
     * sample points currently assigned to it.
     */
    public class Cluster {
        /** Display name of the cluster. */
        private final String clusterName;
        /** Medoid representing this cluster; stays {@code null} until {@link #setMedoid} is called. */
        private Medoid medoid;
        /** Sample points currently assigned to this cluster. */
        private final ArrayList<DataPoint> dataPoints = new ArrayList<DataPoint>();

        /**
         * Creates an empty cluster with no medoid.
         *
         * @param clusterName name of the cluster
         */
        public Cluster(String clusterName) {
            this.clusterName = clusterName;
        }

        /** Replaces the medoid of this cluster. */
        public void setMedoid(Medoid c) {
            medoid = c;
        }

        /** Returns the current medoid, or {@code null} if none has been set. */
        public Medoid getMedoid() {
            return medoid;
        }

        /**
         * Assigns a sample point to this cluster: the point's cluster
         * back-reference is set to this cluster, then the point is appended
         * to the member list.
         */
        public void addDataPoint(DataPoint dp) {
            dp.setCluster(this);
            dataPoints.add(dp);
        }

        /** Returns the internal (live, mutable) member list, not a copy. */
        public ArrayList<DataPoint> getDataPoints() {
            return dataPoints;
        }

        /**
         * Removes a point from the member list. The point's own cluster
         * reference is left untouched.
         */
        public void removeDataPoint(DataPoint dp) {
            dataPoints.remove(dp);
        }

        /** Returns how many points are currently assigned to this cluster. */
        public int getNumDataPoints() {
            return dataPoints.size();
        }

        /** Returns the member at index {@code pos} of the member list. */
        public DataPoint getDataPoint(int pos) {
            return dataPoints.get(pos);
        }

        /** Returns the name given at construction time. */
        public String getName() {
            return clusterName;
        }
    }

    /*******************************************/

    package com.kmedoids;

    import java.util.ArrayList;

    /**
     * A named sample point with a coordinate vector and a back-reference to
     * the cluster it is currently assigned to.
     */
    public class DataPoint{
        /** Coordinates of the sample point (the array passed in is stored, not copied). */
        private double dimension[];
        /** Name of the sample point. */
        private String pointName;
        /** Cluster this point is assigned to; {@code null} until {@link #setCluster} is called. */
        private Cluster cluster;
        /**
         * Intended to hold the distance from this point to its medoid.
         * NOTE(review): nothing in this class ever assigns it, so
         * {@link #getCurrentEudt()} returns the default 0.0.
         */
        private double euDt;

        /**
         * @param dimension coordinates of the point
         * @param pointName name of the point
         */
        public DataPoint(double[] dimension, String pointName) {
            this.dimension = dimension;
            this.pointName = pointName;
            this.cluster = null;
        }

        public Cluster getCluster() {
            return cluster;
        }

        public void setCluster(Cluster cluster) {
            this.cluster = cluster;
        }

        /**
         * Returns the sum of the Euclidean distances from this point to every
         * point of the cluster it is assigned to (itself included, which
         * contributes 0). This is the cost of using this point as the
         * cluster's medoid.
         *
         * <p>The previous implementation pooled the squared differences of all
         * points and took a single square root, which is not a sum of
         * distances; each distance is now computed separately and then added.
         *
         * <p>Precondition: the point must have been assigned to a cluster,
         * otherwise a {@link NullPointerException} is thrown.
         *
         * @return sum of per-point Euclidean distances within the cluster
         */
        public double calEuclideanDistanceSum(){
            double sum = 0.0;
            ArrayList<DataPoint> peers = this.getCluster().getDataPoints();
            for (int i = 0; i < peers.size(); i++) {
                sum += euclideanDistance(peers.get(i).getDimensioin(), this.dimension);
            }
            return sum;
        }

        /** Returns the stored coordinate array itself (not a copy). */
        public double[] getDimensioin() {
            return this.dimension;
        }

        /**
         * Returns the Euclidean distance between this point and the given medoid.
         *
         * @param md medoid to measure against
         * @return Euclidean distance to {@code md}
         */
        public double testEuclideanDistance(Medoid md){
            return euclideanDistance(this.dimension, md.getDimension());
        }

        public String getPointName() {
            return this.pointName;
        }

        public void setPointName(String pointName) {
            this.pointName = pointName;
        }

        /** Returns {@code euDt}; see the field note, it is never updated by this class. */
        public double getCurrentEudt(){
            return this.euDt;
        }

        /**
         * Euclidean distance between two coordinate vectors. Iterates over the
         * length of {@code a}; {@code b} must be at least as long.
         */
        private static double euclideanDistance(double[] a, double[] b) {
            double squared = 0.0;
            for (int j = 0; j < a.length; j++) {
                squared += Math.pow(a[j] - b[j], 2);
            }
            return Math.sqrt(squared);
        }

    }

    /*******************************************************/

    package com.kmedoids;

    import java.util.ArrayList;

    import javax.xml.crypto.Data;

    /**
     * The representative point (medoid) of a cluster.
     */
    public class Medoid{
        /** Coordinates of the medoid. */
        private double dimension[];
        /** Cluster this medoid belongs to; must be set before {@link #calcMedoid()} is called. */
        private Cluster cluster;
        /** Sum of Euclidean distances from the medoid to every point of its cluster. */
        private double etdDisSum;

        /**
         * @param dimension initial coordinates of the medoid
         */
        public Medoid(double dimension[]){
            this.dimension=dimension;
        }

        public double[] getDimension() {
            return dimension;
        }

        public void setDimension(double[] dimension) {
            this.dimension = dimension;
        }

        public Cluster getCluster() {
            return cluster;
        }

        public void setCluster(Cluster cluster) {
            this.cluster = cluster;
        }

        /**
         * Moves the medoid to the cluster member with the lowest total
         * distance to all members of the cluster.
         *
         * <p>The cost of the current medoid and of every candidate is measured
         * with the same metric (sum of Euclidean distances). Previously the
         * current medoid was scored with a Manhattan sum while candidates were
         * scored by a different formula, so the comparison mixed metrics.
         *
         * <p>The medoid only moves when a candidate is strictly cheaper. When
         * it moves, {@code dimension} refers to the chosen point's own
         * coordinate array (no copy is made).
         */
        public void calcMedoid(){
            calcEtdDisSum();
            double bestSum = this.etdDisSum;
            ArrayList<DataPoint> members = this.cluster.getDataPoints();
            for (int i = 0; i < members.size(); i++) {
                double[] candidate = members.get(i).getDimensioin();
                double candidateSum = distanceSum(candidate, members);
                if (candidateSum < bestSum) {
                    dimension = candidate;
                    bestSum = candidateSum;
                }
            }
            // Keep the cached cost in step with the medoid that was finally chosen.
            this.etdDisSum = bestSum;
        }

        /** Recomputes the cached Euclidean distance sum for the current medoid position. */
        private void calcEtdDisSum() {
            etdDisSum = distanceSum(this.dimension, this.getCluster().getDataPoints());
        }

        /**
         * Sum of the Euclidean distances from {@code origin} to each of {@code points}.
         * For each point the loop runs over that point's coordinate count, so
         * {@code origin} must have at least that many coordinates.
         */
        private static double distanceSum(double[] origin, ArrayList<DataPoint> points) {
            double total = 0.0;
            for (int i = 0; i < points.size(); i++) {
                double[] dims = points.get(i).getDimensioin();
                double squared = 0.0;
                for (int j = 0; j < dims.length; j++) {
                    double diff = dims[j] - origin[j];
                    squared += diff * diff;
                }
                total += Math.sqrt(squared);
            }
            return total;
        }
    }

    /*****************************************************/

    package com.kmedoids;

    import java.util.ArrayList;

    import javax.xml.crypto.Data;

    /**
     * Drives the K-Medoids iteration: assigns every sample point to the
     * cluster with the nearest medoid, lets each cluster re-elect its medoid,
     * and repeats until no medoid coordinate changes.
     */
    public class ClusterAnalysis{
        /** All clusters; created in the constructor, one per requested cluster. */
        private final Cluster[] clusters;
        /** Iteration counter: starts at the value passed to the constructor and grows by one per pass. */
        private int miter;
        /** All sample points to be clustered. */
        private ArrayList<DataPoint> dataPoints;
        /** Number of coordinates per point. */
        private int dimNum;

        /**
         * @param k          number of clusters to create
         * @param iter       starting value of the iteration counter
         * @param dataPoints sample points to cluster
         * @param dimNum     number of coordinates per point
         */
        public ClusterAnalysis(int k,int iter, ArrayList<DataPoint> dataPoints, int dimNum) {
            clusters = new Cluster[k];
            for (int i = 0; i < k; i++) {
                clusters[i] = new Cluster("Cluster: "+ i);
            }
            this.miter = iter;
            this.dataPoints = dataPoints;
            this.dimNum = dimNum;
        }

        /** Returns the iteration counter (initial value plus the number of passes executed). */
        public int getIterations(){
            return miter;
        }

        /**
         * Returns, per cluster, the live list of points currently assigned to it.
         * The lists are the clusters' internal lists, not copies.
         */
        public ArrayList<DataPoint>[] getClusterOutput(){
            // Generic arrays cannot be created directly; every slot is filled
            // below with an ArrayList<DataPoint>, so the unchecked conversion is safe.
            @SuppressWarnings("unchecked")
            ArrayList<DataPoint>[] data = new ArrayList[clusters.length];
            for (int i = 0; i < clusters.length; i++) {
                data[i] = clusters[i].getDataPoints();
            }
            return data;
        }

        /**
         * Runs the clustering until the medoids stop changing.
         *
         * <p>At least one assignment pass is always executed. Previously the
         * loop compared the initial medoids against a zero-filled array before
         * the first pass, so initial medoids that were all at the origin
         * caused the analysis to return without assigning any point.
         *
         * <p>Side effect kept from the original: the rows of {@code medoids}
         * are overwritten with the current medoid coordinate arrays on every
         * pass, so on return the caller's array reflects the final medoids.
         *
         * @param medoids initial medoid coordinates, one row per cluster
         * @throws IllegalArgumentException if fewer rows than clusters are supplied
         */
        public void startAnalysis(double[][] medoids){
            if (medoids.length < clusters.length) {
                throw new IllegalArgumentException(
                        "expected at least " + clusters.length + " initial medoids but got " + medoids.length);
            }
            setInitiaMedoids(medoids);
            double[][] newMedoids = medoids;
            double[][] oldMedoids = new double[medoids.length][this.dimNum];

            do {
                // Start every pass with empty clusters.
                for (int m = 0; m < clusters.length; m++) {
                    clusters[m].getDataPoints().clear();
                }
                // Assign each sample point to the cluster whose medoid is nearest.
                for (int j = 0; j < dataPoints.size(); j++) {
                    DataPoint point = dataPoints.get(j);
                    int clusterIndex = 0;
                    double minDistance = Double.MAX_VALUE;
                    for (int k = 0; k < clusters.length; k++) {
                        double eucDistance = point.testEuclideanDistance(clusters[k].getMedoid());
                        if (eucDistance < minDistance) {
                            minDistance = eucDistance;
                            clusterIndex = k;
                        }
                    }
                    clusters[clusterIndex].addDataPoint(point);
                }
                // Let each cluster re-elect its medoid.
                for (int m = 0; m < clusters.length; m++) {
                    clusters[m].getMedoid().calcMedoid();
                }
                // Snapshot the previous medoids by value before reading the new ones.
                for (int i = 0; i < medoids.length; i++) {
                    for (int j = 0; j < this.dimNum; j++) {
                        oldMedoids[i][j] = newMedoids[i][j];
                    }
                }
                for (int n = 0; n < clusters.length; n++) {
                    newMedoids[n] = clusters[n].getMedoid().getDimension();
                }
                this.miter++;
            } while (!isEqual(oldMedoids, newMedoids));
        }

        /** Returns true when every coordinate of every medoid is identical in both arrays. */
        private boolean isEqual(double[][] oldMedoids, double[][] newMedoids) {
            for (int i = 0; i < oldMedoids.length; i++) {
                for (int j = 0; j < newMedoids[i].length; j++) {
                    if (oldMedoids[i][j] != newMedoids[i][j]) {
                        return false;
                    }
                }
            }
            return true;
        }

        /** Creates one medoid per cluster from the given coordinates and links medoid and cluster to each other. */
        private void setInitiaMedoids(double[][] medoids) {
            for (int n = 0; n < clusters.length; n++) {
                Medoid medoid = new Medoid(medoids[n]);
                clusters[n].setMedoid(medoid);
                medoid.setCluster(clusters[n]);
            }
        }
    }

    /***************************************************/

    package com.kmedoids;

    import java.util.ArrayList;
    import java.util.Iterator;

    /**
     * Demo driver: clusters 17 two-dimensional sample points into 5 clusters
     * and prints the members of each cluster.
     */
    public class TestMain{
        public static void main(String[] args) {
            // Coordinates of the sample points, in the order a..q.
            double[][] coords = {
                {2,3},{2,4},{1,4},{1,3},{2,2},{3,2},      // a-f
                {8,7},{8,6},{7,7},{7,6},{8,5},            // g-k
                {100,200},                                // l: isolated outlier
                {8,20},{8,19},{7,18},{7,17},{7,20}        // m-q
            };
            String names = "abcdefghijklmnopq";

            ArrayList<DataPoint> dataPoints = new ArrayList<DataPoint>();
            for (int idx = 0; idx < coords.length; idx++) {
                dataPoints.add(new DataPoint(coords[idx], String.valueOf(names.charAt(idx))));
            }

            // Arguments: k, initial iteration counter, sample points, number of dimensions.
            ClusterAnalysis ca = new ClusterAnalysis(5, 0, dataPoints, 2);
            // Initial medoids; for K-Medoids these are taken from the sample points themselves.
            double[][] cen ={
                {8,7},{8,6},{7,7},{8,19},{7,20}
            };
            ca.startAnalysis(cen);

            ArrayList<DataPoint>[] v = ca.getClusterOutput();

            for (int dti = 0; dti < v.length; dti++) {
                System.out.println("类簇类别"+dti+":");
                for (DataPoint dpTemp : v[dti]) {
                    // print, not printf: the name is data and must not be parsed as a format string.
                    System.out.print(dpTemp.getPointName()+" ");
                }
                System.out.println();
            }
        }
    }

    /********************************************/

    运行结果:

  • 相关阅读:
    Python类知识点
    安装psycopg2时出错:Error: pg_config executable not found.
    top命令
    Ubuntu18.10创建软件图标
    初始化Redis密码
    Ubuntu修改root密码,ssh 允许root用户登录
    Flask_Migrate数据库迁移
    Ubuntu18.04 systemd开机自启
    dnspython
    记一次Celery的仇
  • 原文地址:https://www.cnblogs.com/walxt/p/12192083.html
Copyright © 2011-2022 走看看