zoukankan      html  css  js  c++  java
  • C#实现K-MEDOIDS聚类算法

    1)任意选取K个对象作为初始聚类中心(O1,O2,…Oi…Ok)。  
    2)将余下的对象分到各个类中去(该对象与哪一个聚类中心最近就被分配到哪一个聚类簇中);  
    3)对于每个类(其中心点为Oi),依次选取该类中的一个非中心对象Or,用Or代替Oi后重复步骤2,并计算误差E=各个点到其对应中心点的欧氏距离之和。选择使E最小的那个Or来代替Oi。
    4)重复步骤3,直到K个medoids固定下来。

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.IO;
    
    namespace K_medoids
    {
        class Program
        {
            /// <summary>
            /// Entry point. Reads a data file containing one point per line (coordinates
            /// separated by whitespace), clusters the points with <see cref="Pam"/> and
            /// prints each resulting cluster: the medoid first, then its member points,
            /// then a separator line.
            /// The data file name is resolved relative to the application's base directory.
            /// </summary>
            /// <param name="args">
            /// args[0] is the data file name; args[1] is the number of clusters k.
            /// </param>
            static void Main(string[] args)
            {
                var path = string.Empty;
                int k = 0;
                try
                {
                    path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, args[0]);
                    k = Convert.ToInt32(args[1]);
                }
                catch (Exception)
                {
                    Console.Write("参数错误");
                    return;
                }

                // Load every non-blank line as one point. The reader is disposed even when
                // parsing throws, and runs of whitespace are collapsed on every line
                // (not only on the first one).
                var db = new List<Indivaduls>();
                using (var reader = new StreamReader(path, Encoding.Default))
                {
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        var fields = line.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
                        if (fields.Length == 0)
                        {
                            continue;
                        }
                        db.Add(new Indivaduls() { Numbers = fields.Select(Convert.ToDouble).ToList() });
                    }
                }

                // k must select between 1 and db.Count distinct points; this also rejects
                // an empty data file and avoids the division by zero below.
                if (k < 1 || k > db.Count)
                {
                    Console.Write("参数错误");
                    return;
                }

                // Pick k evenly spaced points as the initial medoids. The wider spacing
                // (Count / k + 1) is kept whenever it stays inside the list; otherwise
                // fall back to Count / k, whose largest index (k - 1) * (Count / k) is
                // always below Count.
                var gap = db.Count / k + 1;
                if ((long)(k - 1) * gap >= db.Count)
                {
                    gap = db.Count / k;
                }
                var initialCenters = new List<Indivaduls>();
                for (int i = 0; i < k; i++)
                {
                    initialCenters.Add(db[i * gap]);
                }

                var result = Pam(db, initialCenters);
                foreach (var crow in result)
                {
                    // One point per line, coordinates separated by a single space.
                    Console.WriteLine(string.Join(" ", crow.CenterPoint.Numbers));
                    foreach (var point in crow.CrowsPoint)
                    {
                        Console.WriteLine(string.Join(" ", point.Numbers));
                    }
                    Console.WriteLine("========================");
                }
                Console.ReadKey();
            }
          /// <summary>
          /// Partitioning Around Medoids: starting from <paramref name="centerPoints"/>,
          /// repeatedly tries to replace each medoid with a member of its own cluster and
          /// keeps a replacement whenever it lowers the total cost (the sum of distances
          /// from every clustered point to its medoid). Stops when a full sweep over all
          /// medoids accepts no replacement.
          /// </summary>
          /// <param name="indivadulses">All points to cluster, including the initial medoids.</param>
          /// <param name="centerPoints">Initial medoids; this list is not modified.</param>
          /// <returns>The final clusters, in the same order as the initial medoids.</returns>
            public static List<Crows> Pam(List<Indivaduls> indivadulses,List<Indivaduls> centerPoints  )
            {
                var medoids = new List<Indivaduls>(centerPoints);
                var clusters = K_medoids(indivadulses, medoids);
                var bestCost = TotalCost(clusters);

                bool improved;
                do
                {
                    improved = false;
                    for (int i = 0; i < medoids.Count; i++)
                    {
                        // Snapshot the members of cluster i: 'clusters' is replaced as soon
                        // as a swap is accepted, and the remaining candidates must still
                        // be tried.
                        var candidates = new List<Indivaduls>(clusters[i].CrowsPoint);
                        foreach (var candidate in candidates)
                        {
                            // Trial medoid set: the currently accepted medoids with
                            // medoid i replaced in place, so cluster order is preserved.
                            var trialMedoids = new List<Indivaduls>(medoids);
                            trialMedoids[i] = candidate;

                            var trialClusters = K_medoids(indivadulses, trialMedoids);
                            var trialCost = TotalCost(trialClusters);

                            // Strict improvement only: every accepted swap lowers the cost,
                            // so no medoid set can be revisited and the loop terminates.
                            if (trialCost < bestCost)
                            {
                                medoids = trialMedoids;
                                clusters = trialClusters;
                                bestCost = trialCost;
                                improved = true;
                            }
                        }
                    }
                } while (improved);

                return clusters;
            }

            /// <summary>
            /// Sums <see cref="Distance"/> over all clusters of one clustering.
            /// </summary>
            private static double TotalCost(List<Crows> clusters)
            {
                var total = 0.0;
                foreach (var cluster in clusters)
                {
                    total += Distance(cluster);
                }
                return total;
            }
            /// <summary>
            /// Performs one assignment pass: creates one cluster per medoid and puts every
            /// remaining point into the cluster whose medoid is nearest (ties go to the
            /// medoid that appears first in <paramref name="centerPoints"/>).
            /// </summary>
            /// <param name="indivadulses">Points to cluster, including the medoids themselves.</param>
            /// <param name="centerPoints">The medoids.</param>
            /// <returns>
            /// One cluster per medoid, in the order of <paramref name="centerPoints"/>.
            /// </returns>
            /// <remarks>
            /// For each medoid exactly one point of <paramref name="indivadulses"/> is treated
            /// as "the medoid itself" and left out of the member lists: the same instance if
            /// it is present, otherwise the first point with equal coordinates. Other points
            /// that merely share a medoid's coordinates are clustered normally instead of
            /// being dropped.
            /// If <paramref name="centerPoints"/> is empty while points remain to be assigned,
            /// indexing the empty cluster list throws <see cref="ArgumentOutOfRangeException"/>.
            /// </remarks>
            public static List<Crows> K_medoids(List<Indivaduls> indivadulses,List<Indivaduls> centerPoints)
            {
                var resultCrows = new List<Crows>(centerPoints.Count);
                foreach (var center in centerPoints)
                {
                    resultCrows.Add(new Crows() { CenterPoint = center });
                }

                // Mark at most one point per medoid as being that medoid.
                var isCenter = new bool[indivadulses.Count];
                foreach (var center in centerPoints)
                {
                    var claimed = -1;
                    for (int i = 0; i < indivadulses.Count; i++)
                    {
                        if (isCenter[i])
                        {
                            continue;
                        }
                        if (object.ReferenceEquals(indivadulses[i], center))
                        {
                            // The medoid instance itself always wins over a value match.
                            claimed = i;
                            break;
                        }
                        if (claimed < 0 && indivadulses[i].MyEquals(center))
                        {
                            claimed = i;
                        }
                    }
                    if (claimed >= 0)
                    {
                        isCenter[claimed] = true;
                    }
                }

                for (int i = 0; i < indivadulses.Count; i++)
                {
                    if (isCenter[i])
                    {
                        continue;
                    }

                    // Start with the first medoid and switch only on a strictly smaller distance.
                    var nearest = 0;
                    var nearestDistance = P2PDistance(indivadulses[i], resultCrows[0].CenterPoint);
                    for (int j = 1; j < resultCrows.Count; j++)
                    {
                        var distance = P2PDistance(indivadulses[i], resultCrows[j].CenterPoint);
                        if (distance < nearestDistance)
                        {
                            nearestDistance = distance;
                            nearest = j;
                        }
                    }
                    resultCrows[nearest].CrowsPoint.Add(indivadulses[i]);
                }
                return resultCrows;
            }
            /// <summary>
            /// Computes the absolute error of a clustering: the summed point-to-medoid
            /// distance of <paramref name="centerCrow"/> plus that of every cluster in
            /// <paramref name="otherPoints"/>.
            /// </summary>
            /// <param name="centerCrow">The cluster whose medoid is under consideration.</param>
            /// <param name="otherPoints">All remaining clusters.</param>
            /// <returns>The sum of <see cref="Distance"/> over all given clusters.</returns>
            public static double AbsoluteDiff(Crows centerCrow,List<Crows> otherPoints )
            {
                var total = Distance(centerCrow);
                foreach (var cluster in otherPoints)
                {
                    total += Distance(cluster);
                }
                return total;
            }
            /// <summary>
            /// Sums the Euclidean distances from each member point of a cluster to the
            /// cluster's medoid.
            /// </summary>
            /// <param name="crow">The cluster to measure.</param>
            /// <returns>The summed distance; 0 when the cluster has no member points.</returns>
            public static double Distance(Crows crow)
            {
                var sum = 0.0;
                foreach (var member in crow.CrowsPoint)
                {
                    sum += P2PDistance(crow.CenterPoint, member);
                }
                return sum;
            }
            /// <summary>
            /// Euclidean distance between two points.
            /// </summary>
            /// <param name="p1">First point.</param>
            /// <param name="p2">Second point.</param>
            /// <returns>The square root of the sum of squared coordinate differences.</returns>
            /// <exception cref="ArgumentException">
            /// The points have different numbers of coordinates, or have none.
            /// </exception>
            public static double P2PDistance(Indivaduls p1,Indivaduls p2)
            {
                var dimension = p1.Numbers.Count;
                if (dimension != p2.Numbers.Count || dimension == 0)
                {
                    // ArgumentException derives from Exception, so callers that catch
                    // Exception keep working while getting a descriptive message.
                    throw new ArgumentException(
                        "Points must have the same, non-zero number of coordinates (got "
                        + dimension + " and " + p2.Numbers.Count + ").");
                }

                var sumOfSquares = 0.0;
                for (var i = 0; i < dimension; i++)
                {
                    var delta = p1.Numbers[i] - p2.Numbers[i];
                    sumOfSquares += delta * delta;
                }
                return Math.Sqrt(sumOfSquares);
            }
    
        }
        /// <summary>
        /// A single data point, stored as a list of coordinates.
        /// </summary>
        public class Indivaduls
        {
            /// <summary>Coordinates of the point; initialised to an empty list.</summary>
            public List<double> Numbers;

            public Indivaduls()
            {
                this.Numbers=new List<double>();
            }

            /// <summary>
            /// Compares two points coordinate by coordinate using exact equality.
            /// </summary>
            /// <param name="obj">The point to compare with; may be null.</param>
            /// <returns>
            /// True when both points have the same number of coordinates and every pair
            /// of coordinates is equal; false otherwise, including when
            /// <paramref name="obj"/> is null.
            /// </returns>
            public  bool MyEquals(Indivaduls obj)
            {
                // A missing point is never equal to an existing one.
                if (obj == null)
                    return false;
                if (obj.Numbers.Count != this.Numbers.Count)
                    return false;
                for (int i = 0; i < Numbers.Count; i++)
                {
                    // Exact comparison is intentional: this is an identity-style check
                    // on stored values, not a tolerance-based comparison.
                    if (this.Numbers[i] != obj.Numbers[i])
                        return false;
                }
                return true;
            }
        }
        /// <summary>
        /// One cluster: a medoid together with the points assigned to it.
        /// </summary>
        public class Crows
        {
            /// <summary>The medoid (center point) of the cluster; starts as an empty point.</summary>
            public Indivaduls CenterPoint = new Indivaduls();

            /// <summary>The points assigned to the cluster, not counting the medoid; starts empty.</summary>
            public List<Indivaduls> CrowsPoint = new List<Indivaduls>();

            public Crows()
            {
            }
        }
        public static class ExpandList
        {
            /// <summary>
            /// Extension method: tells whether the list holds a point whose coordinates
            /// equal those of <paramref name="point"/> (as decided by MyEquals).
            /// </summary>
            /// <param name="indivadulses">The list to search.</param>
            /// <param name="point">The point to look for.</param>
            /// <returns>True if at least one element matches; otherwise false.</returns>
            public static bool MyContains(this List<Indivaduls> indivadulses,Indivaduls point)
            {
                return indivadulses.Exists(candidate => point.MyEquals(candidate));
            }
        }
    }
    

      

  • 相关阅读:
    【洛谷P3628】特别行动队
    【洛谷P3233】世界树
    【BZOJ1597】土地购买
    【洛谷P4068】数字配对
    【洛谷P3899】谈笑风生
    【BZOJ2726】任务安排
    【洛谷P6186】[NOI Online 提高组] 冒泡排序
    【洛谷P3369】【模板】普通平衡树
    【UOJ#8】Quine
    标准 插入flash
  • 原文地址:https://www.cnblogs.com/foreveryt/p/4321368.html
Copyright © 2011-2022 走看看