zoukankan      html  css  js  c++  java
  • 处理输入为非对角阵的Clustering by fast search and find of density peak代码

    Clustering by fast search and find of density peak. Alex Rodriguez, Alessandro Laio

    是发表在Science上的一篇很好的阐述一种新聚类算法的paper,其自带代码

    http://www.sciencemag.org/content/suppl/2014/06/25/344.6191.1492.DC1/1242072DataS1.zip

    展示了该代码的实现过程和使用结果。

    不过,该测试代码要求输入为一个格式特殊的三角矩阵,为了使用方便,这里我把源代码做少量修改,

    增加预处理部分,使得可以直接处理标准的距离矩阵格式数据

    clear all
    close all
    % disp('The only input needed is a distance matrix file')
    % disp('The format of this file should be: ')
    % disp('Column 1: id of element i')
    % disp('Column 2: id of element j')
    % disp('Column 3: dist(i,j)')
    % mdist=input('name of the distance matrix file (with single quotes)?
    ');
    % disp('Reading input distance matrix')
    % xx=load(mdist);
    % xx = load('D:example_distances.dat')
    %x = load('C:UseTraceOfAllUsers.txt');
    x = load('D:CoursesCloud ComputingClusteringdataWaitingForClustering4.2.txt');
    minX = min(x);
    maxX = max(x);
    ran = maxX - minX;
    nx(:,1) = (x(:,1) - minX(1,1)) / ran(1,1);
    nx(:,2) = (x(:,2) - minX(1,2)) / ran(1,2);
    dist = pdist2(nx, nx);
    N = size(dist,1);
    xx = zeros((N-1)*N/2, 3);
    idx = 1;
    for i=1:N
        for j=i+1:N
            xx(idx, 1) = i;
            xx(idx, 2) = j;
            xx(idx, 3) = dist(i, j);
            idx = idx + 1;
        end
    end
    N = size(xx, 1);        
    ND=max(xx(:,2));
    NL=max(xx(:,1));
    if (NL>ND)
      ND=NL;
    end
    % N=size(xx,1);
    % for i=1:ND
    %   for j=1:ND
    %     dist(i,j)=0;
    %   end
    % end
    % for i=1:N
    %   ii=xx(i,1);
    %   jj=xx(i,2);
    %   dist(ii,jj)=xx(i,3);
    %   dist(jj,ii)=xx(i,3);
    % end
    % percent=2;
    % fprintf('average percentage of neighbours (hard coded): %5.6f
    ', percent);
    % 
    % position=round(N*percent/100);
    % sda=sort(xx(:,3));
    % dc=sda(position);
    dc = 0.15;
    
    fprintf('Computing Rho with gaussian kernel of radius: %12.6f
    ', dc);
    
    
    for i=1:ND
      rho(i)=0.;
    end
    %
    % Gaussian kernel
    %
    for i=1:ND-1
      for j=i+1:ND
         rho(i)=rho(i)+exp(-(dist(i,j)/dc)*(dist(i,j)/dc));
         rho(j)=rho(j)+exp(-(dist(i,j)/dc)*(dist(i,j)/dc));
      end
    end
    %
    % "Cut off" kernel
    %
    % for i=1:ND-1
    %  for j=i+1:ND
    %    if (dist(i,j)<dc)S
    %       rho(i)=rho(i)+1.;
    %       rho(j)=rho(j)+1.;
    %    end
    %  end
    % end
    
    maxd=max(max(dist));
    
    [rho_sorted,ordrho]=sort(rho,'descend');
    delta(ordrho(1))=-1.;
    nneigh(ordrho(1))=0;
    
    for ii=2:ND
       delta(ordrho(ii))=maxd;
       for jj=1:ii-1
         if(dist(ordrho(ii),ordrho(jj))<delta(ordrho(ii)))
            delta(ordrho(ii))=dist(ordrho(ii),ordrho(jj));
            nneigh(ordrho(ii))=ordrho(jj);
         end
       end
    end
    delta(ordrho(1))=max(delta(:));
    disp('Generated file:DECISION GRAPH')
    disp('column 1:Density')
    disp('column 2:Delta')
    
    fid = fopen('DECISION_GRAPH', 'w');
    for i=1:ND
       fprintf(fid, '%6.2f %6.2f
    ', rho(i),delta(i));
    end
    
    disp('Select a rectangle enclosing cluster centers')
    scrsz = get(0,'ScreenSize');
    figure('Position',[6 72 scrsz(3)/4. scrsz(4)/1.3]);
    for i=1:ND
      ind(i)=i;
      gamma(i)=rho(i)*delta(i);
    end
    subplot(2,1,1)
    tt=plot(rho(:),delta(:),'o','MarkerSize',5,'MarkerFaceColor','k','MarkerEdgeColor','k');
    title ('Decision Graph','FontSize',15.0)
    xlabel ('
    ho')
    ylabel ('delta')
    
    
    subplot(2,1,1)
    rect = getrect(1);
    rhomin=rect(1);
    deltamin=rect(4);
    NCLUST=0;
    for i=1:ND
      cl(i)=-1;
    end
    for i=1:ND
      if ( (rho(i)>rhomin) && (delta(i)>deltamin))
         NCLUST=NCLUST+1;
         cl(i)=NCLUST;
         icl(NCLUST)=i;
      end
    end
    fprintf('NUMBER OF CLUSTERS: %i 
    ', NCLUST);
    disp('Performing assignation')
    
    %assignation
    for i=1:ND
      if (cl(ordrho(i))==-1)
        cl(ordrho(i))=cl(nneigh(ordrho(i)));
      end
    end
    %halo
    for i=1:ND
      halo(i)=cl(i);
    end
    if (NCLUST>1)
      for i=1:NCLUST
        bord_rho(i)=0.;
      end
      for i=1:ND-1
        for j=i+1:ND
          if ((cl(i)~=cl(j))&& (dist(i,j)<=dc))
            rho_aver=(rho(i)+rho(j))/2.;
            if (rho_aver>bord_rho(cl(i))) 
              bord_rho(cl(i))=rho_aver;
            end
            if (rho_aver>bord_rho(cl(j))) 
              bord_rho(cl(j))=rho_aver;
            end
          end
        end
      end
      for i=1:ND
        if (rho(i)<bord_rho(cl(i)))
          halo(i)=0;
        end
      end
    end
    for i=1:NCLUST
      nc=0;
      nh=0;
      for j=1:ND
        if (cl(j)==i) 
          nc=nc+1;
        end
        if (halo(j)==i) 
          nh=nh+1;
        end
      end
      fprintf('CLUSTER: %i CENTER: %i ELEMENTS: %i CORE: %i HALO: %i 
    ', i,icl(i),nc,nh,nc-nh);
    end
    
    cmap=colormap;
    for i=1:NCLUST
       ic=int8((i*64.)/(NCLUST*1.));
       subplot(2,1,1)
       hold on
       plot(rho(icl(i)),delta(icl(i)),'o','MarkerSize',8,'MarkerFaceColor',cmap(ic,:),'MarkerEdgeColor',cmap(ic,:));
    end
    subplot(2,1,2)
    disp('Performing 2D nonclassical multidimensional scaling')
    Y1 = mdscale(dist, 2, 'criterion','metricstress');
    plot(Y1(:,1),Y1(:,2),'o','MarkerSize',2,'MarkerFaceColor','k','MarkerEdgeColor','k');
    title ('2D Nonclassical multidimensional scaling','FontSize',15.0)
    xlabel ('X')
    ylabel ('Y')
    for i=1:ND
     A(i,1)=0.;
     A(i,2)=0.;
    end
    for i=1:NCLUST
      nn=0;
      ic=int8((i*64.)/(NCLUST*1.));
      for j=1:ND
        if (halo(j)==i)
          nn=nn+1;
          A(nn,1)=Y1(j,1);
          A(nn,2)=Y1(j,2);
        end
      end
      hold on
      plot(A(1:nn,1),A(1:nn,2),'o','MarkerSize',2,'MarkerFaceColor',cmap(ic,:),'MarkerEdgeColor',cmap(ic,:));
    end
    
    %for i=1:ND
    %   if (halo(i)>0)
    %      ic=int8((halo(i)*64.)/(NCLUST*1.));
    %      hold on
    %      plot(Y1(i,1),Y1(i,2),'o','MarkerSize',2,'MarkerFaceColor',cmap(ic,:),'MarkerEdgeColor',cmap(ic,:));
    %   end
    %end
    faa = fopen('CLUSTER_ASSIGNATION', 'w');
    disp('Generated file:CLUSTER_ASSIGNATION')
    disp('column 1:element id')
    disp('column 2:cluster assignation without halo control')
    disp('column 3:cluster assignation with halo control')
    for i=1:ND
       fprintf(faa, '%i %i %i
    ',i,cl(i),halo(i));
    end
  • 相关阅读:
    [openjudge] jubeeeeeat
    [BJOI2006] 狼抓兔子
    [模板]网络最大流
    [HNOI2002]营业额统计
    【Java学习笔记之十一】Java中常用的8大排序算法详解总结
    【Java学习笔记之十】Java中循环语句foreach使用总结及foreach写法失效的问题
    【Java学习笔记之九】java二维数组及其多维数组的内存应用拓展延伸
    【Java学习笔记之八】JavaBean中布尔类型使用注意事项
    【Java学习笔记之七】java函数的语法规则总结
    二分图匹配--匈牙利算法模板
  • 原文地址:https://www.cnblogs.com/instant7/p/4289293.html
Copyright © 2011-2022 走看看