zoukankan      html  css  js  c++  java
  • [聚类算法]常用功能实现

    前言:聚类是非监督学习的主要任务之一,根据原理可分为:基于质心、基于密度、基于连通性、基于概率以及基于神经网络等多种类型。

    本文汇总了常用聚类算法及其评价指标,方便快速查询使用。(本文使用波士顿房价数据集;该数据集原本用于回归,此处将房价 MEDV 按阈值离散为 4 类,作为评价聚类结果的参考标签)

    以下为试验结果:

     1 from time import time
     2 
     3 import numpy as np
     4 import pandas as pd
     5 import matplotlib as mpl
     6 import matplotlib.pyplot as plt
     7 import sklearn
     8 from sklearn import datasets
     9 
    10 from sklearn.decomposition import PCA
    11 from sklearn.preprocessing import scale
    12 
    13 from sklearn import metrics
    14 from sklearn.cluster import KMeans
    15 from sklearn.cluster import MeanShift
    16 from sklearn.cluster import DBSCAN
    17 from sklearn.cluster import AgglomerativeClustering
    18 
    # 1. Load the data.
    # NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
    # so this line needs scikit-learn < 1.2 (or a replacement data source).
    boston = sklearn.datasets.load_boston()
    x, y = boston.data, boston.target
    y = y.reshape(len(y), 1)
    # `data` is the feature matrix with the target appended as the last column.
    data = np.hstack([x, y])

    # 2. Standardise the features (and the combined matrix) column-wise.
    #    `y` itself stays in its original units so the price thresholds below
    #    are applied to real MEDV values.
    x = scale(x)
    data = scale(data)

    # 3. Inspect the data.
    name_data = boston.feature_names
    # print(name_data)

    df_x = pd.DataFrame(x, columns=name_data)
    # Keep MEDV as float: casting to int32 here would truncate prices before
    # they are binned (e.g. 17.5 -> 17) and shift samples into the wrong class.
    df_y = pd.DataFrame(y, columns=['MEDV'])
    df = pd.concat([df_x, df_y], axis=1)

    # 506 rows, no missing values, float64 columns.
    # print(df.head())
    # print(df.info())
    # print(df['MEDV'].describe())

    # Split MEDV into 4 reference classes:
    #   0: MEDV <= 17.025
    #   1: 17.025 < MEDV <= 21.2
    #   2: 21.2   < MEDV <= 25
    #   3: MEDV > 25
    # (thresholds presumably taken from the describe() output above -- verify)
    n_clusters = 4  # number of clusters requested from the estimators
    medv_upper_edges = [17.025, 21.2, 25]
    # right=True makes each upper edge inclusive, matching the ranges above.
    labels = np.digitize(y.ravel(), medv_upper_edges, right=True)
    # As before, df_y ends up holding the class ids rather than the prices.
    df_y['MEDV'] = labels
    50 
    def bench_k_means(estimator, name, data, method, true_labels=None,
                      random_state=0):
        """Fit a clustering estimator and print one row of evaluation metrics.

        The row contains: method, name, fit time in seconds, homogeneity,
        completeness, V-measure, adjusted Rand index, adjusted mutual
        information and the silhouette coefficient.

        Args:
            estimator: Unfitted clustering object; it must provide ``fit`` and
                expose ``labels_`` after fitting.
            name: Text printed in the second column of the row.
            data: Samples passed to ``estimator.fit`` and to the silhouette
                computation.
            method: Text printed in the first column of the row.
            true_labels: Reference class labels the clustering is compared
                against. Defaults to the module-level ``labels``.
            random_state: Seed for the 300-sample subsample used by the
                silhouette coefficient, so repeated runs print the same value.
        """
        if true_labels is None:
            true_labels = labels

        t0 = time()
        estimator.fit(data)
        elapsed = time() - t0  # time the fit only, not the metric computation

        predicted = estimator.labels_

        # silhouette_score raises ValueError when fewer than two distinct
        # labels are present, which estimators that choose their own number of
        # clusters can produce. Report nan for that row instead of aborting.
        try:
            silhouette = metrics.silhouette_score(data, predicted,
                                                  metric='euclidean',
                                                  sample_size=300,
                                                  random_state=random_state)
        except ValueError:
            silhouette = float('nan')

        print('%-9s\t%-9s\t%.2fs\t\t%.3f\t\t\t%.3f\t\t%.3f\t\t%.3f\t\t\t%.3f\t\t\t%.3f'
              % (method, name, elapsed,
                 metrics.homogeneity_score(true_labels, predicted),
                 metrics.completeness_score(true_labels, predicted),
                 metrics.v_measure_score(true_labels, predicted),
                 metrics.adjusted_rand_score(true_labels, predicted),
                 metrics.adjusted_mutual_info_score(true_labels, predicted,
                                                    average_method='arithmetic'),
                 silhouette))
    66 
    # Print the table frame, then one row per clustering configuration.
    rule = 115 * '_'
    print(rule)
    print('聚类方式\t\t聚类原理\t\t执行时间\t\t同质性得分\t\t完整性评分\tv-测量得分\t调整后兰德指数\t调整的相互信息\t轮廓系数')

    # 5.1 / 5.2 KMeans with k-means++ seeding, then with random seeding.
    for init_scheme in ('k-means++', 'random'):
        kmeans = KMeans(init=init_scheme, n_clusters=n_clusters, n_init=10)
        bench_k_means(kmeans, name="质心", data=data, method='KMeans')

    # 5.3 KMeans seeded with the PCA components; the start is fixed,
    #     so a single initialisation (n_init=1) is enough.
    pca = PCA(n_components=n_clusters).fit(data)
    kmeans_pca = KMeans(init=pca.components_, n_clusters=n_clusters, n_init=1)
    bench_k_means(kmeans_pca, name="质心", data=data, method='KMeans')

    # 5.4 MeanShift, 5.5 DBSCAN, 5.6 hierarchical (agglomerative) clustering.
    for method_label, principle, clusterer in (
            ('MeanShift', "密度", MeanShift()),
            ('DBSCAN', "密度", DBSCAN(eps=3, min_samples=2)),
            ('HCA', "连通性", AgglomerativeClustering(n_clusters=n_clusters)),
    ):
        bench_k_means(clusterer, name=principle, data=data, method=method_label)

    print(rule)
  • 相关阅读:
    Promise小结 ES6异步编程
    XLNet模型
    BERT模型
    Transformer模型
    注意力机制(Attention Mechanism)
    序列到序列模型(seq2seq)
    【Pandas-附件2】查询手册
    【Pandas-附件1】读取excle和csv具体函数
    【pandas-21】实践-同比和环比指标
    【pandas-20】实践(泰坦尼克沉船事件)-特征处理
  • 原文地址:https://www.cnblogs.com/asenyang/p/11214725.html
Copyright © 2011-2022 走看看