zoukankan      html  css  js  c++  java
  • 凝聚层次聚类

    import numpy as np
    import pandas as pd
    import matplotlib.pylab as plt
    import matplotlib
    from sklearn import datasets
    import scipy.cluster.hierarchy as sch
    # Agglomerative (bottom-up) hierarchical clustering demo:
    #   1. draw a Ward-linkage dendrogram of the data with SciPy,
    #   2. fit a 5-cluster AgglomerativeClustering model with scikit-learn,
    #   3. scatter-plot the samples of each cluster,
    #   4. report the silhouette score of the clustering.

    # Use the SimHei font for figure text (the font must be installed locally).
    matplotlib.rcParams['font.sans-serif'] = ['SimHei']

    data = pd.read_csv('023-Mall_Customers.csv')
    # Cluster on the two columns at positions 3 and 4.
    # NOTE(review): presumably annual income and spending score -- confirm
    # against the CSV header.
    X = data.iloc[:, [3, 4]].values

    # Dendrogram on its own figure so the scatter plot below does not get
    # drawn on top of it.
    # 'ward' linkage merges the pair of clusters that minimises the increase
    # in total within-cluster variance.
    plt.figure()
    dendrogram = sch.dendrogram(sch.linkage(y=X, method='ward'))

    # Build the agglomerative clustering model.
    from sklearn.cluster import AgglomerativeClustering

    # The distance argument is intentionally omitted: `affinity` was removed in
    # scikit-learn 1.4 (renamed to `metric`), and Ward linkage only supports
    # the Euclidean distance, which is the default in every version.
    agg = AgglomerativeClustering(n_clusters=5, linkage='ward')
    agg.fit(X)
    labels = agg.labels_  # cluster id assigned to each sample
    print(agg.n_leaves_)  # number of leaves in the hierarchical tree

    # Visualise the clusters: one scatter call per distinct cluster id.
    # (Iterating over `labels` directly would redraw each cluster once per
    # sample that belongs to it.)
    plt.figure()
    for cluster_id in np.unique(labels):
        members = labels == cluster_id
        plt.scatter(X[members, 0], X[members, 1], marker='o')
    plt.show()

    # Evaluation: mean silhouette coefficient over all samples.
    from sklearn.metrics import silhouette_score

    # `sample_size` is left at its default (use every sample); passing
    # len(X) only shuffled the full data set before scoring.
    si_score = silhouette_score(X, labels, metric='euclidean')
    print('si_score:{:.4f}'.format(si_score))

  • 相关阅读:
    sql语句添加查询字段
    SqlServer Case when then用法总结
    单例与多线程
    HttpSession详解
    范式
    SQL语句中的Having子句与where子句
    HTTP无状态
    字节流与字符流的区别
    选择排序
    ReentrantLock VS synchronized
  • 原文地址:https://www.cnblogs.com/txb1999/p/10738456.html
Copyright © 2011-2022 走看看