zoukankan      html  css  js  c++  java
  • 聚类------KNN

    import numpy as np
    from math import sqrt
    import operator as opt

    def normData(dataSet):
    maxVals = dataSet.max(axis=0)
    minVals = dataSet.min(axis=0)
    ranges = maxVals - minVals
    retData = (dataSet - minVals) / ranges
    return retData, ranges, minVals

    def kNN(dataSet, labels, testData, k):
    distSquareMat = (dataSet - testData) ** 2 # 计算差值的平方
    distSquareSums = distSquareMat.sum(axis=1) # 求每一行的差值平方和
    distances = distSquareSums ** 0.5 # 开根号,得出每个样本到测试点的距离
    sortedIndices = distances.argsort() # 排序,得到排序后的下标
    indices = sortedIndices[:k] # 取最小的k个
    labelCount = {} # 存储每个label的出现次数
    for i in indices:
    label = labels[i]
    labelCount[label] = labelCount.get(label, 0) + 1 # 次数加一
    sortedCount = sorted(labelCount.items(), key=opt.itemgetter(1), reverse=True) # 对label出现的次数从大到小进行排序
    return sortedCount[0][0] # 返回出现次数最大的label

    if name == "main":
    dataSet = np.array([[2, 3], [6, 8],[1,1],[3,4],[5,6]])
    normDataSet, ranges, minVals = normData(dataSet)
    labels = ['a', 'b','c','a','b']
    testData = np.array([3.9, 5.5])
    normTestData = (testData - minVals) / ranges
    result = kNN(normDataSet, labels, normTestData, 2)
    print(result)

    调用库

    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.datasets.samples_generator import make_blobs

    X为样本特征,Y为样本簇类别, 共1000个样本,每个样本2个特征,共4个簇,簇中心在[-1,-1], [0,0],[1,1], [2,2], 簇方差分别为[0.4, 0.2, 0.2]

    X, y = make_blobs(n_samples=1000, n_features=2, centers=[[-1,-1], [0,0], [1,1], [2,2]], cluster_std=[0.4, 0.2, 0.2, 0.2],
    random_state =9)
    plt.scatter(X[:, 0], X[:, 1], marker='o')
    plt.show()
    from sklearn.cluster import KMeans
    y_pred = KMeans(n_clusters=3, random_state=9).fit_predict(X)
    plt.scatter(X[:, 0], X[:, 1], c=y_pred)
    plt.show()
    from sklearn import metrics
    metrics.calinski_harabaz_score(X, y_pred)
    from sklearn import metrics
    metrics.calinski_harabaz_score(X, y_pred)

  • 相关阅读:
    CentOS7 安装jdk8
    CentOS7 安装和配置 mysql5.7
    CentOS7 安装和配置Tomcat
    vi编辑器设置行号可见
    前端基础-css(2)
    前端基础-css(1)
    前端基础-html(3)
    前端基础-html(2)
    前端基础-html(1)
    IO多路复用、协程
  • 原文地址:https://www.cnblogs.com/131415-520/p/11784132.html
Copyright © 2011-2022 走看看