zoukankan      html  css  js  c++  java
  • FCM 实现以及准确率 精确率 召回率的计算

    import pandas as pd
    import numpy as np
    import random
    import operator
    import math
    
    
    # Load the data set. The last column holds the class label; every column
    # before it is treated as a feature used for clustering.
    df_full = pd.read_csv("SPECTF_New.csv")
    columns = list(df_full.columns)
    features = columns[:-1]
    class_labels = list(df_full[columns[-1]])
    df = df_full[features]

    # Number of Attributes
    # `df` already contains only the feature columns, so its column count is
    # the attribute count (subtracting 1 again would drop a real feature).
    num_attr = len(df.columns)

    # Number of Clusters
    k = 2

    # Maximum number of iterations
    MAX_ITER = 100

    # Number of data points
    n = len(df)

    # Fuzzy parameter (exponent applied to membership values; must not be 1,
    # because 2/(m-1) is used as an exponent in the membership update)
    m = 2.00
    
    def accuracy(cluster_labels, class_labels):
        """Score a two-cluster assignment against 'Yes'/'No' ground truth.

        Cluster ids carry no inherent meaning, so every metric is computed under
        both possible labelings. Index 0 of each returned list treats cluster 1
        as 'Yes' and cluster 0 as 'No'; index 1 treats cluster 0 as 'Yes' and
        cluster 1 as 'No'.

        Args:
            cluster_labels: sequence of cluster ids (0 or 1), one per sample.
            class_labels: sequence of 'Yes'/'No' strings aligned with
                ``cluster_labels``.

        Returns:
            A tuple ``(accuracy, precision, recall)`` of three two-element lists
            holding percentages. A metric whose denominator is zero (empty
            input, or no sample predicted/actually positive) is reported as 0.0
            instead of raising ZeroDivisionError.
        """
        def _percent(numerator, denominator):
            # A zero denominator means the metric is undefined; report 0.0.
            if denominator == 0:
                return 0.0
            return float(numerator) / denominator * 100

        # Count each (class label, cluster id) combination once. Pairs with any
        # other label or cluster id are ignored.
        yes_in_1 = yes_in_0 = no_in_1 = no_in_0 = 0
        for cluster, label in zip(cluster_labels, class_labels):
            if label == 'Yes':
                if cluster == 1:
                    yes_in_1 += 1
                elif cluster == 0:
                    yes_in_0 += 1
            elif label == 'No':
                if cluster == 1:
                    no_in_1 += 1
                elif cluster == 0:
                    no_in_0 += 1

        # (tp, tn, fp, fn) under each labeling; the second is the first with
        # the roles of the two clusters swapped.
        confusions = [
            (yes_in_1, no_in_0, no_in_1, yes_in_0),  # Yes = 1, No = 0
            (yes_in_0, no_in_1, no_in_0, yes_in_1),  # Yes = 0, No = 1
        ]

        accuracies = [_percent(tp + tn, tp + tn + fp + fn)
                      for tp, tn, fp, fn in confusions]
        precisions = [_percent(tp, tp + fp) for tp, tn, fp, fn in confusions]
        recalls = [_percent(tp, tp + fn) for tp, tn, fp, fn in confusions]

        return accuracies, precisions, recalls
    
    
    def initializeMembershipMatrix():
        """Build a random initial fuzzy membership matrix.

        Returns:
            A list of ``n`` rows (one per data point), each a list of ``k``
            random weights divided by their sum so that the row adds up to 1.
        """
        def _random_row():
            # Draw k raw weights, then normalise them into a distribution.
            raw = [random.random() for _ in range(k)]
            total = sum(raw)
            return [weight / total for weight in raw]

        return [_random_row() for _ in range(n)]
    
    
    def calculateClusterCenter(membership_mat):
        """Compute the fuzzy centroid of every cluster.

        The center of cluster j is the weighted mean of all data points in the
        module-level ``df``, where point i is weighted by its membership in
        cluster j raised to the fuzzy parameter ``m``:
        ``sum_i(u_ij**m * x_i) / sum_i(u_ij**m)``.

        Args:
            membership_mat: ``n`` rows of ``k`` membership values.

        Returns:
            A list of ``k`` centers, each a list with one coordinate per column
            of ``df``.
        """
        # The data rows do not depend on the cluster, so fetch each one once
        # instead of once per cluster.
        data_points = [list(df.iloc[i]) for i in range(n)]

        # Transpose so entry j holds the memberships of every point in cluster j.
        cluster_mem_val = list(zip(*membership_mat))

        cluster_centers = list()
        for j in range(k):
            weights = [u ** m for u in cluster_mem_val[j]]
            denominator = sum(weights)
            # Scale every coordinate of point i by that point's weight.
            weighted_points = [
                [weights[i] * val for val in data_points[i]]
                for i in range(n)
            ]
            # Column-wise sum of the weighted points.
            numerator = map(sum, zip(*weighted_points))
            center = [z/denominator for z in numerator]
            cluster_centers.append(center)
        return cluster_centers
    
    
    def updateMembershipValue(membership_mat, cluster_centers):
        """Recompute every membership value from the current cluster centers.

        For point i and cluster j the new membership is
        ``1 / sum_c((d_ij / d_ic) ** (2 / (m - 1)))`` where ``d_ij`` is the
        Euclidean distance from point i (a row of the module-level ``df``) to
        center j.

        Args:
            membership_mat: ``n`` rows of ``k`` values; updated in place.
            cluster_centers: ``k`` centers, each with one coordinate per column
                of ``df``.

        Returns:
            The same ``membership_mat`` object, after the update.
        """
        p = float(2/(m-1))
        for i in range(n):
            x = list(df.iloc[i])
            distances = [np.linalg.norm(list(map(operator.sub, x, cluster_centers[j]))) for j in range(k)]

            # A point lying exactly on a center has distance 0, which would turn
            # the ratios below into inf/NaN. Give such a point full membership
            # in the coinciding cluster(s), shared equally, and none elsewhere.
            coincident = [j for j in range(k) if distances[j] == 0]
            if coincident:
                share = 1.0 / len(coincident)
                for j in range(k):
                    membership_mat[i][j] = share if distances[j] == 0 else 0.0
                continue

            for j in range(k):
                den = sum([math.pow(float(distances[j]/distances[c]), p) for c in range(k)])
                membership_mat[i][j] = float(1/den)
        return membership_mat
    
    
    def getClusters(membership_mat):
        """Assign each data point to the cluster where its membership is highest.

        Args:
            membership_mat: one row per data point, each row holding one
                membership value per cluster.

        Returns:
            A list with one cluster index per row of ``membership_mat``. When
            several clusters share the highest membership, the largest index
            among them is chosen.
        """
        # Iterate over the matrix itself rather than a module-level row count,
        # so the result always matches the argument that was passed in.
        # The (value, index) key makes ties resolve to the larger index.
        return [
            max(range(len(row)), key=lambda j: (row[j], j))
            for row in membership_mat
        ]
    
    
    def fuzzyCMeansClustering():
        """Run Fuzzy C-Means on the module-level data set.

        Starts from a random membership matrix and then alternates between
        recomputing the cluster centers and recomputing the memberships for
        exactly ``MAX_ITER`` passes.

        Returns:
            A tuple ``(cluster_labels, cluster_centers)``: the hard cluster index
            of every data point taken from the final membership matrix, and the
            centers computed in the last pass.
        """
        # Membership Matrix
        membership_mat = initializeMembershipMatrix()

        cluster_centers = None
        # range(MAX_ITER) performs MAX_ITER passes; a `<=` counter starting at
        # zero would perform one more than the configured maximum.
        for _ in range(MAX_ITER):
            cluster_centers = calculateClusterCenter(membership_mat)
            membership_mat = updateMembershipValue(membership_mat, cluster_centers)

        if cluster_centers is None:
            # No pass ran (MAX_ITER <= 0): report the centers implied by the
            # initial memberships instead of leaving the result undefined.
            cluster_centers = calculateClusterCenter(membership_mat)

        # Only the final memberships matter for the hard assignment.
        cluster_labels = getClusters(membership_mat)
        return cluster_labels, cluster_centers
    
    
    # Cluster the data, then print the predicted cluster ids followed by the
    # true class labels.
    labels, centers = fuzzyCMeansClustering()
    print(labels)
    print(class_labels)

    # Score the clustering under both cluster-to-class labelings.
    a, p, r = accuracy(cluster_labels=labels, class_labels=class_labels)

    print("Accuracy = ", a, sep="")
    print("Precision = ", p, sep="")
    print("Recall = ", r, sep="")
    
    
  • 相关阅读:
    给js function的参数设置默认值
    如何让windows服务器IIS支持.apk/.ipa文件下载
    Firefox 设置技巧
    在sql中使用了 hashbytes 函数
    SQL Server 查询处理中的各个阶段(SQL执行顺序)
    Jquery 操作 radio选中值
    gradle更新依赖库
    weex第一节-环境搭建
    安装weex-toolkit老是失败的解决办法
    Android透明度颜色值计算
  • 原文地址:https://www.cnblogs.com/princeness/p/11664901.html
Copyright © 2011-2022 走看看