zoukankan      html  css  js  c++  java
  • FCM自己修改的代码 (针对论文)

    #!/usr/bin/env python3
    # -*- coding: utf-8 -*-
    """
    Created on Wed Mar 27 10:51:45 2019
    @author: youxinlin
    """
    import copy
    import math
    import random
    import time
    from sklearn.preprocessing import StandardScaler
    from sklearn.preprocessing import MinMaxScaler
    
    global MAX  # 用于初始化隶属度矩阵U
    MAX = 10000.0
    
    global Epsilon  # 结束条件
    Epsilon = 0.0000001
    
    
    def import_data_format_iris(file):
        """
        file这里是输入文件的路径,如iris.txt.
        格式化数据,前四列为data,最后一列为类标号(有0,1,2三类)
        如果是你自己的data,就不需要执行此段函数了。
        """
        data = []
        cluster_location = []
        with open(str(file), 'r') as f:
            for line in f:
                current = line.strip().split(",")  # 对每一行以逗号为分割,返回一个list
                current_dummy = []
                for j in range(0, len(current) - 1):
                    current_dummy.append(float(current[j]))  # current_dummy存放data
    
                # 下面注这段话提供了一个范例:若类标号不是0,1,2之类数字时该怎么给数据集
                j += 1
    
                cluster_location.append(current[j])
    
                data.append(current_dummy)
                # print(data)
                # print(cluster_location)
                # print(len(data))
                # print(len(cluster_location))
                # print(data)
        print("加载数据完毕")
        return data, cluster_location
    
    
    
    def randomize_data(data):
        """
        该功能将数据随机化,并保持随机化顺序的记录
        """
        order = list(range(0, len(data)))
        random.shuffle(order)
        new_data = [[] for i in range(0, len(data))]
        for index in range(0, len(order)):
            new_data[index] = data[order[index]]
        # print(new_data)
        return new_data,   order
    
    
    def de_randomise_data(data, order):
        """
        此函数将返回数据的原始顺序,将randomise_data()返回的order列表作为参数
        """
        new_data = [[] for i in range(0, len(data))]
        for index in range(len(order)):
            new_data[order[index]] = data[index]
        return new_data
    
    
    def print_matrix(list):
        """
        以可重复的方式打印矩阵
        """
        for i in range(0, len(list)):
            print(list[i])
    
    
    def initialize_U(data, cluster_number):
        """
        这个函数是隶属度矩阵U的每行加起来都为1. 此处需要一个全局变量MAX.
        """
        global MAX
        U = []
        for i in range(0, len(data)):
            current = []
            rand_sum = 0.0
            for j in range(0, cluster_number):
                dummy = random.randint(1, int(MAX))
                current.append(dummy)
                rand_sum += dummy
            for j in range(0, cluster_number):
                current[j] = current[j] / rand_sum
            U.append(current)
        return U
    
    
    def distance(point, center):
        """
        该函数计算2点之间的距离(作为列表)。我们指欧几里德距离。闵可夫斯基距离
        """
        if len(point) != len(center):
            return -1
        dummy = 0.0
        for i in range(0, len(point)):
            dummy += abs(point[i] - center[i]) ** 2
        return math.sqrt(dummy)
    
    
    def end_conditon(U, U_old):
        """
    	结束条件。当U矩阵随着连续迭代停止变化时,触发结束
    	"""
        global Epsilon
        for i in range(0, len(U)):
            for j in range(0, len(U[0])):
                if abs(U[i][j] - U_old[i][j]) > Epsilon:
                    return False
        return True
    
    
    def normalise_U(U):
        """
        在聚类结束时使U模糊化。每个样本的隶属度最大的为1,其余为0
        """
        for i in range(0, len(U)):
            maximum = max(U[i])
            for j in range(0, len(U[0])):
                if U[i][j] != maximum:
                    U[i][j] = 0
                else:
                    U[i][j] = 1
        return U
    
    
    # m的最佳取值范围为[1.5,2.5]
    def fuzzy(data, cluster_number, m):
        """
        这是主函数,它将计算所需的聚类中心,并返回最终的归一化隶属矩阵U.
        参数是:簇数(cluster_number)和隶属度的因子(m)
        """
        # 初始化隶属度矩阵U
        U = initialize_U(data, cluster_number)
        # print_matrix(U)
        # 循环更新U
        while (True):
            # 创建它的副本,以检查结束条件
            U_old = copy.deepcopy(U)
            # 计算聚类中心
            C = []
            for j in range(0, cluster_number):
                current_cluster_center = []
                for i in range(0, len(data[0])):
                    dummy_sum_num = 0.0
                    dummy_sum_dum = 0.0
                    for k in range(0, len(data)):
                        # 分子
                        dummy_sum_num += (U[k][j] ** m) * data[k][i]
                        # 分母
                        dummy_sum_dum += (U[k][j] ** m)
                    # 第i列的聚类中心
                    current_cluster_center.append(dummy_sum_num / dummy_sum_dum)
                # 第j簇的所有聚类中心
                C.append(current_cluster_center)
    
            # 创建一个距离向量, 用于计算U矩阵。
            distance_matrix = []
            for i in range(0, len(data)):
                current = []
                for j in range(0, cluster_number):
                    current.append(distance(data[i], C[j]))
                distance_matrix.append(current)
    
            # 更新U
            for j in range(0, cluster_number):
                for i in range(0, len(data)):
                    dummy = 0.0
                    for k in range(0, cluster_number):
                        # 分母
                        dummy += (distance_matrix[i][j] / distance_matrix[i][k]) ** (2 / (m - 1))
                    U[i][j] = 1 / dummy
    
            if end_conditon(U, U_old):
                print("结束聚类")
                break
        print("标准化 U")
        U = normalise_U(U)
        return U
    
    
    # def checker_iris(final_location):
    #     """
    #     和真实的聚类结果进行校验比对
    #     """
    #     right = 0.0
    #     for k in range(0, 2):
    #         checker = [0, 0, 0]
    #         for i in range(0, 50):
    #             for j in range(0, len(final_location[0])):
    #                 if final_location[i + (50 * k)][j] == 1:  # i+(50*k)表示 j表示第j类
    #                     checker[j] += 1  # checker分别统计每一类分类正确的个数
    #         right += max(checker)  # 累加分类正确的个数
    #     print('分类正确的个数是:', right)
    #     answer = right / 150 * 100
    #     return "准确率:" + str(answer) + "%"
    #
    # 计算每一簇的数据量
    def tongji(final_location,cluster_number):
        new_data = [0 for i in range(0, cluster_number)]
        for i in range(0, len(final_location)):
            for j in range(0, cluster_number):
                if final_location[i][j] == 1:
                    new_data[j] += 1
        return new_data
    
    # 得到每个数据属于聚类中的哪一个种类
    
    def getClusters(membership_mat,data):
        cluster_labels = list()
        # print(membership_mat)
        for i in range(len(data)):
            max_val, idx = max((val, idx) for (idx, val) in enumerate(membership_mat[i]))
            cluster_labels.append(idx)
            # print(max_val)
        return cluster_labels
    # 把数据属于哪一个种类归为正常(0)异常(1)
    
    def suibian(real_label,cluster_number):
    
        a=list()
        for p in range(0,cluster_number):
            j = 0
            for i in range(0,len(real_label)):
                if real_label[i] == p:
                    j+=1
            a.append(j)
    
            if (a[p] > int(len(real_label) / (2 * cluster_number))):
                    for i in range(0, len(real_label)):
                        if real_label[i] == p:
                            real_label[i] = 0
            else:
                for i in range(0, len(real_label)):
                    if real_label[i] == p:
                       real_label[i] = 1
    
        q=0
        b=list()
        for u in range(0, len(real_label)):
            if real_label[u] == 0:
                q+=1
        b.append(q)
        print(b)
        print(a)
        print(real_label)
        print(len(real_label) / (2 * cluster_number))
        return real_label
    
    
    def accuracy(cluster_labels, class_labels,data):
        county = [0, 0]
        countn = [0, 0]
        tp = [0, 0]
        tn = [0, 0]
        fp = [0, 0]
        fn = [0, 0]
    
        for i in range(len(data)):
            # Yes = 1, No = 0
            if cluster_labels[i] == 1 and class_labels[i] == '1':
                tp[0] = tp[0] + 1
            if cluster_labels[i] == 0 and class_labels[i] == '0':
                tn[0] = tn[0] + 1
            if cluster_labels[i] == 1 and class_labels[i] == '0':
                fp[0] = fp[0] + 1
            if cluster_labels[i] == 0 and class_labels[i] == '1':
                fn[0] = fn[0] + 1
    
        for i in range(len(data)):
            # Yes = 0, No = 1
            if cluster_labels[i] == 0 and class_labels[i] == '0':
                tp[1] = tp[1] + 1
            if cluster_labels[i] == 1 and class_labels[i] == '1':
                tn[1] = tn[1] + 1
            if cluster_labels[i] == 0 and class_labels[i] == '1':
                fp[1] = fp[1] + 1
            if cluster_labels[i] == 1 and class_labels[i] == '0':
                fn[1] = fn[1] + 1
    
        a0 = float((tp[0] + tn[0])) / (tp[0] + tn[0] + fn[0] + fp[0])
        a1 = float((tp[1] + tn[1])) / (tp[1] + tn[1] + fn[1] + fp[1])
        p0 = float(tp[0]) / (tp[0] + fp[0])
        p1 = float(tp[1]) / (tp[1] + fp[1])
        r0 = float(tp[0]) / (tp[0] + fn[0])
        r1 = float(tp[1]) / (tp[1] + fn[1])
    
        accuracy = [a0 * 100, a1 * 100]
        precision = [p0 * 100, p1 * 100]
        recall = [r0 * 100, r1 * 100]
    
        return accuracy, precision, recall
    
    
    if __name__ == '__main__':
        # 加载数据
        data, cluster_location= import_data_format_iris("data1--fcm(1).csv")
        # print_matrix(data)
    
        # 随机化数据
        data, order = randomize_data(data)
        # print_matrix(data)
        ss = StandardScaler()
        data_1 = ss.fit_transform(data)
        start = time.time()
        # 现在我们有一个名为data的列表,它只是数字
        # 我们还有另一个名为cluster_location的列表,它给出了正确的聚类结果位置
        # 调用模糊C均值函数
        final_location = fuzzy(data_1, 10, 2)
    
        # 还原数据
        final_location = de_randomise_data(final_location, order)
        #	print_matrix(final_location)
        real_label = getClusters(final_location,data_1)
    
        print(real_label)
    
        zuizhong_label = suibian(real_label,10)
        # print(len(real_label))
        # 准确度分析
        # 准确度分析
        a,p,r =  accuracy(zuizhong_label, cluster_location,data)
    
    
        new_data=tongji(final_location,10)
        print(new_data)
    
    
        print("Accuracy = " + str(a))
        print("Precision = " + str(p))
        print("Recall = " + str(r))
    
        print("用时:{0}".format(time.time() - start))
    
    
  • 相关阅读:
    XML语法
    C/C++对MySQL操作
    HDU 3966 Aragorn's Story
    SPOJ 375 Query on a tree
    SPOJ 913 Query on a tree II
    SPOJ 6779 Can you answer these queries VII
    URAL 1471 Tree
    SPOJ 2798 Query on a tree again!
    POJ 3237 Tree
    SPOJ 4487 Can you answer these queries VI
  • 原文地址:https://www.cnblogs.com/princeness/p/11664900.html
Copyright © 2011-2022 走看看