zoukankan      html  css  js  c++  java
  • 灰色关联分析法(GRA)实现

    GRA算法步骤

    step1:确定参考序列 (x_0) 和比较序列 (x_i)

    step2:对原始数据变换,将第 i 个属性的第 k 个数据 (x_i(k)) 转换为 (y_i(k)) 。方法有:

    • 初值变换: \(y_i(k) = \frac{x_i(k)}{x_i(1)}\).
    • 均值变换: \(y_i(k) = \frac{x_i(k)}{\overline{x}_i}\).
    • 百分比变换(变换成小于1的数):\(y_i(k) = \frac{x_i(k)}{\max_k x_i(k)}\).
    • 倍数变换(变换成大于1的数):\(y_i(k) = \frac{x_i(k)}{\min_k x_i(k)}\).
    • 归一化变换:\(y_i(k) = \frac{x_i(k)}{\sum_k x_i(k)}\).
    • 区间变换:\(y_i(k) = \frac{x_i(k) - \min_k x_i(k)}{\max_k x_i(k) - \min_k x_i(k)}\).

    step3:求绝对差序列,即比较序列与参考序列的差值 \(\Delta_{0i}(k) = |x_0(k) - x_i(k)|\) .

    step4:使用下面公式计算灰关联系数,\(\gamma(x_0(k),x_i(k)) = \frac{\Delta_{\min} + \rho\Delta_{\max}}{\Delta_{0i}(k) + \rho\Delta_{\max}}\),一般取分辨系数 \(\rho = 0.5\)。

    step5:使用下面公式计算灰关联度,\(\gamma(x_0,x_i) = \frac{1}{N}\sum_{k=1}^{N} w_k\,\gamma(x_0(k),x_i(k))\),其中 \(w_k\) 为第 k 条数据的权重。

    step6:得到关键属性

    • 如果参考序列 \(x_0\) 为最优值数据列,那么灰关联度 \(\gamma(x_0,x_i)\) 越大,则第 i 个属性越好;
    • 如果参考序列 \(x_0\) 为最劣值数据列,那么灰关联度 \(\gamma(x_0,x_i)\) 越大,则第 i 个属性越不好;

    GRA算法的Python实现

    import numpy as np
    import pandas as pd
    import seaborn as sns
    import matplotlib.pyplot as plt
    from scipy.sparse import issparse
    
    
    ## Input validation: must be numeric and 2-D.
    def check_array(array, dtype="numeric"):
        """Validate the input matrix and return it as a float64 ndarray.

        Parameters
        ----------
        array : array-like
            Dense 2-D numeric data (list of lists or ndarray).
        dtype : str
            Unused; kept for backward compatibility with existing callers.

        Raises
        ------
        TypeError
            If a scipy sparse matrix is passed.
        ValueError
            If the data is not 2-dimensional.
        """
        if issparse(array):
            # Bug fix: the message used to say "PCA" (copy-paste error).
            raise TypeError('GRA does not support sparse input.')  # sparse matrices not accepted

        # Bug fix: convert BEFORE checking ndim, so plain Python lists raise
        # the intended ValueError instead of AttributeError (lists have no .ndim).
        array = np.array(array, dtype=np.float64)

        if array.ndim != 2:
            raise ValueError('Expected 2D array.')  # only 2-D matrices accepted

        return array
    
    
    
    class GRA():
        """Grey Relational Analysis (灰色关联分析).

        Parameters
        ----------
        k : int
            Column index of the reference sequence x_0.
        norm_method : str
            Preprocessing transform: one of 'norm' (sum), 'mean', 'initial',
            'section' (min-max), 'max', 'min'.
        rho : float
            Distinguishing coefficient ρ; conventionally 0.5.
        """

        def __init__(self, k=0, norm_method='norm', rho=0.5):
            self.k = k
            self.norm_method = norm_method
            self.rho = rho

        def fit(self, X, k=None):
            """Compare every column of X against the single reference column k.

            Returns a 1-D array of grey relational grades, one per column.
            """
            # Bug fix: was `if not k==None` — use an identity test so k=0 and
            # k=None are handled explicitly and idiomatically.
            if k is not None:
                self.k = k
            X = check_array(X)
            Y = self.__normalization(X)  # preprocessing
            self.r = self.__calculation_relevancy(Y)
            return self.r

        def fit_all(self, X):
            """Use every column in turn as the reference; return the full matrix."""
            X = check_array(X)
            n_features = X.shape[1]
            self.data = np.zeros([n_features, n_features])
            for k in range(n_features):
                self.k = k
                # Bug fix: the transforms now return new arrays instead of
                # rescaling X in place, so repeated normalization no longer
                # accumulates across iterations of this loop.
                Y = self.__normalization(X)
                self.data[:, k] = self.__calculation_relevancy(Y)
            return self.data

        def __normalization(self, X):
            """Dispatch to the transform selected by self.norm_method."""
            if self.norm_method == 'mean':
                Y = self.__mean(X)
            elif self.norm_method == 'initial':
                Y = self.__initial(X)
            elif self.norm_method == 'norm':
                Y = self.__norm(X)
            elif self.norm_method == 'section':
                Y = self.__section(X)
            elif self.norm_method == 'max':
                Y = self.__max(X)
            elif self.norm_method == 'min':
                Y = self.__min(X)
            else:
                raise ValueError("Unrecognized norm_method='{0}'".format(self.norm_method))
            # Debug print(Y) removed.
            return Y

        def __norm(self, X):
            ''' Normalize each column by its sum (归一化变换). '''
            return X / np.sum(X, axis=0)

        def __mean(self, X):
            ''' Divide each column by its mean (均值变换). '''
            return X / np.mean(X, axis=0)

        def __initial(self, X):
            ''' Divide each column by its first element (初值变换). '''
            return X / X[0, :]

        def __max(self, X):
            ''' Divide each column by its maximum, yielding values <= 1 (百分比变换). '''
            return X / np.max(X, axis=0)

        def __min(self, X):
            ''' Divide each column by its minimum, yielding values >= 1 (倍数变换). '''
            return X / (np.min(X, axis=0) + 0.000001)  # avoid division by zero

        def __section(self, X):
            ''' Min-max scale each column into [0, 1] (区间变换). '''
            Xmin = np.min(X, axis=0)
            return (X - Xmin) / (np.max(X, axis=0) - Xmin)

        def __calculation_relevancy(self, X):
            ''' Compute grey relational coefficients and average them into grades. '''
            # Absolute difference of every column against the reference column.
            Delta = np.fabs(X - X[:, [self.k]])

            # Global extrema over all columns EXCEPT the reference itself
            # (its column is identically zero and would force the minimum to 0).
            t = np.delete(Delta, self.k, axis=1)
            mmax = t.max()
            mmin = t.min()

            # Grey relational coefficient with distinguishing coefficient rho.
            ksi = (mmin + self.rho * mmax) / (Delta + self.rho * mmax)

            # Grade: mean coefficient over all records (equal weights w_k = 1/N).
            r = ksi.mean(axis=0)
            return r

        def sort_comparison(self):
            """Print and return the features sorted by relevancy to feature k."""
            idxs = np.argsort(-self.r)
            data = []
            for idx in idxs:
                if idx == self.k:
                    continue  # skip the reference feature itself
                data.append(['第{}个特征'.format(idx), self.r[idx]])
            df = pd.DataFrame(
                data=np.array(data),
                columns=['特征', '相关度'],
                index=[f"{i+1}" for i in range(len(data))],
            )
            # Bug fix: the original string literal was split across a physical
            # newline (a syntax error); the intent was an escaped '\n'.
            print('\n与第{}个特征相关度的从大到小排序:'.format(self.k))
            print(df)
            return df

        def ShowGRAHeatMap(self):
            ''' Visualize the pairwise grey relational matrix from fit_all. '''
            df = pd.DataFrame(
                data=self.data,
                columns=[f"{i}" for i in range(self.data.shape[1])],
                index=[f"{i}" for i in range(self.data.shape[0])],
            )
            colormap = plt.cm.RdBu
            plt.figure()
            # Bug fix: the title said 'Pearson Correlation of Features'
            # (copied from another script); this plot shows GRA grades.
            plt.title('Grey Relational Analysis of Features')
            sns.heatmap(df.astype(float), linewidths=0.1, vmax=1.0, square=True, cmap=colormap, linecolor='white', annot=True)
            plt.show()
    

    接口调用

    test 1

    import numpy as np

    # Raw data: one row per feature, one column per observation.
    raw = np.array([[0.732, 0.646, 0.636, 0.598, 0.627],
                    [0.038, 0.031, 0.042, 0.036, 0.043],
                    [0.507, 0.451, 0.448, 0.411, 0.122],
                    [0.048, 0.034, 0.030, 0.030, 0.031],
                    [183.25, 207.28, 240.98, 290.80, 370.00],
                    [24.03, 44.98, 62.79, 83.44, 127.22],
                    [85508, 74313, 85966, 100554, 109804],
                    [175.87, 175.72, 183.69, 277.11, 521.26],
                    [10, 13, 13, 1, 1]])

    # Transpose so that each row is one record and each column one feature.
    X = raw.T
    print(X.shape)
    print(X)

    # Compare every feature against feature 0 using the 'min' transform.
    gra = GRA(k=0, norm_method='min')
    r = gra.fit(X)
    print(r)
    gra.sort_comparison()
    

    test 2

    import numpy as np

    from GRA import GRA

    # Sample matrix: one row per feature, one column per observation.
    X = np.array([[0.732, 0.646, 0.636, 0.598, 0.627],
                  [0.038, 0.031, 0.042, 0.036, 0.043],
                  [0.507, 0.451, 0.448, 0.411, 0.122],
                  [0.048, 0.034, 0.030, 0.030, 0.031],
                  [183.25, 207.28, 240.98, 290.80, 370.00],
                  [24.03, 44.98, 62.79, 83.44, 127.22],
                  [85508, 74313, 85966, 100554, 109804],
                  [175.87, 175.72, 183.69, 277.11, 521.26],
                  [10, 13, 13, 1, 1]])

    # Take every column in turn as the reference and visualize the result.
    analyzer = GRA(norm_method='initial')
    data = analyzer.fit_all(X)
    print(data)
    analyzer.ShowGRAHeatMap()
    

    参考:

  • 相关阅读:
    文件权限
    函数指针
    位操作
    宏定义
    GNU gcc常用编译选项
    SHELL编程(六)---------数组和函数
    26.使用中间件的方式包装日志输出
    25.最基本的日志输出方式,内置日志包使用
    24.把熔断器整合到我们的客户端代码中(初步)
    23.熔断器学习,熔断器的三种状态,状态获取
  • 原文地址:https://www.cnblogs.com/yejifeng/p/13152728.html
Copyright © 2011-2022 走看看