zoukankan      html  css  js  c++  java
  • 基于不同度量准则生成的图形-Graph

    import numpy as np
    import networkx as nx
    import infomap
    import matplotlib.pyplot as plt
    import matplotlib.colors as colors
    from fitter import Fitter
    from scipy.spatial import distance
    from scipy.stats import pearsonr
    from sklearn.metrics.pairwise import rbf_kernel
    from sklearn.neighbors import NearestNeighbors
    
    # Sample data used by the demo at the bottom of the script.
    m = 50  # number of samples
    n = 10  # feature dimension
    X = np.random.random_sample((m, n)) * 10.0
    
    def simPlotGraph(G):
        """Draw ``G`` with a Kamada-Kawai layout and show the figure.

        Nodes are drawn in red with size 200 and with their labels.
        """
        layout = nx.kamada_kawai_layout(G)
        draw_options = dict(node_size=200, with_labels=True, node_color='red')
        nx.draw(G, pos=layout, **draw_options)
        plt.show()
    
    def get_distributions(data, fitter=False):
        """Visualise the distribution of ``data``.

        Args:
            data: Values whose distribution is inspected (e.g. the pairwise
                distances returned by the graph builders).
            fitter: When true, fit the 'norm', 't' and 'laplace' distributions
                with ``Fitter`` and print the fit summary.  Otherwise plot a
                100-bin density histogram as a line and show it.
        """
        if fitter:
            # Only a small set of candidate distributions is tried; by default
            # Fitter tries all of them, which may take some time.
            f = Fitter(data, xmin=None, xmax=None, bins=100, distributions=['norm', 't', 'laplace'])
            f.fit()
            f.hist()  # normalised histogram with `bins` bins
            # summary() returns the fits ranked from best to worst; call it
            # once and reuse the result instead of computing it twice.
            summary = f.summary()
            print(summary)
        else:
            density, bin_edges = np.histogram(data, bins=100, density=True)
            # Plot the density against the centre of each bin.
            bin_centres = .5 * (bin_edges[1:] + bin_edges[:-1])
            plt.plot(bin_centres, density)
            plt.show()
    
    # vectori, vectorj = X[0, :], X[1, :]# test
    
    # # minkowski
    def get_nxGraph_minkowski(X, epsw=1.0 / 5.0):
        """Build an inverse-distance graph from Minkowski (p=3) distances.

        Every ordered pair of distinct rows of ``X`` is compared.  The edge
        weight is ``1 / distance``; pairs closer than ``1e-5`` get the largest
        observed distance as weight instead, which avoids dividing by zero.
        NOTE(review): that fallback is a distance, not a reciprocal - confirm
        it is the intended weight for coincident samples.

        Args:
            X: 2-D array with one sample per row.
            epsw: Minimum weight an edge needs in order to be kept.

        Returns:
            Tuple ``(G, dis_all)``: the undirected ``networkx.Graph`` (samples
            without any kept edge do not appear as nodes) and a 1-D array with
            the distance of every ordered pair.
        """
        m = X.shape[0]
        # Both indices run over the m samples; using the feature count for the
        # second index would skip samples or index out of bounds.
        pairs = [(i, j) for i in range(m) for j in range(m) if i != j]
        dis_all = [distance.minkowski(X[i, :], X[j, :], p=3) for i, j in pairs]
        # With fewer than two samples there is no pair and no maximum to take.
        dis_max = np.max(dis_all) if dis_all else 0.0

        edges_list = []
        for (i, j), dis in zip(pairs, dis_all):
            weight = dis_max if dis <= 1e-5 else 1.0 / dis
            if weight >= epsw:
                edges_list.append((i, j, {'weight': weight}))

        G = nx.Graph()
        G.add_edges_from(edges_list)
        return G, np.array(dis_all)
    # G, dis_all = get_nxGraph_minkowski(X, epsw=1.0 / 5.0)
    # simPlotGraph(G)
    # get_distributions(dis_all)# 便于找到epsw大小
    
    # # canberra
    def get_nxGraph_canberra(X, epsw=1.0 / 5.0):
        """Build an inverse-distance graph from Canberra distances.

        Every ordered pair of distinct rows of ``X`` is compared.  The edge
        weight is ``1 / distance``; pairs closer than ``1e-5`` get the largest
        observed distance as weight instead, which avoids dividing by zero.
        NOTE(review): that fallback is a distance, not a reciprocal - confirm
        it is the intended weight for coincident samples.

        Args:
            X: 2-D array with one sample per row.
            epsw: Minimum weight an edge needs in order to be kept.

        Returns:
            Tuple ``(G, dis_all)``: the undirected ``networkx.Graph`` (samples
            without any kept edge do not appear as nodes) and a 1-D array with
            the distance of every ordered pair.
        """
        m = X.shape[0]
        # Both indices run over the m samples; using the feature count for the
        # second index would skip samples or index out of bounds.
        pairs = [(i, j) for i in range(m) for j in range(m) if i != j]
        dis_all = [distance.canberra(X[i, :], X[j, :]) for i, j in pairs]
        # With fewer than two samples there is no pair and no maximum to take.
        dis_max = np.max(dis_all) if dis_all else 0.0

        edges_list = []
        for (i, j), dis in zip(pairs, dis_all):
            weight = dis_max if dis <= 1e-5 else 1.0 / dis
            if weight >= epsw:
                edges_list.append((i, j, {'weight': weight}))

        G = nx.Graph()
        G.add_edges_from(edges_list)
        return G, np.array(dis_all)
    # G, dis_all = get_nxGraph_canberra(X, epsw=1.0 / 5.0)
    # simPlotGraph(G)
    # get_distributions(dis_all)# 便于找到epsw大小
    
    # # braycurtis
    def get_nxGraph_braycurtis(X, epsw=1.0 / 5.0):
        """Build an inverse-distance graph from Bray-Curtis distances.

        Every ordered pair of distinct rows of ``X`` is compared.  The edge
        weight is ``1 / distance``; pairs closer than ``1e-5`` get the largest
        observed distance as weight instead, which avoids dividing by zero.
        NOTE(review): that fallback is a distance, not a reciprocal - confirm
        it is the intended weight for coincident samples.

        Args:
            X: 2-D array with one sample per row.
            epsw: Minimum weight an edge needs in order to be kept.

        Returns:
            Tuple ``(G, dis_all)``: the undirected ``networkx.Graph`` (samples
            without any kept edge do not appear as nodes) and a 1-D array with
            the distance of every ordered pair.
        """
        m = X.shape[0]
        # Both indices run over the m samples; using the feature count for the
        # second index would skip samples or index out of bounds.
        pairs = [(i, j) for i in range(m) for j in range(m) if i != j]
        dis_all = [distance.braycurtis(X[i, :], X[j, :]) for i, j in pairs]
        # With fewer than two samples there is no pair and no maximum to take.
        dis_max = np.max(dis_all) if dis_all else 0.0

        edges_list = []
        for (i, j), dis in zip(pairs, dis_all):
            weight = dis_max if dis <= 1e-5 else 1.0 / dis
            if weight >= epsw:
                edges_list.append((i, j, {'weight': weight}))

        G = nx.Graph()
        G.add_edges_from(edges_list)
        return G, np.array(dis_all)
    # G, dis_all = get_nxGraph_braycurtis(X, epsw=1.0 / 5.0)
    # simPlotGraph(G)
    # get_distributions(dis_all)# 便于找到epsw大小
    
    # # chebyshev
    def get_nxGraph_chebyshev(X, epsw=1.0 / 5.0):
        """Build an inverse-distance graph from Chebyshev distances.

        Every ordered pair of distinct rows of ``X`` is compared.  The edge
        weight is ``1 / distance``; pairs closer than ``1e-5`` get the largest
        observed distance as weight instead, which avoids dividing by zero.
        NOTE(review): that fallback is a distance, not a reciprocal - confirm
        it is the intended weight for coincident samples.

        Args:
            X: 2-D array with one sample per row.
            epsw: Minimum weight an edge needs in order to be kept.

        Returns:
            Tuple ``(G, dis_all)``: the undirected ``networkx.Graph`` (samples
            without any kept edge do not appear as nodes) and a 1-D array with
            the distance of every ordered pair.
        """
        m = X.shape[0]
        # Both indices run over the m samples; using the feature count for the
        # second index would skip samples or index out of bounds.
        pairs = [(i, j) for i in range(m) for j in range(m) if i != j]
        dis_all = [distance.chebyshev(X[i, :], X[j, :]) for i, j in pairs]
        # With fewer than two samples there is no pair and no maximum to take.
        dis_max = np.max(dis_all) if dis_all else 0.0

        edges_list = []
        for (i, j), dis in zip(pairs, dis_all):
            weight = dis_max if dis <= 1e-5 else 1.0 / dis
            if weight >= epsw:
                edges_list.append((i, j, {'weight': weight}))

        G = nx.Graph()
        G.add_edges_from(edges_list)
        return G, np.array(dis_all)
    # G, dis_all = get_nxGraph_chebyshev(X, epsw=1.0 / 5.0)
    # simPlotGraph(G)
    # get_distributions(dis_all)# 便于找到epsw大小
    
    # # cityblock
    def get_nxGraph_cityblock(X, epsw=1.0 / 5.0):
        """Build an inverse-distance graph from city-block (L1) distances.

        Every ordered pair of distinct rows of ``X`` is compared.  The edge
        weight is ``1 / distance``; pairs closer than ``1e-5`` get the largest
        observed distance as weight instead, which avoids dividing by zero.
        NOTE(review): that fallback is a distance, not a reciprocal - confirm
        it is the intended weight for coincident samples.

        Args:
            X: 2-D array with one sample per row.
            epsw: Minimum weight an edge needs in order to be kept.

        Returns:
            Tuple ``(G, dis_all)``: the undirected ``networkx.Graph`` (samples
            without any kept edge do not appear as nodes) and a 1-D array with
            the distance of every ordered pair.
        """
        m = X.shape[0]
        # Both indices run over the m samples; using the feature count for the
        # second index would skip samples or index out of bounds.
        pairs = [(i, j) for i in range(m) for j in range(m) if i != j]
        dis_all = [distance.cityblock(X[i, :], X[j, :]) for i, j in pairs]
        # With fewer than two samples there is no pair and no maximum to take.
        dis_max = np.max(dis_all) if dis_all else 0.0

        edges_list = []
        for (i, j), dis in zip(pairs, dis_all):
            weight = dis_max if dis <= 1e-5 else 1.0 / dis
            if weight >= epsw:
                edges_list.append((i, j, {'weight': weight}))

        G = nx.Graph()
        G.add_edges_from(edges_list)
        return G, np.array(dis_all)
    # G, dis_all = get_nxGraph_cityblock(X, epsw=1.0 / 5.0)
    # simPlotGraph(G)
    # get_distributions(dis_all)# 便于找到epsw大小
    
    # # correlation
    def get_nxGraph_correlation(X, epsw=0.5):
        """Build a graph weighted by the correlation distance between samples.

        Every ordered pair of distinct rows of ``X`` is compared with
        ``scipy.spatial.distance.correlation`` and the distance itself is used
        as the edge weight, so an edge is kept when the distance is at least
        ``epsw``.

        Args:
            X: 2-D array with one sample per row.
            epsw: Minimum weight (correlation distance) for an edge to be kept.

        Returns:
            Tuple ``(G, dis_all)``: the undirected ``networkx.Graph`` (samples
            without any kept edge do not appear as nodes) and a 1-D array with
            the distance of every ordered pair.
        """
        m = X.shape[0]
        edges_list = []
        dis_all = []
        for i in range(m):
            # The second index also runs over the m samples; using the feature
            # count would skip samples or index out of bounds.
            for j in range(m):
                if i == j:
                    continue
                dis = distance.correlation(X[i, :], X[j, :])
                dis_all.append(dis)
                if dis >= epsw:
                    edges_list.append((i, j, {'weight': dis}))

        G = nx.Graph()
        G.add_edges_from(edges_list)
        return G, np.array(dis_all)
    # G, dis_all = get_nxGraph_correlation(X, epsw=0.5)
    # simPlotGraph(G)
    # get_distributions(dis_all)# 便于找到epsw大小
    
    # # pearsonr correlation
    def get_nxGraph_pearsonr(X, epsw=0.5):
        """Build a graph weighted by the Pearson correlation between samples.

        Every ordered pair of distinct rows of ``X`` is compared and the
        correlation coefficient r is used as the edge weight; an edge is kept
        when r is at least ``epsw``.

        Args:
            X: 2-D array with one sample per row.
            epsw: Minimum correlation coefficient for an edge to be kept.

        Returns:
            Tuple ``(G, dis_all)``: the undirected ``networkx.Graph`` (samples
            without any kept edge do not appear as nodes) and a 1-D array with
            the coefficient of every ordered pair.
        """
        m = X.shape[0]
        edges_list = []
        dis_all = []
        for i in range(m):
            # The second index also runs over the m samples; using the feature
            # count would skip samples or index out of bounds.
            for j in range(m):
                if i == j:
                    continue
                # pearsonr returns (r, p-value); only r is a similarity.
                # Taking the max of both would let the p-value become the
                # weight whenever it exceeds r.
                coefficient = pearsonr(X[i, :], X[j, :])[0]
                dis_all.append(coefficient)
                if coefficient >= epsw:
                    edges_list.append((i, j, {'weight': coefficient}))

        G = nx.Graph()
        G.add_edges_from(edges_list)
        return G, np.array(dis_all)
    # G, dis_all = get_nxGraph_pearsonr(X, epsw=0.5)
    # simPlotGraph(G)
    # get_distributions(dis_all)# 便于找到epsw大小
    
    # # cosine
    def get_nxGraph_cosine(X, epsw=0.5):
        """Build a graph weighted by the cosine distance between samples.

        Every ordered pair of distinct rows of ``X`` is compared with
        ``scipy.spatial.distance.cosine`` and the distance itself is used as
        the edge weight, so an edge is kept when the distance is at least
        ``epsw``.

        Args:
            X: 2-D array with one sample per row.
            epsw: Minimum weight (cosine distance) for an edge to be kept.

        Returns:
            Tuple ``(G, dis_all)``: the undirected ``networkx.Graph`` (samples
            without any kept edge do not appear as nodes) and a 1-D array with
            the distance of every ordered pair.
        """
        m = X.shape[0]
        edges_list = []
        dis_all = []
        for i in range(m):
            # The second index also runs over the m samples; using the feature
            # count would skip samples or index out of bounds.
            for j in range(m):
                if i == j:
                    continue
                dis = distance.cosine(X[i, :], X[j, :])
                dis_all.append(dis)
                if dis >= epsw:
                    edges_list.append((i, j, {'weight': dis}))

        G = nx.Graph()
        G.add_edges_from(edges_list)
        return G, np.array(dis_all)
    # G, dis_all = get_nxGraph_cosine(X, epsw=0.5)
    # simPlotGraph(G)
    # get_distributions(dis_all)# 便于找到epsw大小
    
    # # euclidean
    def get_nxGraph_euclidean(X, epsw=1.0 / 5.0):
        """Build an inverse-distance graph from Euclidean distances.

        Every ordered pair of distinct rows of ``X`` is compared.  The edge
        weight is ``1 / distance``; pairs closer than ``1e-5`` get the largest
        observed distance as weight instead, which avoids dividing by zero.
        NOTE(review): that fallback is a distance, not a reciprocal - confirm
        it is the intended weight for coincident samples.

        Args:
            X: 2-D array with one sample per row.
            epsw: Minimum weight an edge needs in order to be kept.

        Returns:
            Tuple ``(G, dis_all)``: the undirected ``networkx.Graph`` (samples
            without any kept edge do not appear as nodes) and a 1-D array with
            the distance of every ordered pair.
        """
        m = X.shape[0]
        # Both indices run over the m samples; using the feature count for the
        # second index would skip samples or index out of bounds.
        pairs = [(i, j) for i in range(m) for j in range(m) if i != j]
        dis_all = [distance.euclidean(X[i, :], X[j, :]) for i, j in pairs]
        # With fewer than two samples there is no pair and no maximum to take.
        dis_max = np.max(dis_all) if dis_all else 0.0

        edges_list = []
        for (i, j), dis in zip(pairs, dis_all):
            weight = dis_max if dis <= 1e-5 else 1.0 / dis
            if weight >= epsw:
                edges_list.append((i, j, {'weight': weight}))

        G = nx.Graph()
        G.add_edges_from(edges_list)
        return G, np.array(dis_all)
    # G, dis_all = get_nxGraph_euclidean(X, epsw=1.0 / 5.0)
    # simPlotGraph(G)
    # get_distributions(dis_all)# 便于找到epsw大小
    
    # # mahalanobis
    def get_nxGraph_mahalanobis(X, epsw=1.0 / 5.0):
        """Build an inverse-distance graph from Mahalanobis distances.

        The feature covariance is estimated once from all rows of ``X`` and
        its pseudo-inverse is used for every pair.  The edge weight is
        ``1 / distance``; pairs closer than ``1e-5`` get the largest observed
        distance as weight instead, which avoids dividing by zero.
        NOTE(review): that fallback is a distance, not a reciprocal - confirm
        it is the intended weight for coincident samples.

        Args:
            X: 2-D array with one sample per row.
            epsw: Minimum weight an edge needs in order to be kept.

        Returns:
            Tuple ``(G, dis_all)``: the undirected ``networkx.Graph`` (samples
            without any kept edge do not appear as nodes) and a 1-D array with
            the distance of every ordered pair.
        """
        m = X.shape[0]
        # Both indices run over the m samples; using the feature count for the
        # second index would skip samples or index out of bounds.
        pairs = [(i, j) for i in range(m) for j in range(m) if i != j]
        G = nx.Graph()
        if not pairs:
            # Fewer than two samples: no covariance and no pairs to compare.
            return G, np.array([])

        # scipy expects VI to be the INVERSE covariance matrix.  A covariance
        # built from only the two compared points is rank one and, passed
        # un-inverted, reduces the result to a scaled squared Euclidean
        # distance.  The pseudo-inverse keeps a singular covariance usable.
        cov_X = np.atleast_2d(np.cov(X, rowvar=False))
        inv_cov = np.linalg.pinv(cov_X)

        dis_all = [distance.mahalanobis(X[i, :], X[j, :], inv_cov) for i, j in pairs]
        dis_max = np.max(dis_all)

        edges_list = []
        for (i, j), dis in zip(pairs, dis_all):
            weight = dis_max if dis <= 1e-5 else 1.0 / dis
            if weight >= epsw:
                edges_list.append((i, j, {'weight': weight}))

        G.add_edges_from(edges_list)
        return G, np.array(dis_all)
    # G, dis_all = get_nxGraph_mahalanobis(X, epsw=1.0 / 100.0)
    # simPlotGraph(G)
    # get_distributions(dis_all)# 便于找到epsw大小
    
    # # rbf_kernel Gaussian Similarity # gamma need to be setup.
    def get_nxGraph_rbf_kernel(X, epsw=1/0.01, gamma = 0.025):
        """Build a graph from the RBF (Gaussian) kernel between samples.

        Every ordered pair of distinct rows of ``X`` is scored with
        ``rbf_kernel``.  The kernel value is floored at ``1e-2`` and the edge
        weight is its reciprocal; an edge is kept when the weight is at least
        ``epsw``.
        NOTE(review): the reciprocal gives the heaviest weights to the pairs
        with the smallest kernel value - confirm this is intended.

        Args:
            X: 2-D array with one sample per row.
            epsw: Minimum weight an edge needs in order to be kept.
            gamma: Kernel coefficient passed to ``rbf_kernel``; it has to be
                tuned for the data.

        Returns:
            Tuple ``(G, dis_all)``: the undirected ``networkx.Graph`` (samples
            without any kept edge do not appear as nodes) and a 1-D array with
            the un-floored kernel value of every ordered pair.
        """
        m = X.shape[0]
        dis_all = []
        edges_list = []
        for i in range(m):
            # The second index also runs over the m samples; using the feature
            # count would skip samples or index out of bounds.
            for j in range(m):
                if i == j:
                    continue
                # rbf_kernel works on 2-D inputs, so keep each sample as a
                # (1, n_features) slice.
                dis_rbf = rbf_kernel(X=X[i:i + 1, :], Y=X[j:j + 1, :], gamma=gamma)[0, 0]
                dis_all.append(dis_rbf)
                # Floor the kernel value so the reciprocal stays bounded.
                weight = 1.0 / max(dis_rbf, 1e-2)
                if weight >= epsw:
                    edges_list.append((i, j, {'weight': weight}))

        G = nx.Graph()
        G.add_edges_from(edges_list)
        return G, np.array(dis_all)
    # G, dis_all = get_nxGraph_rbf_kernel(X, epsw=1/0.01, gamma = 0.025)
    # simPlotGraph(G)
    # get_distributions(dis_all)# 便于找到epsw大小
    
    # knn enn
    def get_nxGraph_knn_enn(X, n_neighbors=5, radius=11, epsw=1.0 / 20.0):
        """Build a k-nearest-neighbour graph and a radius-neighbour graph.

        For each sample the ``n_neighbors`` nearest samples (Euclidean) give
        k-NN edges weighted by ``1 / distance``; neighbours closer than
        ``1e-5`` get the largest of that sample's neighbour distances as
        weight instead.  Only k-NN edges with weight at least ``epsw`` are
        kept.  Every other sample within ``radius`` gives a radius-graph edge
        of weight 1.0.

        Args:
            X: 2-D array with one sample per row.
            n_neighbors: Number of neighbours queried per sample (the sample
                itself is dropped from the result when it is returned).
            radius: Search radius of the radius-neighbour graph.
            epsw: Minimum weight a k-NN edge needs in order to be kept.

        Returns:
            Tuple ``(knn_G, enn_G, dis_all)``: the two undirected
            ``networkx.Graph`` objects and a 1-D array of the k-NN distances
            as returned by the query (self-distances included).
        """
        # Use the size of the X that was passed in rather than a module-level
        # global, so the function works for any input.
        num_samples = X.shape[0]
        dis_all = []
        # algorithm may be 'auto', 'ball_tree', 'kd_tree' or 'brute';
        # minkowski with p=2 is equivalent to the Euclidean metric.
        neigh = NearestNeighbors(n_neighbors=n_neighbors, radius=radius, algorithm='auto', leaf_size=30, metric='minkowski', p=2)
        neigh.fit(X)
        knn_edges_list = []
        enn_edges_list = []
        for i in range(num_samples):
            v = X[i, :][np.newaxis, :]

            knn_dis, knn_node = neigh.kneighbors(X=v, n_neighbors=n_neighbors, return_distance=True)
            knn_dis, knn_node = list(knn_dis[0]), list(knn_node[0])
            dis_all += knn_dis
            # The query point is part of the fitted data, so it is usually
            # returned as its own neighbour; drop it when present.
            if i in knn_node:
                self_pos = knn_node.index(i)
                knn_node.pop(self_pos)
                knn_dis.pop(self_pos)
            for neigh_v, dis in zip(knn_node, knn_dis):
                weight = np.max(knn_dis) if dis < 1e-5 else 1.0 / dis
                if weight >= epsw:
                    knn_edges_list.append((i, neigh_v, {'weight': weight}))

            enn_node = neigh.radius_neighbors(X=v, radius=radius, return_distance=False)[0]
            for neigh_v in enn_node:
                if neigh_v != i:
                    enn_edges_list.append((i, neigh_v, {'weight': 1.0}))

        knn_G = nx.Graph()
        knn_G.add_edges_from(knn_edges_list)
        enn_G = nx.Graph()
        enn_G.add_edges_from(enn_edges_list)

        return knn_G, enn_G, np.array(dis_all)
    # Demo: build both neighbour graphs from the sample data, draw them, then
    # plot the distance distribution (helps when choosing epsw).
    knn_G, enn_G, dis_all = get_nxGraph_knn_enn(
        X,
        n_neighbors=5,
        radius=11,
        epsw=1.0 / 20.0,
    )
    simPlotGraph(knn_G)
    simPlotGraph(enn_G)
    get_distributions(dis_all)
    

      

    个人学习记录
  • 相关阅读:
    libev & libevent简介
    MyEclipse10+Flash Builder4+BlazeDS+Tomcat7配置J2EE Web项目报错(一)
    增加表空间大小的四种方法
    JavaScript获取某年某月的最后一天
    Not in 改写左连接不需要关注连接列是否重复数据
    自连接
    左链接,右连接
    In,内链接和空值
    HighCharts基本折线图
    NetBeans运行项目报错
  • 原文地址:https://www.cnblogs.com/jeshy/p/15003959.html
Copyright © 2011-2022 走看看