zoukankan      html  css  js  c++  java
  • 基于不同度量准则生成的图形-Graph

    import numpy as np
    import networkx as nx
    import infomap
    import matplotlib.pyplot as plt
    import matplotlib.colors as colors
    from fitter import Fitter
    from scipy.spatial import distance
    from scipy.stats import pearsonr
    from sklearn.metrics.pairwise import rbf_kernel
    from sklearn.neighbors import NearestNeighbors
    
    # Random test data: m samples (rows) with n features (columns), values in [0, 10).
    m, n = 50, 10  # m: number of samples, n: feature dimension
    X = 10.0 * np.random.random_sample(size=(m, n))
    
    def simPlotGraph(G):
        """Draw graph ``G`` using a Kamada-Kawai layout and show the figure.

        Nodes are drawn in red, size 200, with their labels displayed.
        """
        layout = nx.kamada_kawai_layout(G)
        draw_options = {'node_size': 200, 'with_labels': True, 'node_color': 'red'}
        nx.draw(G, pos=layout, **draw_options)
        plt.show()
    
    def get_distributions(data, fitter=False):
        """Plot the empirical distribution of ``data``.

        Parameters
        ----------
        data : array-like
            Sample values to inspect (e.g. the distances returned by the
            graph builders in this file).
        fitter : bool, default False
            If True, fit the 'norm', 't' and 'laplace' distributions with
            ``Fitter`` and print the fit summary.  Otherwise plot a 100-bin
            density histogram as a line through the bin centers.
        """
        if fitter:
            # Fitting may take a while: by default Fitter tries every known
            # distribution, so a small explicit set is supplied here.
            f = Fitter(data, xmin=None, xmax=None, bins=100, distributions=['norm', 't', 'laplace'])
            f.fit()
            # summary() returns the fits ranked from best to worst and also
            # plots them (per the original author's note); call it once and
            # reuse the result instead of re-plotting for the print below.
            summary = f.summary()
            f.hist()  # normalized histogram with `bins` bins
            print(summary)
            # Show the figure, consistent with the histogram branch.
            plt.show()
        else:
            density, bin_edges = np.histogram(data, bins=100, density=True)
            bin_centers = .5 * (bin_edges[1:] + bin_edges[:-1])
            plt.plot(bin_centers, density)
            plt.show()
    
    # vectori, vectorj = X[0, :], X[1, :]# test
    
    # # minkowski
    def get_nxGraph_minkowski(X, epsw=1.0 / 5.0):
        """Build an undirected graph from inverse Minkowski (p=3) distances.

        Parameters
        ----------
        X : np.ndarray, 2-D
            Data matrix; each row is one sample and becomes a graph node.
        epsw : float
            Minimum edge weight; pairs with a smaller weight get no edge.

        Returns
        -------
        G : nx.Graph
            Graph with a ``'weight'`` attribute (``1 / distance``) on every
            edge.  Samples without any edge are not added as nodes.
        dis_all : np.ndarray
            Distances of all ordered pairs ``(i, j)`` with ``i != j``, in
            row-major order.  Empty when there are fewer than two samples.
        """
        m = X.shape[0]
        # Both indices must run over the m samples (rows); iterating the
        # second one over the feature count skipped pairs or overran X.
        pairs = [(i, j) for i in range(m) for j in range(m) if i != j]
        # p=2 would be equivalent to the Euclidean distance.
        dis_all = [distance.minkowski(X[i, :], X[j, :], p=3) for i, j in pairs]
        # The largest distance is used as the weight of (near-)identical
        # samples, where 1 / distance would blow up.
        dis_max = np.max(dis_all) if dis_all else 0.0
        edges_list = []
        for (i, j), dis in zip(pairs, dis_all):
            weight = dis_max if dis <= 1e-5 else 1.0 / dis
            if weight >= epsw:
                edges_list.append((i, j, {'weight': weight}))
        G = nx.Graph()
        G.add_edges_from(edges_list)
        return G, np.array(dis_all)
    # G, dis_all = get_nxGraph_minkowski(X, epsw=1.0 / 5.0)
    # simPlotGraph(G)
    # get_distributions(dis_all)# 便于找到epsw大小
    
    # # canberra
    def get_nxGraph_canberra(X, epsw=1.0 / 5.0):
        """Build an undirected graph from inverse Canberra distances.

        Parameters
        ----------
        X : np.ndarray, 2-D
            Data matrix; each row is one sample and becomes a graph node.
        epsw : float
            Minimum edge weight; pairs with a smaller weight get no edge.

        Returns
        -------
        G : nx.Graph
            Graph with a ``'weight'`` attribute (``1 / distance``) on every
            edge.  Samples without any edge are not added as nodes.
        dis_all : np.ndarray
            Distances of all ordered pairs ``(i, j)`` with ``i != j``, in
            row-major order.  Empty when there are fewer than two samples.
        """
        m = X.shape[0]
        # Both indices must run over the m samples (rows); iterating the
        # second one over the feature count skipped pairs or overran X.
        pairs = [(i, j) for i in range(m) for j in range(m) if i != j]
        dis_all = [distance.canberra(X[i, :], X[j, :]) for i, j in pairs]
        # The largest distance is used as the weight of (near-)identical
        # samples, where 1 / distance would blow up.
        dis_max = np.max(dis_all) if dis_all else 0.0
        edges_list = []
        for (i, j), dis in zip(pairs, dis_all):
            weight = dis_max if dis <= 1e-5 else 1.0 / dis
            if weight >= epsw:
                edges_list.append((i, j, {'weight': weight}))
        G = nx.Graph()
        G.add_edges_from(edges_list)
        return G, np.array(dis_all)
    # G, dis_all = get_nxGraph_canberra(X, epsw=1.0 / 5.0)
    # simPlotGraph(G)
    # get_distributions(dis_all)# 便于找到epsw大小
    
    # # braycurtis
    def get_nxGraph_braycurtis(X, epsw=1.0 / 5.0):
        """Build an undirected graph from inverse Bray-Curtis distances.

        Parameters
        ----------
        X : np.ndarray, 2-D
            Data matrix; each row is one sample and becomes a graph node.
        epsw : float
            Minimum edge weight; pairs with a smaller weight get no edge.

        Returns
        -------
        G : nx.Graph
            Graph with a ``'weight'`` attribute (``1 / distance``) on every
            edge.  Samples without any edge are not added as nodes.
        dis_all : np.ndarray
            Distances of all ordered pairs ``(i, j)`` with ``i != j``, in
            row-major order.  Empty when there are fewer than two samples.
        """
        m = X.shape[0]
        # Both indices must run over the m samples (rows); iterating the
        # second one over the feature count skipped pairs or overran X.
        pairs = [(i, j) for i in range(m) for j in range(m) if i != j]
        dis_all = [distance.braycurtis(X[i, :], X[j, :]) for i, j in pairs]
        # The largest distance is used as the weight of (near-)identical
        # samples, where 1 / distance would blow up.
        dis_max = np.max(dis_all) if dis_all else 0.0
        edges_list = []
        for (i, j), dis in zip(pairs, dis_all):
            weight = dis_max if dis <= 1e-5 else 1.0 / dis
            if weight >= epsw:
                edges_list.append((i, j, {'weight': weight}))
        G = nx.Graph()
        G.add_edges_from(edges_list)
        return G, np.array(dis_all)
    # G, dis_all = get_nxGraph_braycurtis(X, epsw=1.0 / 5.0)
    # simPlotGraph(G)
    # get_distributions(dis_all)# 便于找到epsw大小
    
    # # chebyshev
    def get_nxGraph_chebyshev(X, epsw=1.0 / 5.0):
        """Build an undirected graph from inverse Chebyshev distances.

        Parameters
        ----------
        X : np.ndarray, 2-D
            Data matrix; each row is one sample and becomes a graph node.
        epsw : float
            Minimum edge weight; pairs with a smaller weight get no edge.

        Returns
        -------
        G : nx.Graph
            Graph with a ``'weight'`` attribute (``1 / distance``) on every
            edge.  Samples without any edge are not added as nodes.
        dis_all : np.ndarray
            Distances of all ordered pairs ``(i, j)`` with ``i != j``, in
            row-major order.  Empty when there are fewer than two samples.
        """
        m = X.shape[0]
        # Both indices must run over the m samples (rows); iterating the
        # second one over the feature count skipped pairs or overran X.
        pairs = [(i, j) for i in range(m) for j in range(m) if i != j]
        dis_all = [distance.chebyshev(X[i, :], X[j, :]) for i, j in pairs]
        # The largest distance is used as the weight of (near-)identical
        # samples, where 1 / distance would blow up.
        dis_max = np.max(dis_all) if dis_all else 0.0
        edges_list = []
        for (i, j), dis in zip(pairs, dis_all):
            weight = dis_max if dis <= 1e-5 else 1.0 / dis
            if weight >= epsw:
                edges_list.append((i, j, {'weight': weight}))
        G = nx.Graph()
        G.add_edges_from(edges_list)
        return G, np.array(dis_all)
    # G, dis_all = get_nxGraph_chebyshev(X, epsw=1.0 / 5.0)
    # simPlotGraph(G)
    # get_distributions(dis_all)# 便于找到epsw大小
    
    # # cityblock
    def get_nxGraph_cityblock(X, epsw=1.0 / 5.0):
        """Build an undirected graph from inverse city-block (Manhattan) distances.

        Parameters
        ----------
        X : np.ndarray, 2-D
            Data matrix; each row is one sample and becomes a graph node.
        epsw : float
            Minimum edge weight; pairs with a smaller weight get no edge.

        Returns
        -------
        G : nx.Graph
            Graph with a ``'weight'`` attribute (``1 / distance``) on every
            edge.  Samples without any edge are not added as nodes.
        dis_all : np.ndarray
            Distances of all ordered pairs ``(i, j)`` with ``i != j``, in
            row-major order.  Empty when there are fewer than two samples.
        """
        m = X.shape[0]
        # Both indices must run over the m samples (rows); iterating the
        # second one over the feature count skipped pairs or overran X.
        pairs = [(i, j) for i in range(m) for j in range(m) if i != j]
        dis_all = [distance.cityblock(X[i, :], X[j, :]) for i, j in pairs]
        # The largest distance is used as the weight of (near-)identical
        # samples, where 1 / distance would blow up.
        dis_max = np.max(dis_all) if dis_all else 0.0
        edges_list = []
        for (i, j), dis in zip(pairs, dis_all):
            weight = dis_max if dis <= 1e-5 else 1.0 / dis
            if weight >= epsw:
                edges_list.append((i, j, {'weight': weight}))
        G = nx.Graph()
        G.add_edges_from(edges_list)
        return G, np.array(dis_all)
    # G, dis_all = get_nxGraph_cityblock(X, epsw=1.0 / 5.0)
    # simPlotGraph(G)
    # get_distributions(dis_all)# 便于找到epsw大小
    
    # # correlation
    def get_nxGraph_correlation(X, epsw=0.5):
        """Build an undirected graph weighted by scipy's correlation distance.

        Parameters
        ----------
        X : np.ndarray, 2-D
            Data matrix; each row is one sample and becomes a graph node.
        epsw : float
            Minimum edge weight; pairs with a smaller weight get no edge.

        Returns
        -------
        G : nx.Graph
            Graph whose edge ``'weight'`` is ``distance.correlation`` of the
            two samples.  Samples without any edge are not added as nodes.
        dis_all : np.ndarray
            Values for all ordered pairs ``(i, j)`` with ``i != j``, in
            row-major order.
        """
        # NOTE(review): the weight is the distance itself, so edges link pairs
        # with a LARGE correlation distance - confirm this is intended.
        m = X.shape[0]
        edges_list = []
        dis_all = []
        for i in range(m):
            # The second index must also run over the m samples (rows), not
            # over the feature count.
            for j in range(m):
                if i == j:
                    continue
                weight = distance.correlation(X[i, :], X[j, :])
                dis_all.append(weight)
                if weight >= epsw:
                    edges_list.append((i, j, {'weight': weight}))
        G = nx.Graph()
        G.add_edges_from(edges_list)
        return G, np.array(dis_all)
    # G, dis_all = get_nxGraph_correlation(X, epsw=0.5)
    # simPlotGraph(G)
    # get_distributions(dis_all)# 便于找到epsw大小
    
    # # pearsonr correlation
    def get_nxGraph_pearsonr(X, epsw=0.5):
        """Build an undirected graph weighted by the Pearson correlation coefficient.

        Parameters
        ----------
        X : np.ndarray, 2-D
            Data matrix; each row is one sample and becomes a graph node.
        epsw : float
            Minimum correlation coefficient for a pair to be connected.

        Returns
        -------
        G : nx.Graph
            Graph whose edge ``'weight'`` is the Pearson r of the two samples.
            Samples without any edge are not added as nodes.
        dis_all : np.ndarray
            Pearson r for all ordered pairs ``(i, j)`` with ``i != j``, in
            row-major order.
        """
        m = X.shape[0]
        edges_list = []
        dis_all = []
        for i in range(m):
            # The second index must also run over the m samples (rows), not
            # over the feature count.
            for j in range(m):
                if i == j:
                    continue
                # pearsonr returns (r, p-value); only r is the similarity.
                # Taking the max of both would use the p-value whenever it
                # happens to exceed r.
                weight = pearsonr(X[i, :], X[j, :])[0]
                dis_all.append(weight)
                if weight >= epsw:
                    edges_list.append((i, j, {'weight': weight}))
        G = nx.Graph()
        G.add_edges_from(edges_list)
        return G, np.array(dis_all)
    # G, dis_all = get_nxGraph_pearsonr(X, epsw=0.5)
    # simPlotGraph(G)
    # get_distributions(dis_all)# 便于找到epsw大小
    
    # # cosine
    def get_nxGraph_cosine(X, epsw=0.5):
        """Build an undirected graph weighted by scipy's cosine distance.

        Parameters
        ----------
        X : np.ndarray, 2-D
            Data matrix; each row is one sample and becomes a graph node.
        epsw : float
            Minimum edge weight; pairs with a smaller weight get no edge.

        Returns
        -------
        G : nx.Graph
            Graph whose edge ``'weight'`` is ``distance.cosine`` of the two
            samples.  Samples without any edge are not added as nodes.
        dis_all : np.ndarray
            Values for all ordered pairs ``(i, j)`` with ``i != j``, in
            row-major order.
        """
        # NOTE(review): the weight is the distance itself, so edges link pairs
        # with a LARGE cosine distance - confirm this is intended.
        m = X.shape[0]
        edges_list = []
        dis_all = []
        for i in range(m):
            # The second index must also run over the m samples (rows), not
            # over the feature count.
            for j in range(m):
                if i == j:
                    continue
                weight = distance.cosine(X[i, :], X[j, :])
                dis_all.append(weight)
                if weight >= epsw:
                    edges_list.append((i, j, {'weight': weight}))
        G = nx.Graph()
        G.add_edges_from(edges_list)
        return G, np.array(dis_all)
    # G, dis_all = get_nxGraph_cosine(X, epsw=0.5)
    # simPlotGraph(G)
    # get_distributions(dis_all)# 便于找到epsw大小
    
    # # euclidean
    def get_nxGraph_euclidean(X, epsw=1.0 / 5.0):
        """Build an undirected graph from inverse Euclidean distances.

        Parameters
        ----------
        X : np.ndarray, 2-D
            Data matrix; each row is one sample and becomes a graph node.
        epsw : float
            Minimum edge weight; pairs with a smaller weight get no edge.

        Returns
        -------
        G : nx.Graph
            Graph with a ``'weight'`` attribute (``1 / distance``) on every
            edge.  Samples without any edge are not added as nodes.
        dis_all : np.ndarray
            Distances of all ordered pairs ``(i, j)`` with ``i != j``, in
            row-major order.  Empty when there are fewer than two samples.
        """
        m = X.shape[0]
        # Both indices must run over the m samples (rows); iterating the
        # second one over the feature count skipped pairs or overran X.
        pairs = [(i, j) for i in range(m) for j in range(m) if i != j]
        dis_all = [distance.euclidean(X[i, :], X[j, :]) for i, j in pairs]
        # The largest distance is used as the weight of (near-)identical
        # samples, where 1 / distance would blow up.
        dis_max = np.max(dis_all) if dis_all else 0.0
        edges_list = []
        for (i, j), dis in zip(pairs, dis_all):
            weight = dis_max if dis <= 1e-5 else 1.0 / dis
            if weight >= epsw:
                edges_list.append((i, j, {'weight': weight}))
        G = nx.Graph()
        G.add_edges_from(edges_list)
        return G, np.array(dis_all)
    # G, dis_all = get_nxGraph_euclidean(X, epsw=1.0 / 5.0)
    # simPlotGraph(G)
    # get_distributions(dis_all)# 便于找到epsw大小
    
    # # mahalanobis
    def get_nxGraph_mahalanobis(X, epsw=1.0 / 5.0):
        """Build an undirected graph from inverse Mahalanobis distances.

        The feature covariance is estimated once from all rows of ``X`` and
        its (pseudo-)inverse is used for every pair.

        Parameters
        ----------
        X : np.ndarray, 2-D
            Data matrix; each row is one sample and becomes a graph node.
        epsw : float
            Minimum edge weight; pairs with a smaller weight get no edge.

        Returns
        -------
        G : nx.Graph
            Graph with a ``'weight'`` attribute (``1 / distance``) on every
            edge.  Samples without any edge are not added as nodes.
        dis_all : np.ndarray
            Distances of all ordered pairs ``(i, j)`` with ``i != j``, in
            row-major order.  Empty when there are fewer than two samples.
        """
        m = X.shape[0]
        G = nx.Graph()
        if m < 2:
            # No pairs to compare and no covariance can be estimated.
            return G, np.array([])

        # scipy's `VI` argument is the INVERSE covariance matrix. Estimate the
        # covariance from the whole data set (columns are the variables); a
        # covariance built from only the two compared vectors is singular.
        # pinv keeps this usable when the covariance is rank-deficient.
        cov_X = np.atleast_2d(np.cov(X, rowvar=False))
        inv_cov = np.linalg.pinv(cov_X)

        # Both indices must run over the m samples (rows), not the features.
        pairs = [(i, j) for i in range(m) for j in range(m) if i != j]
        dis_all = [distance.mahalanobis(X[i, :], X[j, :], inv_cov) for i, j in pairs]
        # The largest distance is used as the weight of (near-)identical
        # samples, where 1 / distance would blow up.
        dis_max = np.max(dis_all)

        edges_list = []
        for (i, j), dis in zip(pairs, dis_all):
            weight = dis_max if dis <= 1e-5 else 1.0 / dis
            if weight >= epsw:
                edges_list.append((i, j, {'weight': weight}))

        G.add_edges_from(edges_list)
        return G, np.array(dis_all)
    # G, dis_all = get_nxGraph_mahalanobis(X, epsw=1.0 / 100.0)
    # simPlotGraph(G)
    # get_distributions(dis_all)# 便于找到epsw大小
    
    # # rbf_kernel Gaussian Similarity # gamma need to be setup.
    def get_nxGraph_rbf_kernel(X, epsw=1/0.01, gamma = 0.025):
        """Build an undirected graph weighted by the inverse RBF (Gaussian) similarity.

        Parameters
        ----------
        X : np.ndarray, 2-D
            Data matrix; each row is one sample and becomes a graph node.
        epsw : float
            Minimum edge weight; pairs with a smaller weight get no edge.
        gamma : float
            ``gamma`` passed to ``rbf_kernel``; it has to be tuned to the data.

        Returns
        -------
        G : nx.Graph
            Graph whose edge ``'weight'`` is ``1 / max(similarity, 1e-2)``.
            Samples without any edge are not added as nodes.
        dis_all : np.ndarray
            Raw (unclamped) kernel values of all ordered pairs ``(i, j)``
            with ``i != j``, in row-major order.
        """
        # NOTE(review): the weight grows as the similarity shrinks, so edges
        # link DISSIMILAR samples - confirm this is intended.
        m = X.shape[0]
        dis_all = []
        edges_list = []
        for i in range(m):
            # The second index must also run over the m samples (rows), not
            # over the feature count.
            for j in range(m):
                if i == j:
                    continue
                # rbf_kernel expects 2-D inputs of shape (n_samples, n_features).
                X_nm, Y_nm = X[i, :][np.newaxis, :], X[j, :][np.newaxis, :]
                dis_rbf = rbf_kernel(X=X_nm, Y=Y_nm, gamma=gamma)[0, 0]
                dis_all.append(dis_rbf)
                # Clamp tiny similarities so the weight is capped at 100.
                if dis_rbf < 1e-2:
                    dis_rbf = 1e-2
                weight = 1.0 / dis_rbf
                if weight >= epsw:
                    edges_list.append((i, j, {'weight': weight}))
        G = nx.Graph()
        G.add_edges_from(edges_list)
        return G, np.array(dis_all)
    # G, dis_all = get_nxGraph_rbf_kernel(X, epsw=1/0.01, gamma = 0.025)
    # simPlotGraph(G)
    # get_distributions(dis_all)# 便于找到epsw大小
    
    # knn enn
    def get_nxGraph_knn_enn(X, n_neighbors=5, radius=11, epsw=1.0 / 20.0):
        """Build a k-nearest-neighbor graph and a radius-neighbor graph of ``X``.

        Parameters
        ----------
        X : np.ndarray, 2-D
            Data matrix; each row is one sample and becomes a graph node.
        n_neighbors : int
            Number of neighbors queried per sample for the kNN graph (the
            sample itself, if returned, is dropped afterwards).
        radius : float
            Query radius for the radius-neighbor graph.
        epsw : float
            Minimum edge weight for the kNN graph; weaker pairs get no edge.

        Returns
        -------
        knn_G : nx.Graph
            kNN graph with edge ``'weight'`` equal to ``1 / distance``.
        enn_G : nx.Graph
            Radius-neighbor graph; every edge has weight 1.0.
        dis_all : np.ndarray
            All distances returned by the kNN queries, concatenated in sample
            order.  They are collected before the query sample is removed, so
            its own entry is included when it is among the results.
        """
        # Use the size of the X that was passed in, not a module-level global.
        n_samples = X.shape[0]
        dis_all = []
        # algorithm may be 'auto', 'ball_tree', 'kd_tree' or 'brute';
        # minkowski with p=2 is equivalent to the Euclidean distance.
        neigh = NearestNeighbors(n_neighbors=n_neighbors, radius=radius, algorithm='auto', leaf_size=30, metric='minkowski', p=2)
        neigh.fit(X)
        knn_edges_list = []
        enn_edges_list = []
        for i in range(n_samples):
            v = X[i, :][np.newaxis, :]

            knn_dis, knn_node = neigh.kneighbors(X=v, n_neighbors=n_neighbors, return_distance=True)
            knn_dis, knn_node = list(knn_dis[0]), list(knn_node[0])
            dis_all += knn_dis
            # Drop the query sample itself from its own neighbor list.
            knn_pairs = [(dis, nd) for dis, nd in zip(knn_dis, knn_node) if nd != i]
            if knn_pairs:
                # Largest remaining neighbor distance: weight for (near-)
                # duplicate neighbors, where 1 / distance would blow up.
                dis_cap = np.max([dis for dis, _ in knn_pairs])
            for dis, neigh_v in knn_pairs:
                weight = dis_cap if dis < 1e-5 else 1.0 / dis
                if weight >= epsw:
                    knn_edges_list.append((i, neigh_v, {'weight': weight}))

            _, enn_node = neigh.radius_neighbors(X=v, radius=radius, return_distance=True)
            for neigh_v in enn_node[0]:
                if neigh_v != i:
                    enn_edges_list.append((i, neigh_v, {'weight': 1.0}))

        knn_G = nx.Graph()
        knn_G.add_edges_from(knn_edges_list)
        enn_G = nx.Graph()
        enn_G.add_edges_from(enn_edges_list)

        return knn_G, enn_G, np.array(dis_all)
    # Demo run: build the kNN and radius-neighbor graphs for the sample data and plot both.
    knn_G, enn_G, dis_all = get_nxGraph_knn_enn(X, 5, 11, 1.0 / 20.0)
    simPlotGraph(knn_G)
    simPlotGraph(enn_G)
    get_distributions(dis_all)  # the distance distribution helps pick a suitable epsw
    

      

    个人学习记录
  • 相关阅读:
    Windows API一日一练(55)FlushFileBuffers和SetFilePointer函数
    JDBC连接MySQL数据库及演示样例
    FusionCharts简单教程(一)---建立第一个FusionCharts图形
    破解中国电信华为无线猫路由(HG522-C)自己主动拨号+不限电脑数+iTV
    DB9 公头母头引脚定义及连接
    第二届战神杯线上编程挑战赛月赛第一题:回文数
    白话经典算法系列之七 堆与堆排序
    开发人员程序猿10大聚集地
    对不起,说句粗话——这个太屌了,windows1.0安装程序(附下载)
    Arduino入门套件 Arduino UNO R3
  • 原文地址:https://www.cnblogs.com/jeshy/p/15003959.html
Copyright © 2011-2022 走看看