import numpy as np import networkx as nx import infomap import matplotlib.pyplot as plt import matplotlib.colors as colors from fitter import Fitter from scipy.spatial import distance from scipy.stats import pearsonr from sklearn.metrics.pairwise import rbf_kernel from sklearn.neighbors import NearestNeighbors # 测试样本 data m, n = 50, 10# m为样本个数, n为特征维度 X = 10.0 * np.random.random_sample((m, n)) def simPlotGraph(G): pos = nx.kamada_kawai_layout(G) nx.draw(G, pos=pos, node_size=200, with_labels=True, node_color='red') plt.show() def get_distributions(data, fitter=False): if fitter: # 利用fitter拟合数据样本的分布 # may take some time since by default, all distributions are tried # but you call manually provide a smaller set of distributions f = Fitter(data, xmin=None, xmax=None, bins=100, distributions=['norm', 't', 'laplace']) f.fit() f.summary() #返回排序好的分布拟合质量(拟合效果从好到坏),并绘制数据分布和Nbest分布 f.hist() #绘制组数=bins的标准化直方图 # f.plot_pdf(names=None, Nbest=3, lw=2) #绘制分布的概率密度函数 print(f.summary()) else: (n, bins) = np.histogram(data, bins=100, density= True) plt.plot(.5*(bins[1:] + bins[:-1]), n) plt.show() # vectori, vectorj = X[0, :], X[1, :]# test # # minkowski def get_nxGraph_minkowski(X, epsw=1.0 / 5.0): (m, n) = X.shape edges_list = [] dis_all = [] dis_max = np.max([distance.minkowski(X[i, :], X[j, :], p=3) for i in range(m) for j in range(n) if i != j]) for i in range(m): for j in range(n): if i != j: dis_0 = distance.minkowski(X[i, :], X[j, :], p=3)# p=2 isequivalent to euclidean dis_all.append(dis_0) if dis_0 <= 1e-5: weight = dis_max else: weight = 1.0 / dis_0 if weight >= epsw: edge = (i, j, {'weight': weight}) edges_list.append(edge) G = nx.Graph() G.add_edges_from(edges_list) return G, np.array(dis_all) # G, dis_all = get_nxGraph_minkowski(X, epsw=1.0 / 5.0) # simPlotGraph(G) # get_distributions(dis_all)# 便于找到epsw大小 # # canberra def get_nxGraph_canberra(X, epsw=1.0 / 5.0): (m, n) = X.shape edges_list = [] dis_all = [] dis_max = np.max([distance.canberra(X[i, :], X[j, :]) for i in range(m) for j in range(n) if i != j]) for i in range(m): for j in range(n): if i != j: dis = distance.canberra(X[i, :], X[j, :]) dis_all.append(dis) if dis <= 1e-5: weight = dis_max else: weight = 1.0 / dis if weight >= epsw: edge = (i, j, {'weight': weight}) edges_list.append(edge) G = nx.Graph() G.add_edges_from(edges_list) return G, np.array(dis_all) # G, dis_all = get_nxGraph_canberra(X, epsw=1.0 / 5.0) # simPlotGraph(G) # get_distributions(dis_all)# 便于找到epsw大小 # # braycurtis def get_nxGraph_braycurtis(X, epsw=1.0 / 5.0): (m, n) = X.shape edges_list = [] dis_all = [] dis_max = np.max([distance.braycurtis(X[i, :], X[j, :]) for i in range(m) for j in range(n) if i != j]) for i in range(m): for j in range(n): if i != j: dis = distance.braycurtis(X[i, :], X[j, :]) dis_all.append(dis) if dis <= 1e-5: weight = dis_max else: weight = 1.0 / dis if weight >= epsw: edge = (i, j, {'weight': weight}) edges_list.append(edge) G = nx.Graph() G.add_edges_from(edges_list) return G, np.array(dis_all) # G, dis_all = get_nxGraph_braycurtis(X, epsw=1.0 / 5.0) # simPlotGraph(G) # get_distributions(dis_all)# 便于找到epsw大小 # # chebyshev def get_nxGraph_chebyshev(X, epsw=1.0 / 5.0): (m, n) = X.shape edges_list = [] dis_all = [] dis_max = np.max([distance.chebyshev(X[i, :], X[j, :]) for i in range(m) for j in range(n) if i != j]) for i in range(m): for j in range(n): if i != j: dis = distance.chebyshev(X[i, :], X[j, :]) dis_all.append(dis) if dis <= 1e-5: weight = dis_max else: weight = 1.0 / dis if weight >= epsw: edge = (i, j, {'weight': weight}) edges_list.append(edge) G = nx.Graph() G.add_edges_from(edges_list) return G, np.array(dis_all) # G, dis_all = get_nxGraph_chebyshev(X, epsw=1.0 / 5.0) # simPlotGraph(G) # get_distributions(dis_all)# 便于找到epsw大小 # # cityblock def get_nxGraph_cityblock(X, epsw=1.0 / 5.0): (m, n) = X.shape edges_list = [] dis_all = [] dis_max = np.max([distance.cityblock(X[i, :], X[j, :]) for i in range(m) for j in range(n) if i != j]) for i in range(m): for j in range(n): if i != j: dis = distance.cityblock(X[i, :], X[j, :]) dis_all.append(dis) if dis <= 1e-5: weight = dis_max else: weight = 1.0 / dis if weight >= epsw: edge = (i, j, {'weight': weight}) edges_list.append(edge) G = nx.Graph() G.add_edges_from(edges_list) return G, np.array(dis_all) # G, dis_all = get_nxGraph_cityblock(X, epsw=1.0 / 5.0) # simPlotGraph(G) # get_distributions(dis_all)# 便于找到epsw大小 # # correlation def get_nxGraph_correlation(X, epsw=0.5): (m, n) = X.shape edges_list = [] dis_all = [] # dis_max = np.max([distance.correlation(X[i, :], X[j, :]) for i in range(m) for j in range(n) if i != j]) for i in range(m): for j in range(n): if i != j: dis = distance.correlation(X[i, :], X[j, :]) dis_all.append(dis) weight = dis if weight >= epsw: edge = (i, j, {'weight': weight}) edges_list.append(edge) G = nx.Graph() G.add_edges_from(edges_list) return G, np.array(dis_all) # G, dis_all = get_nxGraph_correlation(X, epsw=0.5) # simPlotGraph(G) # get_distributions(dis_all)# 便于找到epsw大小 # # pearsonr correlation def get_nxGraph_pearsonr(X, epsw=0.5): (m, n) = X.shape edges_list = [] dis_all = [] # dis_max = np.max([pearsonr(X[i, :], X[j, :]) for i in range(m) for j in range(n) if i != j]) for i in range(m): for j in range(n): if i != j: dis = np.max(pearsonr(X[i, :], X[j, :])) dis_all.append(dis) weight = dis if weight >= epsw: edge = (i, j, {'weight': weight}) edges_list.append(edge) G = nx.Graph() G.add_edges_from(edges_list) return G, np.array(dis_all) # G, dis_all = get_nxGraph_pearsonr(X, epsw=0.5) # simPlotGraph(G) # get_distributions(dis_all)# 便于找到epsw大小 # # cosine def get_nxGraph_cosine(X, epsw=0.5): (m, n) = X.shape edges_list = [] dis_all = [] # dis_max = np.max([distance.cosine(X[i, :], X[j, :]) for i in range(m) for j in range(n) if i != j]) for i in range(m): for j in range(n): if i != j: dis = distance.cosine(X[i, :], X[j, :]) dis_all.append(dis) weight = dis if weight >= epsw: edge = (i, j, {'weight': weight}) edges_list.append(edge) G = nx.Graph() G.add_edges_from(edges_list) return G, np.array(dis_all) # G, dis_all = get_nxGraph_cosine(X, epsw=0.5) # simPlotGraph(G) # get_distributions(dis_all)# 便于找到epsw大小 # # euclidean def get_nxGraph_euclidean(X, epsw=1.0 / 5.0): (m, n) = X.shape edges_list = [] dis_all = [] dis_max = np.max([distance.euclidean(X[i, :], X[j, :]) for i in range(m) for j in range(n) if i != j]) for i in range(m): for j in range(n): if i != j: dis = distance.euclidean(X[i, :], X[j, :]) dis_all.append(dis) if dis <= 1e-5: weight = dis_max else: weight = 1.0 / dis if weight >= epsw: edge = (i, j, {'weight': weight}) edges_list.append(edge) G = nx.Graph() G.add_edges_from(edges_list) return G, np.array(dis_all) # G, dis_all = get_nxGraph_euclidean(X, epsw=1.0 / 5.0) # simPlotGraph(G) # get_distributions(dis_all)# 便于找到epsw大小 # # mahalanobis def get_nxGraph_mahalanobis(X, epsw=1.0 / 5.0): (m, n) = X.shape edges_list = [] dis_all = [] dis_max = [] for i in range(m): for j in range(n): if i != j: vectori, vectorj = X[i, :], X[j, :] vectori_mahala, vectorj_mahala = vectori[:, np.newaxis], vectorj[:, np.newaxis] cov_X = np.cov(np.hstack((vectori_mahala, vectorj_mahala))) dis_mahalanobis = distance.mahalanobis(vectori, vectorj, VI=cov_X) dis_max.append(dis_mahalanobis) dis_all.append(dis_mahalanobis) dis_max = np.max(dis_max) for i in range(m): for j in range(n): if i != j: vectori, vectorj = X[i, :], X[j, :] vectori_mahala, vectorj_mahala = vectori[:, np.newaxis], vectorj[:, np.newaxis] cov_X = np.cov(np.hstack((vectori_mahala, vectorj_mahala))) dis = distance.mahalanobis(vectori, vectorj, VI=cov_X) if dis <= 1e-5: weight = dis_max else: weight = 1.0 / dis if weight >= epsw: edge = (i, j, {'weight': weight}) edges_list.append(edge) G = nx.Graph() G.add_edges_from(edges_list) return G, np.array(dis_all) # G, dis_all = get_nxGraph_mahalanobis(X, epsw=1.0 / 100.0) # simPlotGraph(G) # get_distributions(dis_all)# 便于找到epsw大小 # # rbf_kernel Gaussian Similarity # gamma need to be setup. def get_nxGraph_rbf_kernel(X, epsw=1/0.01, gamma = 0.025): (m, n) = X.shape dis_all = [] edges_list = [] for i in range(m): for j in range(n): if i != j: vectori, vectorj = X[i, :], X[j, :] # rbf_kernel Gaussian Similarity X_nm, Y_nm = vectori[np.newaxis, :], vectorj[np.newaxis, :] # array of shape (n_samples_X, n_features) dis_rbf = rbf_kernel(X=X_nm, Y=Y_nm, gamma=gamma)[0, 0] # gamma need to be setup. dis_all.append(dis_rbf) if dis_rbf < 1e-2: dis_rbf = 1e-2 weight = 1.0/dis_rbf if weight >= epsw: edge = (i, j, {'weight': weight}) edges_list.append(edge) G = nx.Graph() G.add_edges_from(edges_list) return G, np.array(dis_all) # G, dis_all = get_nxGraph_rbf_kernel(X, epsw=1/0.01, gamma = 0.025) # simPlotGraph(G) # get_distributions(dis_all)# 便于找到epsw大小 # knn enn def get_nxGraph_knn_enn(X, n_neighbors=5, radius=11, epsw=1.0 / 20.0): # knn enn # n_neighbors = 5 # radius = 11 # epsw = 0.5 dis_all = [] samples = X # algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'} , default='auto' Algorithm used to compute the nearest neighbors neigh = NearestNeighbors(n_neighbors=n_neighbors, radius=radius, algorithm='auto', leaf_size=30, metric='minkowski', p=2)# p=2 isequivalent to euclidean neigh.fit(samples) knn_edges_list = [] enn_edges_list = [] for i in range(m): v = X[i, :][np.newaxis, :] knn_dis, knn_node = neigh.kneighbors(X=v, n_neighbors=n_neighbors, return_distance=True) knn_dis, knn_node = list(knn_dis[0]), list(knn_node[0]) dis_all += knn_dis index, ind = None, 0 for nd in knn_node: if nd == i: index = ind break ind += 1 if index is not None: knn_node.pop(index) knn_dis.pop(index) index = 0 for neigh_v in knn_node: dis = knn_dis[index] if dis < 1e-5: weight = np.max(knn_dis) else: weight = 1.0/dis if weight >= epsw: edge = (i, neigh_v, {'weight': weight}) knn_edges_list.append(edge) index += 1 enn_dis, enn_node = neigh.radius_neighbors(X=v, radius=radius, return_distance=True) enn_dis, enn_node = list(enn_dis[0]), list(enn_node[0]) index, ind = None, 0 for nd in enn_node: if nd == i: index = ind break ind += 1 if index is not None: enn_node.pop(index) enn_dis.pop(index) for neigh_v in enn_node: edge = (i, neigh_v, {'weight': 1.0}) enn_edges_list.append(edge) knn_G = nx.Graph() knn_G.add_edges_from(knn_edges_list) enn_G = nx.Graph() enn_G.add_edges_from(enn_edges_list) return knn_G, enn_G, np.array(dis_all) knn_G, enn_G, dis_all = get_nxGraph_knn_enn(X, n_neighbors=5, radius=11, epsw=1.0/20.0) simPlotGraph(knn_G) simPlotGraph(enn_G) get_distributions(dis_all)# 便于找到epsw大小