  • MaxDICluster

import pickle
import numpy as np
import pandas as pd
import networkx as nx
import geopandas as gpd
import scipy.sparse as sp
import matplotlib.pyplot as plt
from scipy.spatial import cKDTree
import xml.etree.ElementTree as et
from joblib import Parallel, delayed
from shapely.geometry import Point, LineString, MultiLineString, Polygon
from sklearn.metrics.pairwise import pairwise_kernels, rbf_kernel
from sklearn.neighbors import kneighbors_graph, radius_neighbors_graph

# get the COO-format arrays (row, col, data) of G's adjacency
def get_coo_matrix_from_G(G, weight=None):
    from scipy.sparse import coo_matrix
    row = np.array([u for u, _ in G.edges(data=False)])
    col = np.array([v for _, v in G.edges(data=False)])
    if weight is not None:
        data = np.array([w['weight'] for _, __, w in G.edges(data=True)])
    else:
        data = np.ones(shape=row.shape, dtype=float)
    coo_mat = coo_matrix((data, (row, col)), dtype=float)
    row = coo_mat.row
    col = coo_mat.col
    data = coo_mat.data
    return row, col, data

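# # test: get_coo_matrix_from_G -- a minimal usage sketch; the 4-node weighted
# # graph below is made up here purely for illustration (integer node labels
# # 0..n-1 are assumed throughout this module).
# G = nx.Graph()
# G.add_edges_from([(0, 1, {'weight': 2.0}), (1, 2, {'weight': 1.0}), (2, 3, {'weight': 3.0})])
# row, col, data = get_coo_matrix_from_G(G, weight='weight')
# print(row, col, data)  # e.g. [0 1 2] [1 2 3] [2. 1. 3.]
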
def get_Vigidegi(row, col, data, ndq):
    deg_ndq = {}  # degrees of the nodes in ndq
    nodes = []
    weights = []
    for nd in ndq:
        index_row = np.where(row == nd)
        index_col = np.where(col == nd)
        u = list(row[index_col]) + list(col[index_row])
        index_data = np.array(list(index_row[0]) + list(index_col[0]), dtype=int)
        w = list(data[index_data])
        deg_ndq[nd] = np.sum(w)
        nodes += u
        weights += w
    for nd in ndq:
        for _ in range(nodes.count(nd)):
            nodes.remove(nd)
    # V_G = np.sum(data) * 2.0
    deg_i = deg_ndq
    gi = len(nodes)  # cut size: edges leaving the module
    Vi = np.sum(weights)  # volume of the module
    return Vi, gi, deg_i

def get_Vgdeg(row, col, data, ndq_a, ndq_b):
    ndq = ndq_a + ndq_b
    L_X, L_Y, L_XY = len(ndq_a), len(ndq_b), len(ndq)
    deg_ndq = {}  # degrees of the nodes in ndq
    nodes_a, weights_a = [], []
    nodes_b, weights_b = [], []
    nodes, weights = [], []
    for nd in ndq[:L_X]:
        index_row = np.where(row == nd)
        index_col = np.where(col == nd)
        u = list(row[index_col]) + list(col[index_row])
        index_data = np.array(list(index_row[0]) + list(index_col[0]), dtype=int)
        w = list(data[index_data])
        deg_ndq[nd] = np.sum(w)
        nodes += u
        weights += w
    nodes_a += nodes
    weights_a += weights
    for nd in ndq[L_X:]:
        index_row = np.where(row == nd)
        index_col = np.where(col == nd)
        u = list(row[index_col]) + list(col[index_row])
        index_data = np.array(list(index_row[0]) + list(index_col[0]), dtype=int)
        w = list(data[index_data])
        deg_ndq[nd] = np.sum(w)
        nodes += u
        weights += w
    nodes_b += nodes[len(nodes_a):len(nodes)]
    weights_b += weights[len(weights_a):len(weights)]
    for nd in ndq[:L_X]:
        for _ in range(nodes.count(nd)):
            nodes.remove(nd)
        for _ in range(nodes_a.count(nd)):
            nodes_a.remove(nd)
    for nd in ndq[L_X:]:
        for _ in range(nodes.count(nd)):
            nodes.remove(nd)
        for _ in range(nodes_b.count(nd)):
            nodes_b.remove(nd)
    # V_G = np.sum(data) * 2.0
    deg_ij = deg_ndq
    deg_i = {nd: deg_ndq[nd] for nd in ndq_a}
    deg_j = {nd: deg_ndq[nd] for nd in ndq_b}
    gi = len(nodes_a)
    Vi = np.sum(weights_a)
    gj = len(nodes_b)
    Vj = np.sum(weights_b)
    gij = len(nodes)
    Vij = np.sum(weights)
    return Vi, Vj, Vij, gi, gj, gij, deg_i, deg_j, deg_ij

def deltDI_ij(row, col, data, ndq_a, ndq_b):
    ndq = ndq_a + ndq_b
    L_X, L_Y, L_XY = len(ndq_a), len(ndq_b), len(ndq)
    deg_ndq = {}  # degrees of the nodes in ndq
    nodes_a, weights_a = [], []
    nodes_b, weights_b = [], []
    nodes, weights = [], []
    for nd in ndq[:L_X]:
        index_row = np.where(row == nd)
        index_col = np.where(col == nd)
        u = list(row[index_col]) + list(col[index_row])
        index_data = np.array(list(index_row[0]) + list(index_col[0]), dtype=int)
        w = list(data[index_data])
        deg_ndq[nd] = np.sum(w)
        nodes += u
        weights += w
    nodes_a += nodes
    weights_a += weights
    for nd in ndq[L_X:]:
        index_row = np.where(row == nd)
        index_col = np.where(col == nd)
        u = list(row[index_col]) + list(col[index_row])
        index_data = np.array(list(index_row[0]) + list(index_col[0]), dtype=int)
        w = list(data[index_data])
        deg_ndq[nd] = np.sum(w)
        nodes += u
        weights += w
    nodes_b += nodes[len(nodes_a):len(nodes)]
    weights_b += weights[len(weights_a):len(weights)]

    for nd in ndq[:L_X]:
        for _ in range(nodes.count(nd)):
            nodes.remove(nd)
        for _ in range(nodes_a.count(nd)):
            nodes_a.remove(nd)
    for nd in ndq[L_X:]:
        for _ in range(nodes.count(nd)):
            nodes.remove(nd)
        for _ in range(nodes_b.count(nd)):
            nodes_b.remove(nd)
    # deg_ij = deg_ndq
    # deg_i = {nd: deg_ndq[nd] for nd in ndq_a}
    # deg_j = {nd: deg_ndq[nd] for nd in ndq_b}
    V_G = np.sum(data) * 2.0
    g_i = len(nodes_a)
    V_i = np.sum(weights_a)
    g_j = len(nodes_b)
    V_j = np.sum(weights_b)
    g_ij = len(nodes)
    V_ij = np.sum(weights)

    if V_i < 1e-5:
        V_i = 1.0
    if V_j < 1e-5:
        V_j = 1.0
    if V_ij < 1e-5:
        V_ij = 1.0
    if V_G < 1e-5:
        V_G = 1.0

    # change in decoding information from merging modules i and j;
    # the merge is favorable when the returned value is negative (see maxDI below)
    delt = (-(V_i - g_i) * np.log2(V_i) - (V_j - g_j) * np.log2(V_j)
            + (V_ij - g_ij) * np.log2(V_ij)
            + (V_i + V_j - V_ij - g_i - g_j + g_ij) * np.log2(V_G))

    return delt / V_G

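# # test: deltDI_ij -- a minimal sketch on a made-up bridged-triangles graph;
# # merges inside a triangle should come out favorable (delt < 0), merges
# # across the bridge should not (an assumption for this toy graph, not a
# # guarantee in general).
# G = nx.Graph()
# G.add_weighted_edges_from([(0, 1, 1.), (1, 2, 1.), (0, 2, 1.),
#                            (3, 4, 1.), (4, 5, 1.), (3, 5, 1.), (2, 3, 1.)])
# row, col, data = get_coo_matrix_from_G(G, weight='weight')
# print(deltDI_ij(row, col, data, ndq_a=[0], ndq_b=[1]))  # within a triangle
# print(deltDI_ij(row, col, data, ndq_a=[0], ndq_b=[5]))  # across the bridge
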
def parts_DI(row, col, data, ndq):
    deg_ndq = {}  # degrees of the nodes in ndq
    nodes = []
    weights = []
    for nd in ndq:
        index_row = np.where(row == nd)
        index_col = np.where(col == nd)
        u = list(row[index_col]) + list(col[index_row])
        index_data = np.array(list(index_row[0]) + list(index_col[0]), dtype=int)
        w = list(data[index_data])
        deg_ndq[nd] = np.sum(w)
        nodes += u
        weights += w
    for nd in ndq:
        for _ in range(nodes.count(nd)):
            nodes.remove(nd)
    V_G = np.sum(data) * 2.0
    # deg_i = deg_ndq
    gi = len(nodes)
    Vi = np.sum(weights)
    V_div = Vi / V_G
    V_div_hat = (Vi - gi) / V_G

    return 0.0 if V_div < 1e-5 else -V_div_hat * np.log2(V_div)

def producer(N):
    ndi = N[0]
    N.remove(ndi)
    for ndj in N:
        yield list(ndi), list(ndj)

def partition_producer(partition):
    yield from partition

def pLog2p(p_i, eps=1e-10):
    # p * log2(p), with entries below eps treated as 1.0 so the term vanishes
    p_i = np.asarray(p_i, dtype=float).copy()
    p_i[p_i < eps] = 1.0
    return p_i * np.log2(p_i)

def Log2p(p_i, eps=1e-10):
    # log2(p), with entries below eps treated as 1.0 so the term vanishes
    p_i = np.asarray(p_i, dtype=float).copy()
    p_i[p_i < eps] = 1.0
    return np.log2(p_i)

# one-dimensional structural information entropy
def get_oneStruInforEntropy(G, weight=None):
    G_du = G.degree(weight=weight)
    G_volume = sum(G_du[index_node] for index_node in G.nodes)
    G_pu_dic = {index_node: G_du[index_node] * 1.0 / (1.0 * G_volume) for index_node in G.nodes}
    G_pu = [G_pu_dic[index_node] for index_node in G.nodes]
    # Shannon entropy of the degree distribution
    HP_G_Shannon = sum(pLog2p(np.array(G_pu))) * (-1.0)
    return HP_G_Shannon

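# # test: get_oneStruInforEntropy -- a minimal sketch on the karate-club graph
# # bundled with networkx (chosen here only for convenience).
# G = nx.karate_club_graph()
# print('H1(G) = %.3f bits' % get_oneStruInforEntropy(G))
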
# StruInforEntropy: two-dimensional structural information entropy of G under a partition
def StruInforEntropy(G, partition, weight=None):
    # nodes = G.nodes
    # n = G.number_of_nodes()
    # m = G.number_of_edges()
    sub_set = partition.copy()
    degree = G.degree(weight=weight)
    G_volume = sum(degree[index_node] for index_node in G.nodes)
    Vij, gij, deg_ij = [], [], []
    for ind in range(len(sub_set)):
        sub_degree = 0
        dij = []
        for node in sub_set[ind]:
            sub_degree += degree[node]
            dij.append(degree[node])
        gj_c = nx.cut_size(G, sub_set[ind], weight=weight)
        Vij.append(sub_degree)
        gij.append(gj_c)
        deg_ij.append(np.array(dij))
    gij = np.array(gij, dtype=float)
    Vij = np.array(Vij, dtype=float)
    p_i = [deg_ij[i] / Vij[i] for i in range(len(Vij))]
    pLogp = [pLog2p(pi, eps=1e-10) for pi in p_i]
    sum_pLogp = np.array([np.sum(plp) for plp in pLogp], dtype=float)

    first = np.sum((-1.0) * Vij / (G_volume) * sum_pLogp)
    second = np.sum((-1.0) * gij / (G_volume) * Log2p(Vij / (G_volume)))

    HG = first + second

    return HG, Vij, gij, deg_ij

# one-dimensional structural information entropy computed via a partition (chain-rule form)
def get_oneStruInforEntropy_Partition(G, partition, weight=None):
    # nodes = G.nodes
    # n = G.number_of_nodes()
    # m = G.number_of_edges()
    sub_set = partition.copy()
    degree = G.degree(weight=weight)
    G_volume = sum(degree[index_node] for index_node in G.nodes)
    Vij, gij, deg_ij = [], [], []
    for ind in range(len(sub_set)):
        sub_degree = 0
        dij = []
        for node in sub_set[ind]:
            sub_degree += degree[node]
            dij.append(degree[node])
        gj_c = nx.cut_size(G, sub_set[ind], weight=weight)
        Vij.append(sub_degree)
        gij.append(gj_c)
        deg_ij.append(np.array(dij))
    gij = np.array(gij, dtype=float)
    Vij = np.array(Vij, dtype=float)
    p_i = [deg_ij[i] / Vij[i] for i in range(len(Vij))]
    pLogp = [pLog2p(pi, eps=1e-10) for pi in p_i]
    sum_pLogp = np.array([np.sum(plp) for plp in pLogp], dtype=float)

    first = np.sum((-1.0) * Vij / (G_volume) * sum_pLogp)
    second = np.sum((-1.0) * pLog2p(Vij / G_volume))

    oneHG = first + second

    return oneHG, Vij, gij, deg_ij

# # test: 2D-StruInforEntropy -- G can be any graph with nodes 0..13; the
# # partition below is illustrative only
# partition = [{0, 1, 2, 3}, {4, 5, 6, 7, 8}, {9, 10, 11, 12, 13}]
# HP_G, Vj, g_j, Gj_deg = StruInforEntropy(G, partition)

def delt_Xij(Xi, Xj, G, weight=None):
    Xij = Xi + Xj
    sub_set = [Xi, Xj, list(set(Xij))]
    degree = G.degree(weight=weight)
    G_volume = sum(degree[index_node] for index_node in G.nodes)
    Vij, gij = [], []
    for ind in range(len(sub_set)):
        sub_degree = 0
        for node in sub_set[ind]:
            sub_degree += degree[node]
        gj_c = nx.cut_size(G, sub_set[ind], weight=weight)
        Vij.append(sub_degree)
        gij.append(gj_c)
    gij = np.array(gij)
    Vij = np.array(Vij)
    g_i, g_j, g_ij = gij[0], gij[1], gij[2]
    V_i, V_j, V_ij = Vij[0], Vij[1], Vij[2]
    log_Vij = Log2p(Vij, eps=1e-10)
    delt_G_Pij = 1.0 / (G_volume) * ((V_i - g_i) * log_Vij[0] +
                                     (V_j - g_j) * log_Vij[1] -
                                     (V_ij - g_ij) * log_Vij[2] +
                                     (g_i + g_j - g_ij) * np.log2(G_volume + 1e-10))
    return delt_G_Pij

def delt_RXij(Xi, Xj, G, weight=None):
    Xij = Xi + Xj
    sub_set = [Xi, Xj, list(set(Xij))]
    # sub_set = [[0, 1], [2, 3], [0, 1, 2, 3]]
    # n = G.number_of_nodes()
    # m = G.number_of_edges()
    degree = G.degree(weight=weight)
    G_volume = sum(degree[index_node] for index_node in G.nodes) + 1e-10
    Vij, gij = [], []
    for ind in range(len(sub_set)):
        sub_degree = 0
        for node in sub_set[ind]:
            sub_degree += degree[node]
        gj_c = nx.cut_size(G, sub_set[ind], weight=weight)
        Vij.append(sub_degree)
        gij.append(gj_c)
    g_i, g_j, g_ij = gij[0], gij[1], gij[2]
    V_i, V_j, V_ij = Vij[0], Vij[1], Vij[2]
    # log_Vij = Log2p(Vij, eps=1e-10)

    RP_i = -(V_i - g_i) / G_volume * np.log2(V_i / G_volume + 1e-10)
    RP_j = -(V_j - g_j) / G_volume * np.log2(V_j / G_volume + 1e-10)
    RP_ij = -(V_ij - g_ij) / G_volume * np.log2(V_ij / G_volume + 1e-10)

    delt_G_RPij = RP_i + RP_j - RP_ij

    return delt_G_RPij

def doWhile(G, NodeA, weight=None):
    count = 0
    L = len(NodeA)
    for Xi in NodeA:
        NodeB = NodeA.copy()
        NodeB.remove(Xi)
        for Xj in NodeB:
            delt_ij = delt_Xij(Xi, Xj, G, weight=weight)  # candidate merge gain
            if delt_ij > 0:
                return True
    #         if delt_ij < 0:
    #             count += 1
    # if count - L * (L - 1) != 0:
    #     return True
    return False

def doRWhile(G, NodeA, weight=None):
    count = 0
    L = len(NodeA)
    for Xi in NodeA:
        NodeB = NodeA.copy()
        NodeB.remove(Xi)
        for Xj in NodeB:
            delt_ij = delt_RXij(Xi, Xj, G, weight=weight)  # candidate merge gain
            if delt_ij <= 0:
                return True
    #         if delt_ij > 0:
    #             count += 1
    # if count - L * (L - 1) != 0:
    #     return True
    return False

def doWhile_js(row, col, data, NodeA):
    count = 0
    L = len(NodeA)
    for Xi in NodeA:
        NodeB = NodeA.copy()
        NodeB.remove(Xi)
        for Xj in NodeB:
            delt_ij = deltDI_ij(row, col, data, ndq_a=Xi, ndq_b=Xj)  # candidate merge gain
            if delt_ij <= 0:
                return True
    #         if delt_ij > 0:
    #             count += 1
    # if count - L * (L - 1) != 0:
    #     return True
    return False

###################################### main algorithm: min2DStruInforEntropyPartition -- begin ######################################
def min2DStruInforEntropyPartition(G, weight=None):
    # input / main loop -- begin
    print("Partition by min2DHG ..........")
    nodes = list(G.nodes())
    nodes.sort()  # sort node ids in ascending order
    global NodeA
    NodeA = [[node] for node in nodes]
    print("Init-Input:", NodeA)  # input data
    doWhileFlg = True
    NodeA.reverse()  # reverse, so pop() takes the smallest id first
    while doWhileFlg:
        Xi = NodeA.pop()
        Nj = NodeA.copy()
        delt_max = 0
        Xj_m = None
        for Xj in Nj:
            delt_ij = delt_Xij(Xi, Xj, G, weight=weight)  # entropy decrease of merging Xi and Xj
            if delt_ij > 0 and delt_ij > delt_max:
                Xj_m = Xj
                delt_max = delt_ij
        if Xj_m in Nj and Xj_m is not None:
            Nj.remove(Xj_m)
            Xij = Xi + Xj_m
            Nj.insert(0, Xij)
            # print('Xi:', Xi, '+ Xj:', Xj_m, '-->', Xij, ' delt_ij_HG:', delt_max)
        elif Xj_m is None:
            Nj.insert(0, Xi)  # insert at the front
        NodeA = Nj
        doWhileFlg = doWhile(G, NodeA, weight=weight)  # keep looping while an improving merge exists
        # print(NodeA)
    # print('Output:', NodeA)
    sub_set = NodeA.copy()  # final result
    # output: NodeA / main loop -- end

    # sort
    results = []
    for sb_result in sub_set:
        sb_result.sort()
        results.append(sb_result)
    results.sort()
    print('Output:', results)
    return results
###################################### main algorithm: min2DStruInforEntropyPartition -- end ######################################

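# # test: min2DStruInforEntropyPartition -- a minimal sketch on two made-up
# # triangles joined by one bridge edge; the expected output is the two
# # triangles as modules (an assumption for this toy graph, not a guarantee).
# G = nx.Graph()
# G.add_weighted_edges_from([(0, 1, 1.), (1, 2, 1.), (0, 2, 1.),
#                            (3, 4, 1.), (4, 5, 1.), (3, 5, 1.), (2, 3, 1.)])
# print(min2DStruInforEntropyPartition(G, weight='weight'))
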
###################################### main algorithm: maxDIPartition -- begin ######################################
def maxDIPartition(G, weight=None, pk_partion=None):
    # input / main loop -- begin
    print("Partition by maxDI ..........")
    nodes = list(G.nodes())
    nodes.sort()  # sort node ids in ascending order
    global NodeA
    if pk_partion is None:
        nodes = list(G.nodes())
        nodes.sort()  # sort node ids in ascending order
        NodeA = [[node] for node in nodes]
    else:
        NodeA = pk_partion
    print("Init-Input:", NodeA)  # input data
    doWhileFlg = True
    NodeA.reverse()  # reverse, so pop() takes the smallest id first
    while doWhileFlg:
        Xi = NodeA.pop()
        Nj = NodeA.copy()
        delt_min = 0
        Xj_m = None
        for Xj in Nj:
            delt_ij = delt_RXij(Xi, Xj, G, weight=weight)  # entropy change of merging Xi and Xj
            if delt_ij < 0 and delt_ij < delt_min:
                Xj_m = Xj
                delt_min = delt_ij
        if Xj_m in Nj and Xj_m is not None:
            Nj.remove(Xj_m)
            Xij = Xi + Xj_m
            Nj.insert(0, Xij)
            # print('Xi:', Xi, '+ Xj:', Xj_m, '-->', Xij, ' delt_RXij:', delt_min)
        elif Xj_m is None:
            Nj.insert(0, Xi)  # insert at the front
        NodeA = Nj
        doWhileFlg = doRWhile(G, NodeA, weight=weight)  # keep looping while an improving merge exists
        # print(NodeA)
    # print('Output:', NodeA)
    sub_set = NodeA.copy()  # final result
    # output: NodeA / main loop -- end

    # sort
    results = []
    for sb_result in sub_set:
        sb_result.sort()
        results.append(sb_result)
    results.sort()
    print('Output:', results)
    return results
###################################### main algorithm: maxDIPartition -- end ######################################

###################################### main algorithm: maxDI (optimized) -- begin ######################################
def maxDI(G, weight=None, pk_partion=None):
    # input / main loop -- begin
    print("Partition by maxDI ..........")
    row, col, data = get_coo_matrix_from_G(G, weight=weight)
    nodes = list(G.nodes())
    nodes.sort()  # sort node ids in ascending order
    global NodeA
    if pk_partion is None:
        nodes = list(G.nodes())
        nodes.sort()  # sort node ids in ascending order
        NodeA = [[node] for node in nodes]
    else:
        NodeA = pk_partion
    print("Init-Input:", NodeA)  # input data
    doWhileFlg = True
    NodeA.reverse()  # reverse, so pop() takes the smallest id first
    while doWhileFlg:
        Xi = NodeA.pop()
        Nj = NodeA.copy()
        delt_min = 0
        Xj_m = None
        for Xj in Nj:
            delt_ij = deltDI_ij(row, col, data, ndq_a=Xi, ndq_b=Xj)  # DI change of merging Xi and Xj
            if delt_ij < 0 and delt_ij < delt_min:
                Xj_m = Xj
                delt_min = delt_ij
        if Xj_m in Nj and Xj_m is not None:
            Nj.remove(Xj_m)
            Xij = Xi + Xj_m
            Nj.insert(0, Xij)
            # print('Xi:', Xi, '+ Xj:', Xj_m, '-->', Xij, ' delt_RXij:', delt_min)
        elif Xj_m is None:
            Nj.insert(0, Xi)  # insert at the front
        NodeA = Nj
        doWhileFlg = doWhile_js(row, col, data, NodeA)  # keep looping while an improving merge exists
        # print(NodeA)
    # print('Output:', NodeA)
    sub_set = NodeA.copy()  # final result
    # output: NodeA / main loop -- end

    # sort
    results = []
    for sb_result in sub_set:
        sb_result.sort()
        results.append(sb_result)
    results.sort()
    print('Output:', results)
    return results
###################################### main algorithm: maxDI (optimized) -- end ######################################

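# # test: maxDI -- a minimal sketch on the networkx karate-club graph
# # (unweighted, chosen only for convenience); maxDI works on the COO arrays
# # instead of per-call networkx degree lookups.
# G = nx.karate_club_graph()
# result = maxDI(G, weight=None)
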
###################################### main algorithm: maxDI (iterative, Parallel) -- begin ######################################
def iter_maxDI(G, iter_max=100, pk_partion=None, weight=None, n_jobs=4, verbose=0):
    # iter_max = 100
    # n_jobs = 4
    row, col, data = get_coo_matrix_from_G(G, weight=weight)
    global N
    if pk_partion is None:
        nodes = list(G.nodes())
        nodes.sort()  # sort node ids in ascending order
        N = [[node] for node in nodes]
    else:
        N = pk_partion
    out = Parallel(n_jobs=n_jobs, verbose=verbose)(delayed(parts_DI)(row, col, data, P) for P in partition_producer(N))
    # print('(iter:%d ---> DI:%.2f bits)' % (0, np.sum(out)))
    DI = np.zeros(iter_max + 1)
    DI[0] = float('%.3f' % np.sum(out))
    print('(iter:%d ---> DI:%.3f bits)' % (0, DI[0]))
    for iter in range(iter_max):
        ndi = N[0]
        out = Parallel(n_jobs=n_jobs, verbose=verbose)(
            delayed(deltDI_ij)(row, col, data, ndi, ndj) for ndi, ndj in producer(N))
        out_min = min(out)
        if out_min < 0:
            ndj = N[out.index(out_min)]
            N.remove(ndj)
            N.append(ndi + ndj)
        elif ndi not in N:
            N.append(ndi)
        out = Parallel(n_jobs=n_jobs, verbose=verbose)(
            delayed(parts_DI)(row, col, data, P) for P in partition_producer(N))
        DI[iter + 1] = float('%.3f' % np.sum(out))
        if (iter + 1) % 50 == 0:
            print('(iter:%d ---> DI:%.3f bits)' % (iter+1, DI[iter+1]))
        # if (iter + 1) >= 2000 and np.var(DI[-2000:-1], ddof=1) < 1e-10:  # sample variance (divides by N - 1)
        #     DI = DI[:iter + 2]
        #     break

    # sort results
    results = []
    for sb_result in N:
        sb_result.sort()
        results.append(sb_result)
    results.sort()

    return results, DI
###################################### main algorithm: maxDI (iterative, Parallel) -- end ######################################

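# # test: iter_maxDI -- a minimal sketch (n_jobs=1 keeps joblib overhead out of
# # a toy run; iter_max is small because the karate-club graph is small).
# G = nx.karate_club_graph()
# results, DI = iter_maxDI(G, iter_max=30, weight=None, n_jobs=1)
# print(results, DI)
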
# get DI by Parallel
def get_DI_Parallel(G, partion=None, weight=None, n_jobs=4, verbose=0):
    row, col, data = get_coo_matrix_from_G(G, weight=weight)
    out = Parallel(n_jobs=n_jobs, verbose=verbose)(
        delayed(parts_DI)(row, col, data, P) for P in partition_producer(partion))
    DI = float('%.3f' % np.sum(out))
    return DI

# get DI by maxDIPartition
def get_DI(G, weight=None):
    print("G: number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))
    results = maxDIPartition(G, weight=weight)
    twoHG, Vj, g_j, Gj_deg = StruInforEntropy(G, partition=results, weight=weight)
    oneHG = get_oneStruInforEntropy(G, weight=weight)
    DI = oneHG - twoHG  # decoding information: one-dimensional minus two-dimensional entropy
    DI = float('%.3f' % DI)
    return DI

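# # test: get_DI -- a minimal sketch; DI = H1(G) - H2(G) should be clearly
# # positive when the graph has module structure (an assumption for this toy
# # karate-club run, not a general guarantee).
# G = nx.karate_club_graph()
# print('DI =', get_DI(G, weight=None))
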
# n, m = G.number_of_nodes(), G.number_of_edges()
# Eg_List = [(u, v, w['weight']) for (u, v, w) in G.edges(data=True)]
def create_weight_graph_txt_file(G, filename='InputGraph'):
    file = open(filename + '.txt', mode='w')
    str_number_of_nodes = '{0}\n'.format(G.number_of_nodes())
    file.write(str_number_of_nodes)
    for (u, v, w) in G.edges(data=True):
        str_edge = '{0} {1} {2}\n'.format(u, v, w['weight'])
        file.write(str_edge)
    file.close()

# Plot Graph
def show_graph(G, weight='weight', with_labels=False, save=False, filename='./graph.svg'):
    # test: outer point
    print("G: number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))
    # pos = nx.spring_layout(G)  # set layout
    pos = nx.kamada_kawai_layout(G)
    # nx.draw(G, pos=pos, node_size=300, with_labels=with_labels, node_color='red')
    nodecolor = G.degree(weight=weight)  # higher degree -> bigger node, darker edge color
    nodecolor2 = pd.DataFrame(nodecolor)  # convert to a DataFrame
    nodecolor3 = nodecolor2.iloc[:, 1]  # take the second column (the degrees)
    edgecolor = range(G.number_of_edges())  # edge color scale

    nx.draw(G,
            pos,
            with_labels=with_labels,
            node_size=nodecolor3 * 6 * 10,
            node_color=nodecolor3 * 5,
            edge_color=edgecolor)

    if save:
        plt.savefig(filename, dpi=600, transparent=True)
    plt.show()

# label_result
def label_result(nsamples, result):
    y_result = np.zeros((nsamples,)) - 1  # -1 marks noise points
    cls = 0
    for p in result:
        y_result[p] = cls
        cls += 1
    return y_result

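# # test: label_result -- a minimal sketch with a made-up partition of 6 samples.
# print(label_result(6, [[0, 1, 2], [3, 4, 5]]))  # -> [0. 0. 0. 1. 1. 1.]
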
# Decomposition
def XDecomposition(X, n_components=2):
    from sklearn.decomposition import PCA
    (m, n) = X.shape
    if n_components >= n:
        return X
    else:
        pca = PCA(n_components=n_components, svd_solver='full')
        pca.fit(X)
        X_trans = pca.transform(X)
        # pca.explained_variance_ratio_
        # pca.singular_values_
        return X_trans

# Add node attributes
def set_node_attributes(G, X):
    attr = {}
    for node in G.nodes(data=False):
        attr[node] = {'x_'+str(j): X[node, j] for j in range(X.shape[1])}
    nx.set_node_attributes(G, attr)
    return G

# Add part_nodes attributes
def set_part_node_attributes(G, nodelist, X):
    attr = {}
    for node in nodelist:
        attr[node] = {'x_'+str(j): X[node, j] for j in range(X.shape[1])}
    nx.set_node_attributes(G, attr)
    return G

# update node_attributes
def update_node_attributes(G, X, node_new):
    attr = {}
    attr[node_new] = {'x_' + str(j): X[node_new, j] for j in range(X.shape[1])}
    nx.set_node_attributes(G, attr)
    return G

# query_GraphND_cKDTree
def query_GraphND_cKDTree(G, points, num_feature):
    # X --> m x n; points --> k x n
    # points = np.array([X, Y]).T
    nodes = {'x_' + str(j): nx.get_node_attributes(G, 'x_' + str(j)) for j in range(num_feature)}
    nodes_pd = pd.DataFrame(nodes)
    data = nodes_pd[['x_'+str(j) for j in range(num_feature)]]

    tree = cKDTree(data=data, compact_nodes=True, balanced_tree=True)

    dist, idx = tree.query(points, k=1)
    node = nodes_pd.iloc[idx].index

    return node, dist

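# # test: query_GraphND_cKDTree -- a minimal sketch; nodes carry 'x_j'
# # coordinate attributes (set via set_node_attributes above), and we query
# # the graph node nearest to a made-up point.
# X = np.array([[0., 0.], [1., 0.], [0., 1.], [5., 5.]])
# G = nx.Graph()
# G.add_edges_from([(0, 1), (1, 2), (2, 3)])
# G = set_node_attributes(G, X)
# node, dist = query_GraphND_cKDTree(G, points=np.array([[0.9, 0.1]]), num_feature=2)
# print(node[0], dist[0])  # expect node 1
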
# get_poly_from_osm_circleline
def get_poly_from_osm_circleline(circleline_osmfile=r"../osm/beijing_car_seg_6ring road.osm", toepsg=2436):  # toepsg == 3857; 2436
    # The exact Beijing 6th Ring ring road; see beijing_car_seg_6ring road.osm (the drivable road
    # network within and around the 6th Ring, whose road graph is connected):
    # https://www.cnblogs.com/jeshy/p/14763489.html
    # beijing_car_seg_6ring road.osm as the input data
    # root = et.parse(r"./beijing_car_seg_6ring road.osm").getroot()
    root = et.parse(circleline_osmfile).getroot()
    nodes = {}
    for node in root.findall('node'):
        nodes[int(node.attrib['id'])] = (float(node.attrib['lon']), float(node.attrib['lat']))
    edges = []
    for way in root.findall('way'):
        element_nd = way.findall('nd')
        if len(element_nd) > 2:
            for i in range(len(element_nd) - 1):
                node_s, node_e = int(element_nd[i].attrib['ref']), int(element_nd[i+1].attrib['ref'])
                path = (node_s, node_e)
                edges.append(path)
        else:
            node_s, node_e = int(element_nd[0].attrib['ref']), int(element_nd[1].attrib['ref'])
            path = (node_s, node_e)
            edges.append(path)
    edge = edges[0]
    E = []
    E.append(edge)
    edges.remove(edge)
    point_s, point_e = nodes[E[0][0]], nodes[E[0][1]]
    Point_lists = []
    Point_lists.append(list(point_s))
    Point_lists.append(list(point_e))
    while len(edges) > 0:  # stitch the segments head-to-tail into one closed ring
        (node_f_s, node_f_e) = E[-1]
        for (node_n_s, node_n_e) in edges:
            if node_f_e == node_n_s:
                E.append((node_n_s, node_n_e))
                point_f_e = nodes[node_n_e]
                Point_lists.append(list(point_f_e))
                # print((node_n_s, node_n_e))
                edges.remove((node_n_s, node_n_e))
                break
    # Points.pop()
    # print(E[0], '...', E[-2], E[-1])
    # print(Point_lists[0], '...', Point_lists[-2], Point_lists[-1])

    road_line_arr = np.array(Point_lists)  # as a 2-D coordinate array
    sixc_ring_poly = Polygon(road_line_arr)  # Polygon

    crs = "EPSG:4326"
    gpoly = gpd.GeoSeries(sixc_ring_poly, crs=crs)

    if toepsg is not None:
        gpoly = gpoly.to_crs(epsg=toepsg)
    print('output gpoly.crs:', gpoly.crs)

    # poly = gpoly[0]
    # print('area(km*km):', poly.area / 1.0e6)
    # print('length(km):', poly.length / 1.0e3)

    return road_line_arr, sixc_ring_poly, gpoly

# X2GDF
def X2GDF(X, input_crs="EPSG:2436", output_crs="EPSG:4326", plot=True):
    # input_crs = "EPSG:2436"  # input_crs = "EPSG:4326"
    # output_crs = "EPSG:4326"  # output_crs = "EPSG:2436"
    coords, IDs = [], []
    for i in range(X.shape[0]):
        coords.append(Point(X[i, 0], X[i, 1]))  # (X, Y)
        IDs.append("%d" % i)
    data = {'ID': IDs, 'geometry': coords}
    gdf = gpd.GeoDataFrame(data, crs=input_crs)
    # print(gdf.crs)  # inspect the CRS (coordinate reference system)
    if output_crs is not None:
        gdf = gdf.to_crs(crs=output_crs)  # (Lon, Lat); to_crs returns a new frame

    if plot:
        gdf.plot()
        plt.show()

    return gdf

# GDF2X
def GDF2X(gdata, to_epsg=2436, plot=True):
    print('input gdf crs:', gdata.crs)  # CRS of the input data ---> epsg == 4326 (WGS_84)
    gdata = gdata.to_crs(epsg=to_epsg)  # epsg == 2436 (X, Y), 4326 (lon, lat)
    print('output X crs:', gdata.crs)
    # print(gdata.columns.values)  # column names
    # print(gdata.head())  # first 5 rows
    if plot:
        gdata.plot()
        plt.show()
    geo = gdata.geometry.values
    X0, Y0 = geo.x, geo.y
    X = np.zeros(shape=(X0.shape[0], 2), dtype=float)
    for i in range(X0.shape[0]):
        X[i, 0], X[i, 1] = X0[i], Y0[i]
    return X

# LonLat2XY
def LonLat2XY(X, input_crs="EPSG:4326", output_crs="EPSG:3857", shpfile_name="point_shp.shp"):
    # index = ['node_'+str(node) for node in range(X.shape[0])]
    # columns = ['column_'+str(col) for col in range(X.shape[1])]
    # df = pd.DataFrame(X, index=index, columns=columns)
    coords, IDs = [], []
    for i in range(X.shape[0]):
        coords.append(Point(X[i, 0], X[i, 1]))  # (X, Y)
        IDs.append("%d" % i)

    data = {'ID': IDs, 'geometry': coords}
    gdf = gpd.GeoDataFrame(data, crs=input_crs)

    print('input crs', gdf.crs)  # CRS of the input data
    if output_crs is not None:
        gdf = gdf.to_crs(crs=output_crs)  # to_crs returns a new frame

    print('output crs', gdf.crs)  # output_crs = "EPSG:3857" or "EPSG:2436"
    if shpfile_name is not None:  # shpfile = "point_shap.shp"
        gdf.to_file(filename=shpfile_name)
        print('shpfile is saved')

    geo = gdf.geometry.values
    X0, Y0 = geo.x, geo.y
    X = np.zeros(shape=(X0.shape[0], 2), dtype=float)
    for i in range(X0.shape[0]):
        X[i, 0], X[i, 1] = X0[i], Y0[i]

    return X

# get_partion_result_outline
def get_partion_result_outline(X, partion_result, input_crs="EPSG:2436", output_crs=None, shpfile="partion_outline_shp.shp", save_shp=True, plot=True):
    import alphashape
    # input_crs = "EPSG:2436"
    # output_crs = None
    # shpfile = "partion_outline_shp.shp"
    # save_shp = True
    # plot = True
    Polygs, Numbers, IDs = [], [], []
    for i in range(len(partion_result)):
        X_uniques = np.unique(X[partion_result[i]], axis=0)  # drop duplicate rows
        if X_uniques.shape[0] > 2:
            gdf_alpha_shape = alphashape.alphashape(X2GDF(X_uniques, input_crs=input_crs, output_crs=output_crs, plot=None))
            polyg = gdf_alpha_shape.geometry.values  # GeoDataFrame geometry array
            Polygs.append(polyg[0])
            Numbers.append(len(partion_result[i]))
            IDs.append("%d" % i)
    data = {'ID': IDs, 'num': Numbers, 'geometry': Polygs}
    gdf = gpd.GeoDataFrame(data, crs=input_crs)

    if output_crs is not None:
        gdf = gdf.to_crs(crs=output_crs)

    if save_shp:
        gdf.to_file(shpfile)

    if plot:
        gdf.plot()
        plt.show()

    return gdf

# X2Shpfile
def X2Shpfile(X, shpfile="X_shp.shp", input_crs="EPSG:2436", output_crs="EPSG:4326"):
    # input_crs = "EPSG:2436"  # input_crs = "EPSG:4326"
    # output_crs = "EPSG:4326"  # output_crs = "EPSG:2436"
    # shpfile = "X_shp.shp"
    coords, IDs = [], []
    for i in range(X.shape[0]):
        coords.append(Point(X[i, 0], X[i, 1]))  # (X, Y)
        IDs.append("%d" % i)
    data = {'ID': IDs, 'geometry': coords}
    gdf = gpd.GeoDataFrame(data, crs=input_crs)
    # print(gdf.crs)  # inspect the CRS
    if output_crs is not None:
        gdf = gdf.to_crs(crs=output_crs)  # (Lon, Lat)
    gdf.to_file(shpfile)

# Edge2Shpfile
def Edge2Shpfile(G, X, shpfile="edge_shp.shp", data=True, input_crs="EPSG:2436", output_crs="EPSG:4326"):
    # input_crs = "EPSG:2436"  # input_crs = "EPSG:4326"
    # output_crs = "EPSG:4326"  # output_crs = "EPSG:2436"
    # shpfile = "edge_shp.shp"
    coords, weights, edgeIDs = [], [], []
    if data:
        edges = [(u, v, w['weight']) for u, v, w in G.edges(data=data)]
    else:
        edges = [(u, v, 1.0) for u, v in G.edges(data=data)]
    id = 0
    for u, v, w in edges:
        edgeIDs.append(id)
        coords.append(([X[u, 0], X[u, 1]], [X[v, 0], X[v, 1]]))
        weights.append(w)
        id += 1
    # one LineString per edge, so the geometry column matches the per-edge
    # edgeID and weight columns row for row
    gdata = {'edgeID': edgeIDs, 'weight': weights, 'geometry': [LineString(c) for c in coords]}
    gdf = gpd.GeoDataFrame(gdata, crs=input_crs)
    # print(gdf.crs)  # inspect the CRS
    if output_crs is not None:
        gdf = gdf.to_crs(crs=output_crs)
    gdf.to_file(shpfile)

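# # test: X2Shpfile / Edge2Shpfile -- a minimal sketch on made-up planar
# # points; the CRS codes are just the defaults above, adjust to your data.
# X = np.array([[0., 0.], [1., 0.], [1., 1.]])
# G = nx.Graph()
# G.add_weighted_edges_from([(0, 1, 1.0), (1, 2, 2.0)])
# X2Shpfile(X, shpfile="X_shp.shp", input_crs="EPSG:2436", output_crs=None)
# Edge2Shpfile(G, X, shpfile="edge_shp.shp", data=True, input_crs="EPSG:2436", output_crs=None)
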
# FixingGraph
def FixingGraph(G, X, merage_components=False):
    # Add node attributes
    G = set_node_attributes(G, X)
    node_index = [nd for nd in G.nodes]
    if G.number_of_nodes() < X.shape[0]:
        print("Graph Fixing (Connecting the Isolated Points by distances)......... ")
        # Fixed Graph
        node_all_index = [nd for nd in range(X.shape[0])]
        node_non_index = list(set(node_all_index) - set(node_index))
        for ndi in node_non_index:
            ptq = X[ndi:ndi + 1, :]
            indices, distances = query_GraphND_cKDTree(G, points=ptq, num_feature=X.shape[1])
            add_edge = [(ndi, indices[0], {'weight': 1.0 / (distances[0] + 1e-5)})]
            G.add_edges_from(add_edge)
            # update the new node attributes
            G = update_node_attributes(G, X, node_new=ndi)
            node_index.append(ndi)
        print("G(Fixed): number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))
    # connected components in G
    if merage_components:
        connected_components = [list(c) for c in sorted(nx.connected_components(G), key=len, reverse=False)]
        Len_C = len(connected_components)
        print('Graph Components Numbers: ', Len_C)
        print("Graph Components Connecting ......... ")
        while Len_C >= 2:
            first_components = connected_components[0]
            # first_sub_graph
            first_sub_graph = G.subgraph(first_components).copy()

            # remove_nodes_from first_sub_graph for G
            G.remove_nodes_from(first_components)

            index_v = [nd for nd, data in first_sub_graph.nodes(data=True)]
            points = X[index_v, :]

            index_us, distances = query_GraphND_cKDTree(G, points=points, num_feature=X.shape[1])

            index_us, distances = list(index_us), list(distances)
            min_index = distances.index(min(distances))  # index of the minimum distance
            v, u, dis = index_v[min_index], index_us[min_index], distances[min_index]
            bridge_edge = [(v, u, {'weight': 1.0 / (dis + 1e-5)})]
            G.add_edges_from(bridge_edge)

            # update the new node attributes
            G = update_node_attributes(G, X, node_new=v)

            # add the subgraph to G
            sub_graph_edges = [edge for edge in first_sub_graph.edges(data=True)]
            G.add_edges_from(sub_graph_edges)
            nodelist = [node for node in G.nodes(data=False)]
            G = set_part_node_attributes(G, nodelist, X)

            connected_components = [list(c) for c in sorted(nx.connected_components(G), key=len, reverse=False)]
            Len_C = len(connected_components)
        print("G(Finally): number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))

    return G

# getKNNGraph
def getKNNGraph(X, n_neighbors=10, threshold=2.25, merage_components=False, save=True, shpfile=False):
    # threshold = threshold  ########### setup parameter ###########
    if save:
        output = open('X.pkl', 'wb')
        pickle.dump(X, output)
        output.close()

        # from sklearn.preprocessing import StandardScaler
        # X_trans = StandardScaler().fit_transform(X)
        # output = open('trans_X.pkl', 'wb')
        # pickle.dump(X_trans, output)
        # output.close()
        print("X is saved")

    knn_graph = kneighbors_graph(X, n_neighbors=n_neighbors, mode='distance', include_self=False, n_jobs=-1)
    # knn_graph = kneighbors_graph(X_trans, n_neighbors=n_neighbors, mode='distance', include_self=False, n_jobs=8)
    graph = knn_graph
    affinity = 0.5 * (graph + graph.T)
    edges_list, node_index = [], []
    G = nx.Graph()
    if hasattr(affinity, 'tocoo'):
        A = affinity.tocoo()  # csr_matrix --> coo
        # edges_list = [(i, j, {'weight': 1.0/v}) for i, j, v in zip(A.row, A.col, A.data)]  # coo matrix
        for i, j, v in zip(A.row, A.col, A.data):
            edge = (i, j, {'weight': 1.0 / (v + 1e-5)})
            edges_list.append(edge)
            node_index.append(i)
            node_index.append(j)
    G.add_edges_from(edges_list)
    node_index = list(set(node_index))
    print("knnG(all kneighbors ): number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))

    print("Graph Edge Removing ......... ")
    for node in node_index:
        neighbors = [nd for nd in nx.neighbors(G, node)]
        if len(neighbors) > 1:
            X_i = X[node, :]
            dis_dict = {}
            for nd in neighbors:
                delt_X_k = X_i - X[nd, :]
                dis = np.linalg.norm(delt_X_k)
                dis_dict[nd] = dis
            dis_order = sorted(dis_dict.items(), key=lambda x: x[1], reverse=False)  # sort by ascending distance
            (min_id, min_dis), (max_id, max_dis) = dis_order[0], dis_order[-1]
            if threshold >= min_dis and max_dis >= threshold:
                for nd in neighbors:
                    if dis_dict[nd] > threshold:  # remove
                        rm_edge = [(node, nd)]
                        G.remove_edges_from(rm_edge)
            elif threshold < min_dis:
                for nd in neighbors:  # remove all
                    rm_edge = [(node, nd)]
                    G.remove_edges_from(rm_edge)
                add_edge = [(node, min_id, {'weight': 1.0 / min_dis})]  # re-add only the nearest neighbor
                G.add_edges_from(add_edge)
            else:  # threshold >= max_dis
                pass  # keep all
    print("knnG(edgermove): number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))

    # Add node attributes
    G = set_node_attributes(G, X)
    if G.number_of_nodes() < X.shape[0]:
        print("Graph Fixing ......... ")
        # Fixed Graph
        node_all_index = [nd for nd in range(X.shape[0])]
        node_non_index = list(set(node_all_index) - set(node_index))
        for ndi in node_non_index:
            ptq = X[ndi:ndi+1, :]
            indices, distances = query_GraphND_cKDTree(G, points=ptq, num_feature=X.shape[1])
            add_edge = [(ndi, indices[0], {'weight': 1.0 / (distances[0] + 1e-5)})]
            G.add_edges_from(add_edge)
            # update the new node attributes
            G = update_node_attributes(G, X, node_new=ndi)
            node_index.append(ndi)
        print("knnG(Fixed): number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))
        # test
        # Edge2Shpfile(G, X_pca, shpfile="edge_shp.shp", data=True, input_crs="EPSG:2436", output_crs=None)
        # X2Shpfile(X_pca, shpfile="X_shp.shp", input_crs="EPSG:2436", output_crs=None)
        # print("Shpfile is saved")
    # connected components in G
    if merage_components:
        connected_components = [list(c) for c in sorted(nx.connected_components(G), key=len, reverse=False)]
        Len_C = len(connected_components)
        print('Graph Components Numbers: ', Len_C)
        print("Graph Components Connecting ......... ")
        while Len_C >= 2:
            first_components = connected_components[0]
            # first_sub_graph
            first_sub_graph = G.subgraph(first_components).copy()

            # remove_nodes_from first_sub_graph for G
            G.remove_nodes_from(first_components)

            index_v = [nd for nd, data in first_sub_graph.nodes(data=True)]
            points = X[index_v, :]

            index_us, distances = query_GraphND_cKDTree(G, points=points, num_feature=X.shape[1])

            index_us, distances = list(index_us), list(distances)
            min_index = distances.index(min(distances))  # index of the minimum distance
            v, u, dis = index_v[min_index], index_us[min_index], distances[min_index]
            bridge_edge = [(v, u, {'weight': 1.0 / (dis + 1e-5)})]
            G.add_edges_from(bridge_edge)

            # update the new node attributes
            G = update_node_attributes(G, X, node_new=v)

            # add the subgraph to G
            sub_graph_edges = [edge for edge in first_sub_graph.edges(data=True)]
            G.add_edges_from(sub_graph_edges)
            nodelist = [node for node in G.nodes(data=False)]
            G = set_part_node_attributes(G, nodelist, X)

            connected_components = [list(c) for c in sorted(nx.connected_components(G), key=len, reverse=False)]
            Len_C = len(connected_components)
        print("knnG(Finally): number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))

    if save:
        output = open('knnG.pkl', 'wb')
        pickle.dump(G, output)
        output.close()
        print("knnG is saved")

    if shpfile:
        # X Decomposition
        X_pca = XDecomposition(X, n_components=2)
        Edge2Shpfile(G, X_pca, shpfile="edge_shp.shp", data=True, input_crs="EPSG:2436", output_crs=None)
        X2Shpfile(X_pca, shpfile="X_shp.shp", input_crs="EPSG:2436", output_crs=None)
        print("Shpfile is saved")

    return G
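# # test: end-to-end clustering -- a minimal sketch; make_blobs is used here
# # only as made-up input, and n_neighbors/threshold are untuned guesses.
# from sklearn.datasets import make_blobs
# X, _ = make_blobs(n_samples=60, centers=3, cluster_std=0.6, random_state=0)
# G = getKNNGraph(X, n_neighbors=5, threshold=2.25, merage_components=True, save=False, shpfile=False)
# result = maxDI(G, weight='weight')
# y = label_result(X.shape[0], result)
# plt.scatter(X[:, 0], X[:, 1], c=y)
# plt.show()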
# getRadiusGraph
def getRadiusGraph(X, radius=2.25, merage_components=False, save=True, shpfile=False):
    # radius = radius  ########### setup parameter ###########
    if save:
        output = open('X.pkl', 'wb')
        pickle.dump(X, output)
        output.close()
        # from sklearn.preprocessing import StandardScaler
        # X_trans = StandardScaler().fit_transform(X)
        # output = open('trans_X.pkl', 'wb')
        # pickle.dump(X_trans, output)
        # output.close()
        print("X is saved")

    radius_graph = radius_neighbors_graph(X, radius=radius, mode='distance', include_self=False, n_jobs=-1)
    graph = radius_graph
    affinity = 0.5 * (graph + graph.T)
    edges_list, node_index = [], []
    G = nx.Graph()
    if hasattr(affinity, 'tocoo'):
        A = affinity.tocoo()  # csr_matrix --> coo
        # edges_list = [(i, j, {'weight': 1.0/v}) for i, j, v in zip(A.row, A.col, A.data)]  # coo matrix
        for i, j, v in zip(A.row, A.col, A.data):
            edge = (i, j, {'weight': 1.0 / (v + 1e-5)})
            edges_list.append(edge)
            node_index.append(i)
            node_index.append(j)
    G.add_edges_from(edges_list)
    node_index = list(set(node_index))
    print("radiusG(all radius neighbors ): number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))

    # Add node attributes
    G = set_node_attributes(G, X)
    if G.number_of_nodes() < X.shape[0]:
        print("Graph Fixing ......... ")
        # Fixed Graph
        node_all_index = [nd for nd in range(X.shape[0])]
        node_non_index = list(set(node_all_index) - set(node_index))
        for ndi in node_non_index:
            ptq = X[ndi:ndi+1, :]
            indices, distances = query_GraphND_cKDTree(G, points=ptq, num_feature=X.shape[1])
            add_edge = [(ndi, indices[0], {'weight': 1.0 / (distances[0] + 1e-5)})]
            G.add_edges_from(add_edge)
            # update the new node attributes
            G = update_node_attributes(G, X, node_new=ndi)
            node_index.append(ndi)
        print("radiusG(Fixed): number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))
        # Edge2Shpfile(G, X_pca, shpfile="edge_shp.shp", data=True, input_crs="EPSG:2436", output_crs=None)
        # X2Shpfile(X_pca, shpfile="X_shp.shp", input_crs="EPSG:2436", output_crs=None)
        # print("Shpfile is saved")

    # connected components in G
    if merage_components:
        connected_components = [list(c) for c in sorted(nx.connected_components(G), key=len, reverse=False)]
        Len_C = len(connected_components)
        print('Graph Components Numbers: ', Len_C)
        print("Graph Components Connecting ......... ")
        while Len_C >= 2:
            first_components = connected_components[0]
            # first_sub_graph
            first_sub_graph = G.subgraph(first_components).copy()

            # remove_nodes_from first_sub_graph for G
            G.remove_nodes_from(first_components)

            index_v = [nd for nd, data in first_sub_graph.nodes(data=True)]
            points = X[index_v, :]

            index_us, distances = query_GraphND_cKDTree(G, points=points, num_feature=X.shape[1])

            index_us, distances = list(index_us), list(distances)
            min_index = distances.index(min(distances))  # index of the minimum distance
            v, u, dis = index_v[min_index], index_us[min_index], distances[min_index]
            bridge_edge = [(v, u, {'weight': 1.0 / (dis + 1e-5)})]
            G.add_edges_from(bridge_edge)

            # update the new node attributes
            G = update_node_attributes(G, X, node_new=v)

            # add the subgraph to G
            sub_graph_edges = [edge for edge in first_sub_graph.edges(data=True)]
            G.add_edges_from(sub_graph_edges)
            nodelist = [node for node in G.nodes(data=False)]
            G = set_part_node_attributes(G, nodelist, X)

            connected_components = [list(c) for c in sorted(nx.connected_components(G), key=len, reverse=False)]
            Len_C = len(connected_components)
        print("radiusG(Finally): number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))

    if save:
        output = open('radiusG.pkl', 'wb')
        pickle.dump(G, output)
        output.close()
        print("radiusG is saved")

    if shpfile:
        # X Decomposition
        X_pca = XDecomposition(X, n_components=2)
        Edge2Shpfile(G, X_pca, shpfile="edge_shp.shp", data=True, input_crs="EPSG:2436", output_crs=None)
        X2Shpfile(X_pca, shpfile="X_shp.shp", input_crs="EPSG:2436", output_crs=None)
        print("Shpfile is saved")

    return G
    1190 # getRBFKernelsGraph
    1191 def getRBFKernelsGraph(X, gamma=1., threshold=5., merage_components=False, n_jobs=-1, shpfile=False):
    1192     params = {}
    1193     params['gama'] = gamma
    1194     K = pairwise_kernels(X, metric='rbf', filter_params=True, n_jobs=n_jobs, **params)  # A kernel matrix K
    1195     # K.tocoo()
    1196     Kcsr = sp.csr_matrix(K)
    1197     K = Kcsr.tocoo()
    1198     print('A kernel matrix K: ', K.shape)
    1199     # K(x, y) = exp(-gamma ||x-y||^2) ----> gamma = sigma^-2 ; sigma^2 is the Gaussian kernel of variance.
    1200     affinity = 0.5 * (K + K.T)
    1201     edges_list, node_index = [], []
    1202     if hasattr(affinity, 'tocoo'):
    1203         A = affinity.tocoo()
    1204         # edges_list = [(i, j, {'weight': (v + 1e-5)}) for i, j, v in zip(A.row, A.col, A.data) if
    1205         #               v >= threshold and i < j]  # coo matrix
    1206         for i, j, v in zip(A.row, A.col, A.data):  # coo matrix
    1207             if v >= threshold and i < j:
    1208                 edges_list.append((i, j, {'weight': v}))
    1209                 node_index.append(i)
    1210                 node_index.append(j)
    1211     else:
    1212         A = affinity
    1213         (m, _) = A.shape
    1214         # edges_list = [(i, j, {'weight': (A[i, j] + 1e-5)}) for i in range(m) for j in range(m) if
    1215         #               A[i, j] >= threshold and i < j]
    1216         for i in range(m):
    1217             for j in range(m):
    1218                 if A[i, j] >= threshold and i < j:
    1219                     edges_list.append((i, j, {'weight': A[i, j]}))
    1220                     node_index.append(i)
    1221                     node_index.append(j)
    1222 
    1223     G = nx.Graph()
    1224     G.add_edges_from(edges_list)
    1225     node_index = list(set(node_index))
    1226     print("RBFKernelsGraph: number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))
    1227 
    1228     # Add  node attributes
    1229     G = set_node_attributes(G, X)
    1230     if G.number_of_nodes() < X.shape[0]:
    1231         print("RBFKernelsGraph Fixing (Isolated Points) ......... ")
    1232         # Fixed Graph
    1233         node_all_index = [nd for nd in range(X.shape[0])]
    1234         node_non_index = list(set(node_all_index) - set(node_index))
    1235         for ndi in node_non_index:
    1236             ptq = X[ndi:ndi + 1, :]
    1237             indices, distances = query_GraphND_cKDTree(G, points=ptq, num_feature=X.shape[1])
    1238             pty = X[indices[0]:indices[0] + 1, :]
    1239             rbf = rbf_kernel(X=ptq, Y=pty, gamma=gamma)
    1240             # add_edge = [(ndi, indices[0], {'weight': 1.0 / (distances[0] + 1e-5)})]
    1241             # add_edge = [(ndi, indices[0], {'weight': rbf[0][0]})]
    1242             w = np.sqrt(np.log(rbf[0][0] + 1e-5) / (-gamma)) / (distances[0] + 1e-5)
    1243             add_edge = [(ndi, indices[0], {'weight': w})]
    1244             G.add_edges_from(add_edge)
    1245             # update the new node attributes
    1246             G = update_node_attributes(G, X, node_new=ndi)
    1247             node_index.append(ndi)
    1248         print("RBFKernelsGraph(Fixed): number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(),
    1249                                                                                         G.number_of_edges()))
    1250     # connected components in G
    1251     if merage_components:
    1252         connected_components = [list(c) for c in sorted(nx.connected_components(G), key=len, reverse=False)]
    1253         Len_C = len(connected_components)
    1254         print('RBFKernelsGraph Components Numbers: ', Len_C)
    1255         print("RBFKernelsGraph Components Connecting ......... ")
    1256         while Len_C >= 2:
    1257             first_components = connected_components[0]
    1258             # first_sub_graph
    1259             first_sub_graph = G.subgraph(first_components).copy()
    1260 
    1261             # remove_nodes_from first_sub_graph for G
    1262             G.remove_nodes_from(first_components)
    1263 
    1264             index_v = [nd for nd, data in first_sub_graph.nodes(data=True)]
    1265             points = X[index_v, :]
    1266 
    1267             index_us, distances = query_GraphND_cKDTree(G, points=points, num_feature=X.shape[1])
    1268 
    1269             index_us, distances = list(index_us), list(distances)
    1270             min_index = distances.index(min(distances))  # index of the minimum distance
    1271             v, u, dis = index_v[min_index], index_us[min_index], distances[min_index]
    1272 
    1273             ptv, ptu = X[v:v + 1, :], X[u:u + 1, :]
    1274             rbf = rbf_kernel(X=ptv, Y=ptu, gamma=gamma)
    1275             w = np.sqrt(np.log(rbf[0][0] + 1e-5) / (-gamma)) / (dis + 1e-5)
    1276             bridge_edge = [(v, u, {'weight': w})]
    1277             # bridge_edge = [(v, u, {'weight': 1.0 / (dis + 1e-5)})] # only distance
    1278 
    1279             # add bridge edge
    1280             G.add_edges_from(bridge_edge)
    1281 
    1282             # update the new node attributes
    1283             G = update_node_attributes(G, X, node_new=v)
    1284 
    1285             # add the subgraph to G
    1286             sub_graph_edges = [edge for edge in first_sub_graph.edges(data=True)]
    1287             G.add_edges_from(sub_graph_edges)
    1288             nodelist = [node for node in G.nodes(data=False)]
    1289             G = set_part_node_attributes(G, nodelist, X)
    1290 
    1291             connected_components = [list(c) for c in sorted(nx.connected_components(G), key=len, reverse=False)]
    1292             Len_C = len(connected_components)
    1293         print("RBFKernelsGraph(Finally): number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(),
    1294                                                                                           G.number_of_edges()))
    1295 
    1296     if shpfile:
    1297         X_pca = XDecomposition(X, n_components=2)
    1298         if X_pca.shape[1] <= X.shape[1]:
    1299             Edge2Shpfile(G, X_pca, shpfile="edge_shp.shp", data=True, input_crs="EPSG:2436", output_crs=None)
    1300             X2Shpfile(X_pca, shpfile="X_shp.shp", input_crs="EPSG:2436", output_crs=None)
    1301             print("Shpfile is saved")
    1302 
    1303     return G
    1304 
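# --- Added illustration (not in the original code): a tiny smoke test for
# getRBFKernelsGraph on synthetic points. Since K(x, y) = exp(-gamma * ||x - y||^2) <= 1,
# the threshold must lie in (0, 1); with gamma=1.0, threshold=0.5 keeps pairs closer than
# sqrt(ln 2). All values here are illustrative assumptions, not recommended settings.
def demo_rbf_kernels_graph():
    rng = np.random.RandomState(0)
    X_demo = rng.rand(60, 2)  # 60 random points in the unit square
    G_demo = getRBFKernelsGraph(X_demo, gamma=1.0, threshold=0.5,
                                merage_components=True, n_jobs=1, shpfile=False)
    print('demo RBF graph:', G_demo.number_of_nodes(), 'nodes,',
          G_demo.number_of_edges(), 'edges')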
    1305 # Prior Knowledge 1
    1306 def get_pk_MiniBatchKMeans(X, nodes, n_clusters=2, init_size=2, batch_size=500, n_init=10, max_no_improvement=10, verbose=0):
    1307     # # # # # # # # # # # # # # Prior Knowledge # # # # # # # # # # # # # # #
    1308     from sklearn.cluster import MiniBatchKMeans
    1309     from sklearn import metrics
    1310     # Prior Knowledge 1: partition from MiniBatchKMeans labels
    1311     # n_clusters = 2542
    1312     mbkm = MiniBatchKMeans(init='k-means++', n_clusters=n_clusters,
    1313                            batch_size=batch_size, n_init=n_init,
    1314                            init_size=init_size,
    1315                            max_no_improvement=max_no_improvement, verbose=verbose).fit(X)
    1316     labels_ = mbkm.labels_
    1317     print("X -> Silhouette score: %0.3f" % metrics.silhouette_score(X, labels_))
    1318     print("X -> CH score: %0.3f" % metrics.calinski_harabasz_score(X, labels_))
    1319     pk_partion = []
    1320     if -1 in labels_:  # kept for symmetry with get_pk_DBSCAN; MiniBatchKMeans labels never contain -1
    1321         index = np.where(labels_ == -1)[0]
    1322         pk_p = nodes[index].tolist()
    1323         for pk in pk_p:
    1324             pk_partion.append([pk])
    1325         n_clusters = len(set(labels_)) - 1
    1326         # print('n_clusters', n_clusters)
    1327         for lab in range(0, n_clusters, 1):
    1328             index = np.where(labels_ == lab)[0]
    1329             pk_p = nodes[index].tolist()
    1330             pk_partion.append(pk_p)
    1331     else:
    1332         n_clusters = len(set(labels_))
    1333         for lab in range(0, n_clusters, 1):
    1334             index = np.where(labels_ == lab)[0]
    1335             pk_p = nodes[index].tolist()
    1336             pk_partion.append(pk_p)
    1337     # sort
    1338     results = []
    1339     for sb_result in pk_partion:
    1340         sb_result.sort()
    1341         results.append(sb_result)
    1342     results.sort()
    1343     pk_partion = results
    1344 
    1345     output = open('mbkm_pk_partion.pkl', 'wb')
    1346     pickle.dump(pk_partion, output)
    1347     output.close()
    1348 
    1349     return pk_partion
    1350 # Prior Knowledge 2
    1351 def get_pk_DBSCAN(X, nodes, eps=0.02, min_samples=25):
    1352     # Prior Knowledge 2: partition from DBSCAN labels (noise label -1 becomes singletons)
    1353     from sklearn.cluster import DBSCAN
    1354     from sklearn import metrics
    1355     db = DBSCAN(eps=eps, min_samples=min_samples).fit(X)
    1356     labels_ = db.labels_
    1357     print("X -> Silhouette score: %0.3f" % metrics.silhouette_score(X, labels_))
    1358     print("X -> CH score: %0.3f" % metrics.calinski_harabasz_score(X, labels_))
    1359     pk_partion = []
    1360     if -1 in labels_:
    1361         index = np.where(labels_ == -1)[0]
    1362         pk_p = nodes[index].tolist()
    1363         for pk in pk_p:
    1364             pk_partion.append([pk])
    1365         n_clusters = len(set(labels_)) - 1
    1366         for lab in range(0, n_clusters, 1):
    1367             index = np.where(labels_ == lab)[0]
    1368             pk_p = nodes[index].tolist()
    1369             pk_partion.append(pk_p)
    1370     else:
    1371         n_clusters = len(set(labels_))
    1372         print('n_clusters', n_clusters)
    1373         for lab in range(0, n_clusters, 1):
    1374             index = np.where(labels_ == lab)[0]
    1375             pk_p = nodes[index].tolist()
    1376             pk_partion.append(pk_p)
    1377     # sort
    1378     results = []
    1379     for sb_result in pk_partion:
    1380         sb_result.sort()
    1381         results.append(sb_result)
    1382     results.sort()
    1383     pk_partion = results
    1384     # print(pk_partion)
    1385     # print(len(pk_partion))
    1386 
    1387     output = open('db_pk_partion.pkl', 'wb')
    1388     pickle.dump(pk_partion, output)
    1389     output.close()
    1390 
    1391     return pk_partion
    1392 
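# --- Added illustration (not in the original code): both pk helpers above perform the
# same labels -> partition conversion, so this shared sketch makes the output format
# explicit: a sorted list of sorted node lists, with noise (label -1) kept as singleton
# clusters. labels_to_partition is a hypothetical helper, not used elsewhere in this file.
def labels_to_partition(labels, nodes):
    labels, nodes = np.asarray(labels), np.asarray(nodes)
    partition = []
    for lab in sorted(set(labels.tolist())):
        members = nodes[np.where(labels == lab)[0]].tolist()
        if lab == -1:
            partition.extend([[nd] for nd in members])  # noise -> singleton clusters
        else:
            partition.append(sorted(members))
    return sorted(partition)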
    1393 # get_X_fromSHP
    1394 def get_X_fromSHP(filename=r'./park/BJ_park_m.shp', epsg4326_srid=2436, plot=True):
    1395     gdata = gpd.read_file(filename=filename, bbox=None, mask=None, rows=None)  # read the vector file from disk
    1396     # gdata = gpd.read_file(filename=r'C:\Users\jeshy\Documents\ArcGIS\J50.gdb', layer='BOUA')  # read a layer from a File GDB
    1397     print('Input geodata crs:', gdata.crs)  # the CRS (coordinate reference system) of the input ---> epsg == 4326 (WGS_84)
    1398     gdata = gdata.to_crs(epsg=epsg4326_srid)  # reproject, e.g. epsg == 2436 (X, Y)
    1399     print('Output geodata crs:', gdata.crs)
    1400     print('Column names', gdata.columns.values)
    1401     # print(gdata.head())  # preview the first 5 rows
    1402     if plot:
    1403         gdata.plot()
    1404         plt.show()  # display the plot
    1405 
    1406     geo = gdata.geometry.values
    1407     X0, Y0 = geo.x, geo.y
    1408     X = np.zeros(shape=(X0.shape[0], 2), dtype=float)
    1409     for i in range(X0.shape[0]):
    1410         X[i, 0], X[i, 1] = X0[i],  Y0[i]
    1411 
    1412     print('n_samples:', X0.shape[0])
    1413     output = open('X.pkl', 'wb')
    1414     pickle.dump(X, output)
    1415     output.close()
    1416     print('X.pkl is saved')
    1417 
    1418     from sklearn.preprocessing import StandardScaler
    1419     X_trans = StandardScaler().fit_transform(X)
    1420     output = open('trans_X.pkl', 'wb')
    1421     pickle.dump(X_trans, output)
    1422     output.close()
    1423     print('trans_X.pkl is saved')
    1424 
    1425     return X
    1426 # get_G_X_fromOSM
    1427 def get_G_X_fromOSM(osmfile=r'./osm/bj_six_ring.osm', simplify=True, gexf_save=True, show_graph=False):
    1428     from networkx import Graph
    1429     from osmnx import graph_from_xml, plot_graph, settings, config
    1430     # settings and config
    1431     utn = settings.useful_tags_node
    1432     oxna = settings.osm_xml_node_attrs
    1433     oxnt = settings.osm_xml_node_tags
    1434     utw = settings.useful_tags_way
    1435     oxwa = settings.osm_xml_way_attrs
    1436     oxwt = settings.osm_xml_way_tags
    1437     utn = list(set(utn + oxna + oxnt))
    1438     utw = list(set(utw + oxwa + oxwt))
    1439     crs = settings.default_crs
    1440     config(all_oneway=True, useful_tags_node=utn, useful_tags_way=utw, default_crs=crs)
    1441 
    1442     root = et.parse(osmfile).getroot()
    1443     nodes_dict = {}
    1444     for node in root.findall('node'):
    1445         nodes_dict[int(node.attrib['id'])] = (float(node.attrib['lon']), float(node.attrib['lat']))
    1446 
    1447     M = graph_from_xml(osmfile, simplify=simplify)
    1448     # print('G: number_of_nodes:', M.number_of_nodes(), 'number_of_edges:', M.number_of_edges())
    1449 
    1450     # plot_graph
    1451     if show_graph:
    1452         plot_graph(M,
    1453                     figsize=(16, 16),
    1454                     bgcolor=None,
    1455                     node_color="#999999", node_size=15, node_alpha=None, node_edgecolor="none",
    1456                     edge_color="#999999", edge_linewidth=1, edge_alpha=None,
    1457                     dpi=600,
    1458                     save=True, filepath='./osm.png')
    1459 
    1460     # Convert M to Graph G
    1461     G = nx.Graph()
    1462     G.add_edges_from(Graph(M).edges())
    1463     print("G: number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))
    1464 
    1465     # write_gexf
    1466     if gexf_save:
    1467         nx.write_gexf(G, 'osm.gexf')
    1468 
    1469     # relabel node id
    1470     mapping, index, X = {}, 0, []
    1471     for node in G.nodes:
    1472         mapping[node] = index
    1473         lon, lat = nodes_dict[node][0], nodes_dict[node][1]
    1474         X.append(np.array([lon, lat], dtype=float))
    1475         index += 1
    1476     X = np.array(X, dtype=float)
    1477     G = nx.relabel_nodes(G, mapping)
    1478 
    1479     return G, X
    1480 
    1481 def get_initial_partition(pk_partion):
    1482     initial_partition = {nd: clus for clus in range(len(pk_partion)) for nd in pk_partion[clus]}
    1483     return initial_partition
    1484 def findPrioriKnowledges(G, initial_partition=None, edge_data_flg=False):
    1485     import infomap
    1486     infomapX = infomap.Infomap("--two-level")
    1487     # print("Building Infomap network from a NetworkX graph...")
    1488 
    1489     if edge_data_flg:
    1490         for v, u, w in G.edges(data=edge_data_flg):
    1491             e = (v, u, w['weight'])
    1492             infomapX.addLink(*e)
    1493     else:
    1494         for e in G.edges(data=edge_data_flg):
    1495             infomapX.addLink(*e)
    1496 
    1497     # print("Find communities with Infomap...")
    1498     infomapX.run(initial_partition=initial_partition, silent=True)
    1499     # print("Found {} modules with codelength: {}".format(infomapX.numTopModules(), infomapX.codelength))
    1500     communities = {}
    1501     for node in infomapX.iterLeafNodes():
    1502         communities[node.physicalId] = node.moduleIndex()
    1503     nx.set_node_attributes(G, values=communities, name='community')
    1504     return infomapX.numTopModules(), communities
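# --- Added illustration (not in the original code): findPrioriKnowledges in isolation on
# a standard test graph. The returned dict maps node id -> module index, and the same
# mapping is attached to the graph as the 'community' node attribute used by drawGraph
# below. Requires the infomap package, as above.
def demo_find_prior_knowledges():
    G_demo = nx.karate_club_graph()
    num_modules, communities = findPrioriKnowledges(G_demo, initial_partition=None,
                                                    edge_data_flg=False)
    print('modules:', num_modules)
    print('first mappings:', dict(list(communities.items())[:5]))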
    1505 # color map from http://colorbrewer2.org/
    1506 def drawGraph(G):
    1507     import matplotlib.colors as colors
    1508     # position map
    1509     # pos = nx.spring_layout(G)
    1510     pos = nx.kamada_kawai_layout(G)
    1511     # community ids
    1512     communities = [v for k, v in nx.get_node_attributes(G, 'community').items()]
    1513     numCommunities = max(communities) + 1
    1514     # color map from http://colorbrewer2.org/
    1515     cmapLight = colors.ListedColormap(['#8dd3c7','#ffffb3','#bebada','#fb8072','#80b1d3','#fdb462','#b3de69','#fccde5','#d9d9d9','#bc80bd','#ccebc5','#ffed6f'], 'indexed',
    1516                                       numCommunities)
    1517     cmapDark = colors.ListedColormap(['#a6cee3','#1f78b4','#b2df8a','#33a02c','#fb9a99','#e31a1c','#fdbf6f','#ff7f00','#cab2d6','#6a3d9a','#ffff99','#b15928'], 'indexed', numCommunities)
    1518     # Draw edges
    1519     nx.draw_networkx_edges(G, pos)
    1520     # Draw nodes
    1521     nodeCollection = nx.draw_networkx_nodes(G,
    1522                                             pos=pos,
    1523                                             node_color=communities,
    1524                                             cmap=cmapLight)
    1525     # Set node border color to the darker shade
    1526     darkColors = [cmapDark(v) for v in communities]
    1527     nodeCollection.set_edgecolor(darkColors)
    1528     # Draw node labels
    1529     for n in G.nodes():
    1530         plt.annotate(n,
    1531                      xy=pos[n],
    1532                      textcoords='offset points',
    1533                      horizontalalignment='center',
    1534                      verticalalignment='center',
    1535                      xytext=[0, 0],
    1536                      color=cmapDark(communities[n]))
    1537     plt.axis('off')
    1538     # plt.savefig("karate.png")
    1539     plt.show()
    1540 # Prior Knowledge 3
    1541 def get_PKResult(G, initial_partition=None, gshow=False, edge_data_flg=False):
    1542     # print("Partition by Infomap ..........")
    1543     numModules, pks = findPrioriKnowledges(G, initial_partition=initial_partition, edge_data_flg=edge_data_flg)
    1544     if gshow:
    1545         drawGraph(G)
    1546     list_values = [val for val in pks.values()]
    1547     list_nodes = [node for node in pks.keys()]
    1548     print('pk sorting .......')
    1549     result = []
    1550     for ndpa in range(numModules):
    1551         nodes_part_index = np.argwhere(np.array(list_values) == ndpa).ravel()
    1552         nodes_part = list(np.array(list_nodes)[nodes_part_index])
    1553         nodes_part.sort()
    1554         result.append(nodes_part)
    1555     result.sort()
    1556     # print("pk size:", len(result))
    1557     # print("pk result:", result)
    1558 
    1559     # output = open('info_pk_partion.pkl', 'wb')
    1560     # pickle.dump(result, output)
    1561     # output.close()
    1562 
    1563     return result
    1564 
    1565 
    1566 # MaxDIClustering main class
    1567 class MaxDIClustering():
    1568     def __init__(self, affinity='radius',
    1569                  n_neighbors=10,  # getKNNGraph
    1570                  threshold=2.25,  # getKNNGraph, getRBFKernelsGraph
    1571                  radius=2.25,  # getRadiusGraph
    1572                  gamma=0.25,
    1573                  # getRBFKernelsGraph # K(x, y) = exp(-gamma * ||x - y||^2) ----> gamma = 1 / (2 * sigma^2); sigma^2 is the variance of the Gaussian kernel.
    1574                  merage_components=True,  # getKNNGraph, getRadiusGraph, getRBFKernelsGraph
    1575                  pk='1',  # -- PK
    1576                  n_clusters=2542, init_size=2542,  # get_pk_MiniBatchKMeans -- PK
    1577                  eps=1000, min_samples=50,  # get_pk_DBSCAN -- PK
    1578                  n_jobs=None):
    1579         self.affinity = affinity
    1580         self.radius = radius
    1581         self.n_neighbors = n_neighbors
    1582         self.threshold = threshold
    1583         self.gamma = gamma  # K(x, y) = exp(-gamma * ||x - y||^2) ----> gamma = 1 / (2 * sigma^2); sigma^2 is the variance of the Gaussian kernel.
    1584         self.merage_components = merage_components
    1585         self.n_clusters = n_clusters  # no trailing comma (it would store a tuple)
    1586         self.init_size = init_size
    1587         self.eps = eps
    1588         self.min_samples = min_samples
    1589         self.pk = pk
    1590         self.n_jobs = n_jobs
    1591 
    1592     def fit(self, X):
    1593         if self.affinity == 'radius':
    1594             self.G = getRadiusGraph(X, radius=self.radius, merage_components=self.merage_components, save=False,
    1595                                     shpfile=False)
    1596         elif self.affinity == 'knn':
    1597             self.G = getKNNGraph(X, n_neighbors=self.n_neighbors, threshold=self.threshold,
    1598                                  merage_components=self.merage_components, save=False, shpfile=False)
    1599         elif self.affinity == 'rbf':  # RBFKernelsGraph # K(x, y) = exp(-gamma * ||x - y||^2) ----> gamma = 1 / (2 * sigma^2); sigma^2 is the variance of the Gaussian kernel.
    1600             self.G = getRBFKernelsGraph(X, gamma=self.gamma, threshold=self.threshold,
    1601                                         merage_components=self.merage_components, n_jobs=self.n_jobs, shpfile=False)
    1602         else:
    1603             raise ValueError("Unknown Graph Type %r" % self.affinity)
    1604 
    1605         self.nodes = np.array([nd for nd in self.G.nodes], dtype=int)
    1606 
    1607         # Prior Knowledge 1
    1608         if self.pk == '1':
    1609             pk_partion_hat = get_pk_MiniBatchKMeans(X, nodes=self.nodes, n_clusters=self.n_clusters,
    1610                                                 init_size=self.init_size)
    1611             # pk_partion_hat = get_PKResult(self.G, initial_partition=get_initial_partition(pk_partion_hat), gshow=False,
    1612             #                               edge_data_flg=True)
    1613             self.pk_DI_ = get_DI_Parallel(self.G, partion=pk_partion_hat, weight='weight', n_jobs=self.n_jobs,
    1614                                           verbose=0)
    1615         # Prior Knowledge 2
    1616         elif self.pk == '2':
    1617             pk_partion_hat = get_pk_DBSCAN(X, nodes=self.nodes, eps=self.eps, min_samples=self.min_samples)
    1618             # pk_partion_hat = get_PKResult(self.G, initial_partition=get_initial_partition(pk_partion_hat), gshow=False,
    1619             #                               edge_data_flg=True)
    1620             self.pk_DI_ = get_DI_Parallel(self.G, partion=pk_partion_hat, weight='weight', n_jobs=self.n_jobs,
    1621                                           verbose=0)
    1622         # Prior Knowledge 3
    1623         elif self.pk == '3':
    1624             pk_partion_hat = get_PKResult(self.G, initial_partition=None, gshow=False, edge_data_flg=True)
    1625             self.pk_DI_ = get_DI_Parallel(self.G, partion=pk_partion_hat, weight='weight', n_jobs=self.n_jobs,
    1626                                           verbose=0)
    1627         # no prior knowledge
    1628         else:
    1629             pk_partion_hat = None
    1630 
    1631         self.OSIE_ = get_oneStruInforEntropy(self.G, weight='weight')
    1632         self.OSIE_ = float('%.3f' % self.OSIE_)
    1633 
    1634         self.results_, self.DIs_ = iter_maxDI(self.G, iter_max=100, pk_partion=pk_partion_hat, weight='weight',
    1635                                              n_jobs=self.n_jobs, verbose=0)
    1636         self.DI_ = self.DIs_[-1]
    1637 
    1638         self.labels_ = label_result(nsamples=X.shape[0], result=self.results_).astype(int)
    1639 
    1640         self.DI_rate_ = self.DI_ / self.OSIE_
    1641 
    1642         from sklearn.metrics import silhouette_score, calinski_harabasz_score
    1643         self.si_score_ = silhouette_score(X, self.labels_, metric='euclidean')
    1644         self.ch_score_ = calinski_harabasz_score(X, self.labels_)
    1645 
    1646         return self
    1647 
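# --- Added illustration (not in the original code): a minimal usage sketch of the class
# above on synthetic blobs. radius=1.5 and pk='3' are illustrative assumptions; after
# fit(), the estimator exposes labels_, DI_, OSIE_, DI_rate_, si_score_ and ch_score_
# as computed in fit() above.
def demo_max_di_clustering():
    from sklearn.datasets import make_blobs
    X_demo, _ = make_blobs(n_samples=200, centers=4, cluster_std=0.5, random_state=0)
    clus = MaxDIClustering(affinity='radius', radius=1.5,
                           merage_components=True, pk='3', n_jobs=2).fit(X_demo)
    print('clusters found:', np.unique(clus.labels_).size)
    print('DI_:', clus.DI_, 'DI_rate_:', clus.DI_rate_, 'si_score_:', clus.si_score_)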
    1648 
    1649 # # # # # # # # # # # # # # # test and Demonstration # # # # # # # # # # # # # # # #
    1650 # test cKDTree and pairwise_kernels Graph
    1651 def test():
    1652 
    1653     # test
    1654     output = open('park_X.pkl', 'rb')
    1655     X = pickle.load(output)
    1656     output.close()
    1657     output = open('busstop_X.pkl', 'rb')
    1658     X = pickle.load(output)  # note: overwrites the park points loaded above; only the bus-stop points are used below
    1659     output.close()
    1660 
    1661     # m, n = X.shape
    1662 
    1663     # show
    1664     print(X.shape)# (42161, 2)
    1665     print(X[[0, 1, 2]])# [[ 451389.26206748 4425262.84838121] .... ]
    1666 
    1667     # cKDTree
    1668     tree = cKDTree(data=X, leafsize=8, compact_nodes=True, balanced_tree=True)
    1669     X_query = X[[0, 1, 2]]
    1670     dis, index = tree.query(x=X_query, k=1, n_jobs=-1)
    1671     X_query_out = X[index]
    1672 
    1673     print(dis, index, sep='\n')  # [0. 0. 0.]  [0   1   6931]
    1674     print(X_query)# [[ 451389.26206748 4425262.84838121] ... ]
    1675     print(X_query_out)# [[ 451389.26206748 4425262.84838121] ... ]
    1676 
    1677     M = 25000  # 42161 --> a dense 42161 x 42161 kernel matrix would be too large.
    1678     threshold = 0.998
    1679 
    1680     params = {}
    1681     params['gamma'] = 1
    1682     K = pairwise_kernels(X[:M, :], metric='rbf', filter_params=True, n_jobs=-1, **params)# A kernel matrix K
    1683 
    1684     # K.tocoo()
    1685     Kcsr = sp.csr_matrix(K)
    1686     K = Kcsr.tocoo()
    1687     print('A kernel matrix K: ', K.shape)
    1688     # K(x, y) = exp(-gamma * ||x - y||^2) ----> gamma = 1 / (2 * sigma^2); sigma^2 is the variance of the Gaussian kernel.
    1689 
    1690     affinity = 0.5 * (K + K.T)
    1691     edges_list, node_index = [], []
    1692     if hasattr(affinity, 'tocoo'):
    1693         A = affinity.tocoo()
    1694         # edges_list = [(i, j, {'weight': (v + 1e-5)}) for i, j, v in zip(A.row, A.col, A.data) if
    1695         #               v >= threshold and i < j]  # coo matrix
    1696         for i, j, v in zip(A.row, A.col, A.data):  # coo matrix
    1697             if v >= threshold and i < j:
    1698                 edges_list.append((i, j, {'weight': v}))
    1699                 node_index.append(i)
    1700                 node_index.append(j)
    1701     # else:
    1702     #     A = affinity
    1703     #     (m, _) = A.shape
    1704     #     # edges_list = [(i, j, {'weight': (A[i, j] + 1e-5)}) for i in range(m) for j in range(m) if
    1705     #     #               A[i, j] >= threshold and i < j]
    1706     #     for i in range(m):
    1707     #         for j in range(m):
    1708     #             if A[i, j] >= threshold and i < j:
    1709     #                 edges_list.append((i, j, {'weight': A[i, j]}))
    1710     #                 node_index.append(i)
    1711     #                 node_index.append(j)
    1712 
    1713     G = nx.Graph()
    1714     G.add_edges_from(edges_list)
    1715     # node_index = list(set(node_index))
    1716     print("KernelsGraph: number_of_nodes:{0}, number_of_edges:{1}".format(G.number_of_nodes(), G.number_of_edges()))
    1717 
    1718     # FixingGraph
    1719     G = FixingGraph(G, X, merage_components=True)
    1720 
    1721     # save
    1722     output = open('busstop_KernelsGraph_part.pkl', 'wb')
    1723     pickle.dump(G, output)
    1724     output.close()
    1725 # rbf_Clustering by MaxDIClustering
    1726 def rbf_Clustering():
    1727     # Load the geodata
    1728     output = open('park_X.pkl', 'rb')# park_X = get_X_fromSHP(filename=r'./park/BJ_park_m.shp', epsg4326_srid=2436, plot=True)
    1729     park_X = pickle.load(output)
    1730     output.close()
    1731 
    1732     output = open('busstop_X.pkl', 'rb')# busstop_X = get_X_fromSHP(filename=r'./busstop/BeijingBusStops_84.shp', epsg4326_srid=2436, plot=True)
    1733     busstop_X = pickle.load(output)# (42161, 2) # Maybe MemoryError --> A RBF kernel matrix K: (42161, 42161)!
    1734     output.close()
    1735 
    1736     output = open('./busstop_RBFGraph.pkl', 'rb')
    1737     RBFGraph = pickle.load(output)
    1738     output.close()
    1739 
    1740     print('------------------------------Extract Partition Outlines----------------------------------')
    1741     # Select any one of the graph-extraction methods: getRadiusGraph, getKNNGraph, getRBFKernelsGraph
    1742     G = getRadiusGraph(X=park_X, radius=500, merage_components=True, save=False, shpfile=False)
    1743     partion_result = get_PKResult(G, initial_partition=None, gshow=False, edge_data_flg=True)
    1744     outline_gdf = get_partion_result_outline(X=park_X, partion_result=partion_result, input_crs="EPSG:2436",
    1745                                              output_crs=None,
    1746                                              shpfile="partion_outline_shp.shp", save_shp=True, plot=False)
    1747     output = open('Extract_Partion_Outlines_gdf.pkl', 'wb')
    1748     pickle.dump(outline_gdf, output)
    1749     output.close()
    1750     print('------------------------------Extract Partition Outlines End----------------------------------')
    1751 
    1752     print('------------------------------RBFKernelsGraph----------------------------------')
    1753     print('park_X......')
    1754     X = park_X
    1755     # Clustering by MaxDIClustering
    1756     max_di_clus = MaxDIClustering(affinity='rbf',
    1757                                   threshold=0.998,  # getKNNGraph, getRBFKernelsGraph
    1758                                   gamma=1.0,
    1759                                   # getRBFKernelsGraph # K(x, y) = exp(-gamma * ||x - y||^2) ----> gamma = 1 / (2 * sigma^2); sigma^2 is the variance of the Gaussian kernel.
    1760                                   merage_components=True,  # getKNNGraph, getRadiusGraph, getRBFKernelsGraph
    1761                                   pk='3',  # -- PK
    1762                                   n_jobs=4).fit(X)
    1763     output = open('park_X_max_di_clus_rbf.pkl', 'wb')
    1764     pickle.dump(max_di_clus, output)
    1765     output.close()
    1766     print('DI_', max_di_clus.DI_)
    1767     print('OSIE_', max_di_clus.OSIE_)
    1768     print('DI_rate_', max_di_clus.DI_rate_)
    1769     print('si_score_', max_di_clus.si_score_)
    1770     # DI_ 6.775
    1771     # OSIE_ 9.205
    1772     # DI_rate_ 0.736
    1773     # si_score_ 0.299
    1774 
    1775     print('busstop_X......')
    1776     X = busstop_X  # (42161, 2) # MemoryError: Unable to allocate 13.2 GiB for an array with shape (42161, 42161) and data type float64
    1777     # # X clip by beijing-6ring-road.osm # #
    1778     X_gdf = X2GDF(X, input_crs="EPSG:2436", output_crs=None, plot=None)
    1779     # _, __, gpoly_mask = get_poly_from_osm_circleline(circleline_osmfile=r"../osm/beijing_car_seg_6ring road.osm", toepsg=2436)  # (23946, 2) # MemoryError too
    1780     _, __, gpoly_mask = get_poly_from_osm_circleline(circleline_osmfile=r"../osm/beijing_5ring.osm", toepsg=2436)
    1781     gdata = gpd.clip(gdf=X_gdf, mask=gpoly_mask, keep_geom_type=True)
    1782     X = GDF2X(gdata, to_epsg=2436, plot=False)
    1783     # Clustering by MaxDIClustering
    1784     max_di_clus = MaxDIClustering(affinity='rbf',
    1785                                   threshold=0.998,  # getKNNGraph, getRBFKernelsGraph
    1786                                   gamma=1.0,
    1787                                   # getRBFKernelsGraph # K(x, y) = exp(-gamma * ||x - y||^2) ----> gamma = 1 / (2 * sigma^2); sigma^2 is the variance of the Gaussian kernel.
    1788                                   merage_components=True,  # getKNNGraph, getRadiusGraph, getRBFKernelsGraph
    1789                                   pk='3',  # -- PK
    1790                                   n_jobs=4).fit(X)
    1791     output = open('busstop_X_max_di_clus_rbf.pkl', 'wb')
    1792     pickle.dump(max_di_clus, output)
    1793     output.close()
    1794     print('DI_', max_di_clus.DI_)
    1795     print('OSIE_', max_di_clus.OSIE_)
    1796     print('DI_rate_', max_di_clus.DI_rate_)
    1797     print('si_score_', max_di_clus.si_score_)
    1798     # A kernel matrix K:  (13318, 13318)
    1799     # RBFKernelsGraph: number_of_nodes:8278, number_of_edges:15797
    1800     # RBFKernelsGraph Fixing (Isolated Points) .........
    1801     # RBFKernelsGraph(Fixed): number_of_nodes:13318, number_of_edges:20837
    1802     # RBFKernelsGraph Components Numbers:  2271
    1803     # RBFKernelsGraph Components Connecting .........
    1804     # RBFKernelsGraph(Finally): number_of_nodes:13318, number_of_edges:23107
    1805     # DI_ 7.862
    1806     # OSIE_ 9.827
    1807     # DI_rate_ 0.800
    1808     # si_score_ 0.694
    1809 
    1810     G = RBFGraph  # the RBF-Graph precomputed in advance on a machine with more memory
    1811     X = busstop_X  # (42161, 2)
    1812     # FixingGraph
    1813     G = FixingGraph(G, X, merage_components=True)
    1814 
    1815     pk_partion_hat = get_PKResult(G, initial_partition=None, gshow=False, edge_data_flg=True)
    1816     OSIE_ = float('%.3f' % get_oneStruInforEntropy(G, weight='weight'))
    1817 
    1818     # pk_DI_ = get_DI_Parallel(G, partion=pk_partion_hat, weight='weight', n_jobs=-1, verbose=0)
    1819     results_, DI_ = iter_maxDI(G, iter_max=100, pk_partion=pk_partion_hat, weight='weight', n_jobs=-1, verbose=0)
    1820 
    1821     DI_ = DI_[-1]
    1822     labels_ = label_result(nsamples=X.shape[0], result=results_).astype(int)
    1823     DI_rate_ = DI_ / OSIE_
    1824     from sklearn.metrics import silhouette_score, calinski_harabasz_score
    1825     si_score_ = silhouette_score(X, labels_, metric='euclidean')
    1826     ch_score_ = calinski_harabasz_score(X, labels_)
    1827     print('DI_', DI_)
    1828     print('OSIE_', OSIE_)
    1829     print('DI_rate_', DI_rate_)
    1830     print('si_score_', si_score_)
    1831     # DI_ 8.751
    1832     # OSIE_ 10.52
    1833     # DI_rate_ 0.832
    1834     # si_score_ 0.587
    1835     print('------------------------------RBFKernelsGraph - end----------------------------------')
    1836 # test_StruInforEntropy
    1837 def test_StruInforEntropy():
    1838     # unweighted graph, or all weights == 1.0
    1839     G = nx.Graph()
    1840     edges_list = [(0, 1, {'weight': 1.0}),
    1841                   (0, 2, {'weight': 1.0}),  # test: outer point
    1842                   (0, 3, {'weight': 1.0}),
    1843                   # (0, 4, {'weight': 1.0}),
    1844                   (1, 2, {'weight': 1.0}),  # test: outer point
    1845                   (1, 3, {'weight': 1.0}),
    1846                   (2, 3, {'weight': 1.0}),  # test: outer point
    1847                   (2, 4, {'weight': 5.0}),
    1848                   (4, 5, {'weight': 1.0}),
    1849                   (4, 7, {'weight': 1.0}),
    1850                   (5, 6, {'weight': 1.0}),
    1851                   (5, 7, {'weight': 1.0}),
    1852                   (5, 8, {'weight': 1.0}),
    1853                   (6, 8, {'weight': 1.0}),
    1854                   (7, 8, {'weight': 1.0}),
    1855                   (8, 9, {'weight': 5.0}),
    1856                   (9, 10, {'weight': 1.0}),
    1857                   (9, 12, {'weight': 1.0}),
    1858                   (9, 13, {'weight': 1.0}),
    1859                   (10, 11, {'weight': 1.0}),
    1860                   (10, 12, {'weight': 1.0}),
    1861                   (11, 12, {'weight': 1.0}),
    1862                   (11, 13, {'weight': 1.0}),
    1863                   (12, 13, {'weight': 1.0})]
    1864     G.add_edges_from(edges_list)
    1865 
    1866     # plot graph
    1867     show_graph(G, with_labels=True)
    1868 
    1869     # a = [9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492,
    1870     #      9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492,
    1871     #      9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492,
    1872     #      9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492,
    1873     #      9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492,
    1874     #      9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492,
    1875     #      9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492,
    1876     #      9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492,
    1877     #      9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492,
    1878     #      9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492,
    1879     #      9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492,
    1880     #      10.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492, 9.492]
    1881     # total_var = np.var(a)  # population variance (divides by N)
    1882     # sample_var = np.var(a, ddof=1)  # sample variance (divides by N - 1)
    1883 
    1884     # # min2DStruInforEntropyPartition
    1885     # results = min2DStruInforEntropyPartition(G, weight='weight')
    1886     # print("Partition-Size(by min2DHG):", len(results))
    1887     # print("Partition(by min2DHG):", results)
    1888     # # 2D-StruInforEntropy
    1889     # HG, Vj, g_j, Gj_deg = StruInforEntropy(G, partition=results, weight='weight')
    1890     # oneHG = get_oneStruInforEntropy(G, weight='weight')
    1891     # print("1DStruInforEntropy:", oneHG)
    1892     # print("2DStruInforEntropy:", HG)
    1893     # print('--'*15)
    1894 
    1895     # # maxDI
    1896     # results = maxDIPartition(G, weight='weight')  # basic version
    1897     # print("Partition-Size(by maxDI):", len(results))
    1898     # print("Partition(by maxDI):", results)
    1899     # # 2D-StruInforEntropy
    1900     # HG, Vj, g_j, Gj_deg = StruInforEntropy(G, partition=results, weight='weight')
    1901     # oneHG = get_oneStruInforEntropy(G, weight='weight')
    1902     # print("1DStruInforEntropy:", oneHG)
    1903     # print("2DStruInforEntropy:", HG)
    1904     # print("maxDI:", oneHG-HG)
    1905     # oneHG, Vj, g_j, Gj_deg = get_oneStruInforEntropy_Partition(G, partition=results, weight='weight')
    1906     # print("1DStruInforEntropy_Partition:", oneHG)
    1907 
    1908     # print('--'*15)
    1909     # results = [[0, 1, 2, 3], [4, 5, 6, 7, 8], [9, 10, 11, 12, 13]]
    1910     # print(results)
    1911     # HG, Vj, g_j, Gj_deg = StruInforEntropy(G, partition=results, weight='weight')
    1912     # print("2DStruInforEntropy:", HG)
    1913     # print('Vj', Vj)
    1914     # print('g_j', g_j)
    1915     # print('--'*15)
    1916 
    1917     # edges_di = []
    1918     # edges = [(u, v, w['weight']) for u, v, w in G.edges(data=True)]
    1919     # row, col, data = get_coo_matrix_from_G(G, weight='weight')
    1920     # for edge in edges:
    1921     #     # DI_ij = delt_RXij(Xi=[edge[0]], Xj=[edge[1]], G=G, weight='weight')
    1922     #     DI_ij = deltDI_ij(row, col, data, ndq_a=[edge[0]], ndq_b=[edge[1]])
    1923     #     DI_ij = float("%.3f" % (DI_ij))
    1924     #     delt_di = ((edge[0], edge[1]), edge[2], DI_ij)
    1925     #     edges_di.append(delt_di)
    1926     #     print(delt_di)
    1927 
    1928     # print('--' * 15)
    1929     # results, DI = iter_maxDI(G, iter_max=50, pk_partion=None, weight='weight', n_jobs=4, verbose=0)  # parallel speed-up
    1930     # print(results)
    1931     # print(DI[-1])
    1932     # print(DI)
    1933     # print('--' * 15)
    1934 
    1935     # results, DI = get_iter_maxDICluster(G, iter_max=50)  # parallel speed-up
    1936     # print(results)
    1937     # print(DI[-1])
    1938     # print(DI)
    1939     # print('--' * 15)
    1940 
    1941     # maxDI
    1942     results = maxDI(G, weight='weight')  # optimized version
    1943     print("Partition-Size(by maxDI):", len(results))
    1944     # 2D-StruInforEntropy
    1945     HG, Vj, g_j, Gj_deg = StruInforEntropy(G, partition=results, weight='weight')
    1946     oneHG = get_oneStruInforEntropy(G, weight='weight')
    1947     print("1DStruInforEntropy:", float('%.3f' % oneHG))
    1948     print("2DStruInforEntropy:", float('%.3f' % HG))
    1949     # get_DI by Parallel
    1950     DI = get_DI_Parallel(G, partion=results, weight='weight', n_jobs=-1)
    1951     print("DI:", DI)
    1952     partion_test = [[0, 1, 2, 3], [4, 5, 6, 7, 8], [9, 10, 11, 12, 13]]
    1953     DI = get_DI_Parallel(G, partion=partion_test, weight='weight', n_jobs=-1)
    1954     print("test-DI:", DI)
    1955     row, col, data = get_coo_matrix_from_G(G, weight='weight')
    1956     for i in range(len(results)):
    1957         for j in range(i+1, len(results)):
    1958             ndq_a, ndq_b = results[i], results[j]
    1959             DI_ij = deltDI_ij(row, col, data, ndq_a=ndq_a, ndq_b=ndq_b)
    1960             DI_ij = float("%.3f" % (DI_ij))
    1961             print(ndq_a, '+', ndq_b, '-', ndq_a + ndq_b, 'deltDI=', DI_ij)
    1962 
    1963     # row, col, data = get_coo_matrix_from_G(G, weight='weight')
    1964     # ndq_a = [0, 1, 2, 3]
    1965     # ndq_b = [4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
    1966     # Vi, Vj, Vij, gi, gj, gij, deg_i, deg_j, deg_ij = get_Vgdeg(row, col, data, ndq_a, ndq_b)
    1967     # V_G = np.sum(data) * 2.0
    1968     # ndq = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
    1969     # Vi, gi, deg_i = get_Vigidegi(row, col, data, ndq)
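# --- Added illustration (not in the original code): a compact reference for the quantity
# printed as "1DStruInforEntropy" above. Assuming the usual one-dimensional structural
# information entropy H1(G) = -sum_i (d_i / vol(G)) * log2(d_i / vol(G)), where d_i is
# the (weighted) degree of node i and vol(G) is the sum of all degrees,
# get_oneStruInforEntropy above should agree with this sketch.
def demo_one_dim_structural_entropy(G, weight='weight'):
    degrees = np.array([d for _, d in G.degree(weight=weight)], dtype=float)
    p = degrees / degrees.sum()
    p = p[p > 0]  # isolated (degree-0) nodes contribute nothing
    return float(-(p * np.log2(p)).sum())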
    1970 # typical_MaxDIClustering_Demonstration
    1971 def typical_MaxDIClustering_Demonstration():
    1972     # Load the geodata
    1973     output = open('park_X.pkl', 'rb')# park_X = get_X_fromSHP(filename=r'./park/BJ_park_m.shp', epsg4326_srid=2436, plot=True)
    1974     park_X = pickle.load(output)
    1975     output.close()
    1976 
    1977     output = open('busstop_X.pkl', 'rb')# busstop_X = get_X_fromSHP(filename=r'./busstop/BeijingBusStops_84.shp', epsg4326_srid=2436, plot=True)
    1978     busstop_X = pickle.load(output)# (42161, 2) # Maybe MemoryError --> A RBF kernel matrix K: (42161, 42161)!
    1979     output.close()
    1980 
    1981     print('------------------------------Extract Partition Outlines----------------------------------')
    1982     # Select any one of the graph-extraction methods: getRadiusGraph, getKNNGraph, getRBFKernelsGraph
    1983     G = getRadiusGraph(X=park_X, radius=500, merage_components=True, save=False, shpfile=False)
    1984     partion_result = get_PKResult(G, initial_partition=None, gshow=False, edge_data_flg=True)
    1985     outline_gdf = get_partion_result_outline(X=park_X, partion_result=partion_result, input_crs="EPSG:2436",
    1986                                              output_crs=None,
    1987                                              shpfile="partion_outline_shp.shp", save_shp=True, plot=False)
    1988     output = open('Extract_Partion_Outlines_gdf.pkl', 'wb')
    1989     pickle.dump(outline_gdf, output)
    1990     output.close()
    1991     print('------------------------------Extract Partition Outlines End----------------------------------')
    1992 
    1993     # Clustering for demonstration
    1994     X = park_X
    1995     # Clustering by MaxDIClustering
    1996     max_di_clus = MaxDIClustering(affinity='rbf',
    1997                                   threshold=0.998,  # other: getKNNGraph, getRBFKernelsGraph
    1998                                   gamma=1.0,
    1999                                   # for getRBFKernelsGraph # K(x, y) = exp(-gamma * ||x - y||^2) ----> gamma = 1 / (2 * sigma^2); sigma^2 is the variance of the Gaussian kernel.
    2000                                   merage_components=True,  # other: getKNNGraph, getRadiusGraph, getRBFKernelsGraph
    2001                                   pk='3',  # -- PK
    2002                                   n_jobs=4).fit(X)
    2003     print('DI_', max_di_clus.DI_)
    2004     print('OSIE_', max_di_clus.OSIE_)
    2005     print('DI_rate_', max_di_clus.DI_rate_)
    2006     print('si_score_', max_di_clus.si_score_)
    2007 # # # # # # # # # # # # # # # test and Demonstration - end # # # # # # # # # # # # # # # #
    2008 
    2009 def main():
    2010     # # 1. test cKDTree and pairwise_kernels Graph
    2011     # test()
    2012 
    2013     # 2. test_StruInforEntropy
    2014     test_StruInforEntropy()
    2015 
    2016     # # 3. rbf_Clustering by MaxDIClustering
    2017     # rbf_Clustering()
    2018 
    2019     # # 4. typical_MaxDIClustering_Demonstration
    2020     # typical_MaxDIClustering_Demonstration()
    2021 
    2022 if __name__ == '__main__':
    2023     main()