上一节实现了基础的GCN:https://www.cnblogs.com/xiximayou/p/13550000.html
这一节我们继续实现GraphSAGE。
加载数据:load_cora.py
"""Load and preprocess the Cora citation dataset (load_cora.py)."""
import numpy as np
import scipy.sparse as sp
import torch


def normalize_adj(adjacency):
    """Return the symmetrically normalised adjacency D^-1/2 (A + I) D^-1/2.

    D is the diagonal matrix holding the row sums of ``A + I``.

    Args:
        adjacency: square scipy sparse adjacency matrix ``A``.

    Returns:
        The normalised matrix in COO format.
    """
    # Bind a new object instead of using ``+=`` so the caller's matrix is
    # never modified.
    adjacency = adjacency + sp.eye(adjacency.shape[0])
    degree = np.array(adjacency.sum(1))
    d_hat = sp.diags(np.power(degree, -0.5).flatten())
    return d_hat.dot(adjacency).dot(d_hat).tocoo()


def normalize_features(features):
    """Scale every row of ``features`` so that it sums to one.

    Args:
        features: scipy sparse matrix or dense 2-D array of node features.

    Returns:
        A dense ``numpy.ndarray`` with the same shape. Rows whose sum is
        zero are returned unchanged (all zeros) instead of producing NaN.
    """
    # Densify explicitly: the loader needs a plain ndarray afterwards and
    # should not rely on what "sparse / dense" happens to return.
    if sp.issparse(features):
        dense = features.toarray()
    else:
        dense = np.asarray(features)
    row_sum = dense.sum(axis=1, keepdims=True)
    row_sum[row_sum == 0] = 1  # avoid 0 / 0 for empty rows
    return dense / row_sum


def load_data(path="/content/drive/My Drive/nlpdata/cora/", dataset="cora"):
    """Load citation network dataset (cora only for now).

    Reads ``<path><dataset>.content`` (one node per line: id, feature
    values, class label) and ``<path><dataset>.cites`` (one edge per line:
    two node ids).

    Args:
        path: directory prefix; it is concatenated with the file name as-is,
            so it must end with a path separator.
        dataset: base name of the two dataset files.

    Returns:
        Tuple ``(adj, features, labels, train_mask, val_mask, test_mask)``:
        the normalised adjacency (scipy COO matrix), the row-normalised
        features (float tensor), the class index of every node (long
        tensor, classes numbered in sorted label order) and three boolean
        numpy masks selecting nodes [0, 140), [200, 500) and [500, 1500).
    """
    print('Loading {} dataset...'.format(dataset))

    raw = np.genfromtxt("{}{}.content".format(path, dataset),
                        dtype=np.dtype(str))
    # Convert the string columns to numbers before handing them to scipy.
    features = sp.csr_matrix(raw[:, 1:-1].astype(np.float32))
    # Index of each label within the sorted set of distinct labels.
    labels = np.unique(raw[:, -1], return_inverse=True)[1].reshape(-1)
    num_nodes = raw.shape[0]

    # build graph
    idx = np.array(raw[:, 0], dtype=np.int32)
    idx_map = {j: i for i, j in enumerate(idx)}
    edges_unordered = np.genfromtxt("{}{}.cites".format(path, dataset),
                                    dtype=np.int32)
    # reshape(-1, 2) also copes with a file that holds a single edge, which
    # genfromtxt returns as a 1-D array.
    edges = np.array(list(map(idx_map.get, edges_unordered.flatten())),
                     dtype=np.int32).reshape(-1, 2)
    # Edges are used in the direction listed in the file; the matrix is not
    # symmetrised here.
    adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
                        shape=(num_nodes, num_nodes),
                        dtype=np.float32)

    features = normalize_features(features)
    adj = normalize_adj(adj)

    features = torch.FloatTensor(np.asarray(features, dtype=np.float32))
    labels = torch.LongTensor(np.asarray(labels, dtype=np.int64))

    # ``np.bool`` no longer exists in current NumPy; the builtin is the
    # same dtype.
    train_mask = np.zeros(num_nodes, dtype=bool)
    val_mask = np.zeros(num_nodes, dtype=bool)
    test_mask = np.zeros(num_nodes, dtype=bool)
    # Slices clip to the array length, so graphs with fewer than 1500 nodes
    # yield shorter (possibly empty) splits instead of an IndexError.
    train_mask[:140] = True
    val_mask[200:500] = True
    test_mask[500:1500] = True

    return adj, features, labels, train_mask, val_mask, test_mask


# Example usage:
#
#     adj, features, labels, train_mask, val_mask, test_mask = load_data()
#     print(adj.shape)
#     print(features.shape)
#     print(labels.shape)
#     print(train_mask.shape, val_mask.shape, test_mask.shape)
采样:sampling.py
import numpy as np


def sampling(src_nodes, sample_num, neighbor_table):
    """Sample a fixed number of neighbours for every source node.

    Sampling is done with replacement, so a node that has fewer neighbours
    than ``sample_num`` contributes repeated ids. A node without any
    neighbours contributes its own id ``sample_num`` times, which keeps the
    output length at ``len(src_nodes) * sample_num``.

    Arguments:
        src_nodes {list, ndarray} -- source node ids
        sample_num {int} -- number of neighbours to draw per source node
        neighbor_table {dict} -- mapping from a node id to its neighbour ids

    Returns:
        np.ndarray -- flat array of sampled ids; the samples of the i-th
            source node occupy positions
            ``[i * sample_num, (i + 1) * sample_num)``
    """
    results = []
    for sid in src_nodes:
        neighbors = neighbor_table[sid]
        if len(neighbors) == 0:
            # np.random.choice rejects an empty population; fall back to
            # the node itself.
            neighbors = [sid]
        # np.random.choice draws with replacement by default
        results.append(np.random.choice(neighbors, size=(sample_num, )))
    if not results:
        # Keep an integer dtype so the result can still be used as an index.
        return np.empty(0, dtype=np.int64)
    return np.asarray(results).flatten()


def multihop_sampling(src_nodes, sample_nums, neighbor_table):
    """Sample neighbours hop by hop, starting from the source nodes.

    Arguments:
        src_nodes {list, np.ndarray} -- source node ids
        sample_nums {list of int} -- number of neighbours to draw at each hop
        neighbor_table {dict} -- mapping from a node id to its neighbour ids

    Returns:
        [list of ndarray] -- ``src_nodes`` followed by one sampling result
            per hop; hop k is sampled from the result of hop k - 1
    """
    sampling_result = [src_nodes]
    for hopk_num in sample_nums:
        sampling_result.append(
            sampling(sampling_result[-1], hopk_num, neighbor_table))
    return sampling_result
建立模型:graphsage.py
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init


class NeighborAggregator(nn.Module):
    def __init__(self, input_dim, output_dim,
                 use_bias=False, aggr_method="mean"):
        """Aggregate the features of a node's sampled neighbours.

        Args:
            input_dim: dimension of the input features
            output_dim: dimension of the output features
            use_bias: whether to add a bias term (default: {False})
            aggr_method: neighbour aggregation method, one of "mean",
                "sum" or "max" (default: {mean})
        """
        super(NeighborAggregator, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.use_bias = use_bias
        self.aggr_method = aggr_method
        self.weight = nn.Parameter(torch.empty(input_dim, output_dim))
        if self.use_bias:
            self.bias = nn.Parameter(torch.empty(self.output_dim))
        self.reset_parameters()

    def reset_parameters(self):
        """Kaiming-uniform initialise the weight and zero the bias."""
        init.kaiming_uniform_(self.weight)
        if self.use_bias:
            init.zeros_(self.bias)

    def forward(self, neighbor_feature):
        """Reduce over dim 1 (the neighbours) and project the result.

        Args:
            neighbor_feature: tensor whose dim 1 enumerates the neighbours
                of each source node and whose last dim is ``input_dim``.

        Returns:
            Tensor with dim 1 removed and the last dim equal to
            ``output_dim``.

        Raises:
            ValueError: if ``aggr_method`` is not "mean", "sum" or "max".
        """
        if self.aggr_method == "mean":
            aggr_neighbor = neighbor_feature.mean(dim=1)
        elif self.aggr_method == "sum":
            aggr_neighbor = neighbor_feature.sum(dim=1)
        elif self.aggr_method == "max":
            # Tensor.max(dim=...) returns (values, indices); only the values
            # are features.
            aggr_neighbor = neighbor_feature.max(dim=1).values
        else:
            raise ValueError("Unknown aggr type, expected sum, max, or mean, but got {}"
                             .format(self.aggr_method))

        neighbor_hidden = torch.matmul(aggr_neighbor, self.weight)
        if self.use_bias:
            neighbor_hidden = neighbor_hidden + self.bias

        return neighbor_hidden

    def extra_repr(self):
        return 'in_features={}, out_features={}, aggr_method={}'.format(
            self.input_dim, self.output_dim, self.aggr_method)


class SageGCN(nn.Module):
    def __init__(self, input_dim, hidden_dim,
                 activation=F.relu,
                 aggr_neighbor_method="mean",
                 aggr_hidden_method="sum"):
        """A single GraphSAGE layer.

        Args:
            input_dim: dimension of the input features
            hidden_dim: dimension of the hidden features;
                with aggr_hidden_method=sum the output dimension is
                hidden_dim, with aggr_hidden_method=concat it is
                hidden_dim*2
            activation: activation function, or None for no activation
            aggr_neighbor_method: neighbour aggregation method,
                ["mean", "sum", "max"]
            aggr_hidden_method: how the node's own features and the
                aggregated neighbour features are combined,
                ["sum", "concat"]
        """
        super(SageGCN, self).__init__()
        assert aggr_neighbor_method in ["mean", "sum", "max"]
        assert aggr_hidden_method in ["sum", "concat"]
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.aggr_neighbor_method = aggr_neighbor_method
        self.aggr_hidden_method = aggr_hidden_method
        self.activation = activation
        self.aggregator = NeighborAggregator(input_dim, hidden_dim,
                                             aggr_method=aggr_neighbor_method)
        self.weight = nn.Parameter(torch.empty(input_dim, hidden_dim))
        self.reset_parameters()

    def reset_parameters(self):
        """Kaiming-uniform initialise the self-feature weight."""
        init.kaiming_uniform_(self.weight)

    def forward(self, src_node_features, neighbor_node_features):
        """Combine each node's own projection with its neighbours'.

        Args:
            src_node_features: features of the source nodes, last dim
                ``input_dim``.
            neighbor_node_features: neighbour features, passed to the
                aggregator, which reduces over dim 1.

        Returns:
            The updated node features, passed through ``activation`` when
            one is set.

        Raises:
            ValueError: if ``aggr_hidden_method`` is neither "sum" nor
                "concat".
        """
        neighbor_hidden = self.aggregator(neighbor_node_features)
        self_hidden = torch.matmul(src_node_features, self.weight)

        if self.aggr_hidden_method == "sum":
            hidden = self_hidden + neighbor_hidden
        elif self.aggr_hidden_method == "concat":
            hidden = torch.cat([self_hidden, neighbor_hidden], dim=1)
        else:
            raise ValueError("Expected sum or concat, got {}"
                             .format(self.aggr_hidden_method))
        if self.activation:
            return self.activation(hidden)
        return hidden

    def extra_repr(self):
        output_dim = self.hidden_dim if self.aggr_hidden_method == "sum" else self.hidden_dim * 2
        return 'in_features={}, out_features={}, aggr_hidden_method={}'.format(
            self.input_dim, output_dim, self.aggr_hidden_method)


class GraphSage(nn.Module):
    def __init__(self, input_dim, hidden_dim,
                 num_neighbors_list):
        """Stack of SageGCN layers applied to multi-hop sampled features.

        Args:
            input_dim: dimension of the raw node features
            hidden_dim: list with the output dimension of every layer; the
                last layer is built without activation
            num_neighbors_list: number of neighbours sampled at each hop;
                its length is the number of layers used in ``forward``

        Raises:
            ValueError: if ``hidden_dim`` is empty.
        """
        super(GraphSage, self).__init__()
        if len(hidden_dim) == 0:
            raise ValueError("hidden_dim must contain at least one layer size")
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_neighbors_list = num_neighbors_list
        self.num_layers = len(num_neighbors_list)
        self.gcn = nn.ModuleList()
        # Chain the dimensions: input -> hidden[0] -> ... -> hidden[-1].
        # Written as one loop so a single-layer model also works.
        dims = [input_dim] + list(hidden_dim)
        last_layer = len(dims) - 2
        for index in range(len(dims) - 1):
            if index == last_layer:
                # the final layer emits raw scores
                layer = SageGCN(dims[index], dims[index + 1], activation=None)
            else:
                layer = SageGCN(dims[index], dims[index + 1])
            self.gcn.append(layer)

    def forward(self, node_features_list):
        """Compute the output features of the source nodes.

        Args:
            node_features_list: list of feature tensors, one per hop
                (hop 0 = source nodes). Entry ``hop + 1`` must hold
                ``num_neighbors_list[hop]`` rows for every row of entry
                ``hop``, grouped by source node.

        Returns:
            Output of the last layer for the hop-0 (source) nodes.
        """
        hidden = node_features_list
        for layer_idx in range(self.num_layers):
            next_hidden = []
            gcn = self.gcn[layer_idx]
            # After each layer the outermost hop has been consumed, so one
            # hop fewer remains.
            for hop in range(self.num_layers - layer_idx):
                src_node_features = hidden[hop]
                src_node_num = len(src_node_features)
                # Regroup the flat neighbour rows per source node.
                neighbor_node_features = hidden[hop + 1].reshape(
                    (src_node_num, self.num_neighbors_list[hop], -1))
                next_hidden.append(gcn(src_node_features, neighbor_node_features))
            hidden = next_hidden
        return hidden[0]

    def extra_repr(self):
        return 'in_features={}, num_neighbors_list={}'.format(
            self.input_dim, self.num_neighbors_list
        )
主函数:main.py
import pickle
import sys

# Extend the module search path before the project imports below;
# presumably this is what lets graphsage / sampling / load_cora be found
# when they are stored in this directory.
sys.path.append("/content/drive/My Drive/nlpdata/cora/")

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

from graphsage import GraphSage
from sampling import multihop_sampling
from load_cora import load_data

INPUT_DIM = 1433    # input feature dimension
# Note: the number of sampled hops must match the number of GCN layers
HIDDEN_DIM = [128, 7]   # output size of every layer
NUM_NEIGHBORS_LIST = [10, 10]   # neighbours sampled at each hop
assert len(HIDDEN_DIM) == len(NUM_NEIGHBORS_LIST)
BATCH_SIZE = 16     # batch size
BTACH_SIZE = BATCH_SIZE  # misspelled original name, kept as an alias
EPOCHS = 100
NUM_BATCH_PER_EPOCH = 20    # number of batches per epoch
LEARNING_RATE = 0.01    # learning rate
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

adjacency, x, y, train_mask, val_mask, test_mask = load_data()

# NOTE: pickle can execute arbitrary code while loading; only open graph
# files that come from a trusted source.
with open("/content/drive/My Drive/nlpdata/cora/ind.cora.graph", "rb") as graph_file:
    out = pickle.load(graph_file, encoding="latin1")
graph = out.toarray() if hasattr(out, "toarray") else out
# Used below as the node id -> neighbour ids table for sampling.
adjacency_dict = graph

train_index = np.where(train_mask)[0]
train_label = y[train_index]  # labels of the training nodes, in train_index order
test_index = np.where(test_mask)[0]
model = GraphSage(input_dim=INPUT_DIM, hidden_dim=HIDDEN_DIM,
                  num_neighbors_list=NUM_NEIGHBORS_LIST).to(DEVICE)
print(model)
criterion = nn.CrossEntropyLoss().to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=5e-4)


def train():
    """Train the model for EPOCHS epochs, evaluating after every epoch."""
    for e in range(EPOCHS):
        # test() switches the model to eval mode, so training mode has to
        # be restored at the start of every epoch.
        model.train()
        for batch in range(NUM_BATCH_PER_EPOCH):
            batch_src_index = np.random.choice(train_index, size=(BATCH_SIZE,))
            # batch_src_index holds global node ids, so the labels must be
            # read from the full label tensor, not from the training subset.
            batch_src_label = y[batch_src_index].long().to(DEVICE)
            batch_sampling_result = multihop_sampling(batch_src_index, NUM_NEIGHBORS_LIST, adjacency_dict)
            batch_sampling_x = [x[idx].to(DEVICE) for idx in batch_sampling_result]
            batch_train_logits = model(batch_sampling_x)
            loss = criterion(batch_train_logits, batch_src_label)
            optimizer.zero_grad()
            loss.backward()  # back-propagate to compute the parameter gradients
            optimizer.step()  # apply the gradient update
            print("Epoch {:03d} Batch {:03d} Loss: {:.4f}".format(e, batch, loss.item()))
        test()


def test():
    """Print the accuracy on the test nodes using freshly sampled neighbours."""
    model.eval()
    with torch.no_grad():
        test_sampling_result = multihop_sampling(test_index, NUM_NEIGHBORS_LIST, adjacency_dict)
        test_x = [x[idx].to(DEVICE) for idx in test_sampling_result]
        test_logits = model(test_x)
        test_label = y[test_index].long().to(DEVICE)
        predict_y = test_logits.max(1)[1]
        accuracy = torch.eq(predict_y, test_label).float().mean().item()
        print("Test Accuracy: ", accuracy)


if __name__ == '__main__':
    train()
运行之后:
Loading cora dataset... GraphSage( in_features=1433, num_neighbors_list=[10, 10] (gcn): ModuleList( (0): SageGCN( in_features=1433, out_features=128, aggr_hidden_method=sum (aggregator): NeighborAggregator(in_features=1433, out_features=128, aggr_method=mean) ) (1): SageGCN( in_features=128, out_features=7, aggr_hidden_method=sum (aggregator): NeighborAggregator(in_features=128, out_features=7, aggr_method=mean) ) ) ) Epoch 000 Batch 000 Loss: 1.9266 Epoch 000 Batch 001 Loss: 1.8985 Epoch 000 Batch 002 Loss: 1.8120 Epoch 000 Batch 003 Loss: 1.6504 Epoch 000 Batch 004 Loss: 1.7163 Epoch 000 Batch 005 Loss: 1.6697 Epoch 000 Batch 006 Loss: 1.4684 Epoch 000 Batch 007 Loss: 1.2102 Epoch 000 Batch 008 Loss: 1.2884 Epoch 000 Batch 009 Loss: 1.0167 Epoch 000 Batch 010 Loss: 0.9699 Epoch 000 Batch 011 Loss: 0.9754 Epoch 000 Batch 012 Loss: 0.5973 Epoch 000 Batch 013 Loss: 0.8455 Epoch 000 Batch 014 Loss: 0.9120 Epoch 000 Batch 015 Loss: 0.6430 Epoch 000 Batch 016 Loss: 0.7662 Epoch 000 Batch 017 Loss: 0.8074 Epoch 000 Batch 018 Loss: 0.5895 Epoch 000 Batch 019 Loss: 0.4272 Test Accuracy: 0.3270000219345093...... 
Epoch 098 Batch 000 Loss: 0.0403 Epoch 098 Batch 001 Loss: 0.1180 Epoch 098 Batch 002 Loss: 0.0855 Epoch 098 Batch 003 Loss: 0.0344 Epoch 098 Batch 004 Loss: 0.0437 Epoch 098 Batch 005 Loss: 0.0531 Epoch 098 Batch 006 Loss: 0.0513 Epoch 098 Batch 007 Loss: 0.0787 Epoch 098 Batch 008 Loss: 0.0396 Epoch 098 Batch 009 Loss: 0.0373 Epoch 098 Batch 010 Loss: 0.0398 Epoch 098 Batch 011 Loss: 0.0337 Epoch 098 Batch 012 Loss: 0.0427 Epoch 098 Batch 013 Loss: 0.0315 Epoch 098 Batch 014 Loss: 0.0720 Epoch 098 Batch 015 Loss: 0.0827 Epoch 098 Batch 016 Loss: 0.1221 Epoch 098 Batch 017 Loss: 0.0374 Epoch 098 Batch 018 Loss: 0.0427 Epoch 098 Batch 019 Loss: 0.0373 Test Accuracy: 0.5540000200271606 Epoch 099 Batch 000 Loss: 0.0577 Epoch 099 Batch 001 Loss: 0.0373 Epoch 099 Batch 002 Loss: 0.0462 Epoch 099 Batch 003 Loss: 0.0511 Epoch 099 Batch 004 Loss: 0.0849 Epoch 099 Batch 005 Loss: 0.0571 Epoch 099 Batch 006 Loss: 0.0478 Epoch 099 Batch 007 Loss: 0.0598 Epoch 099 Batch 008 Loss: 0.0376 Epoch 099 Batch 009 Loss: 0.0414 Epoch 099 Batch 010 Loss: 0.0478 Epoch 099 Batch 011 Loss: 0.0321 Epoch 099 Batch 012 Loss: 0.1014 Epoch 099 Batch 013 Loss: 0.0617 Epoch 099 Batch 014 Loss: 0.0529 Epoch 099 Batch 015 Loss: 0.0325 Epoch 099 Batch 016 Loss: 0.0334 Epoch 099 Batch 017 Loss: 0.0432 Epoch 099 Batch 018 Loss: 0.0939 Epoch 099 Batch 019 Loss: 0.0517 Test Accuracy: 0.5260000228881836
参考:
https://github.com/FighterLYL/GraphNeuralNetwork