1.1 加载数据
1.2 Unsupervised Loss
1.3 Models
1.4 评估与模型使用
1.5 Main
二、PyG版本
class SAGEConv(MessagePassing):
(1)in_channels (int or tuple): Size of each input sample, or :obj:-1 to derive the size from the first input(s) to the forward method. A tuple corresponds to the sizes of source and target dimensionalities.
(2)out_channels (int): Size of each output sample.
(3)normalize (bool, optional): If set to :obj:True, output features will be $\ell_2$-normalized, i.e., $\frac{\mathbf{x}^{\prime}_i}{\| \mathbf{x}^{\prime}_i \|_2}$. (default: :obj:False)
(4)root_weight (bool, optional): If set to :obj:False, the layer will not add transformed root node features to the output. (default: :obj:True)
(5)bias (bool, optional): If set to :obj:False, the layer will not learn an additive bias. (default: :obj:True)
(6)**kwargs (optional): Additional arguments of :class:torch_geometric.nn.conv.MessagePassing.
官方代码:https://github.com/williamleif/graphsage-simple/
如果我们使用pytorch的PyG也能很方便调用:
# -*- coding: utf-8 -*-
"""
Created on Fri Oct 8 23:16:13 2021

@author: 86493

Train a two-layer GraphSAGE node classifier on the Planetoid "Cora" dataset.

The script:
1. loads the dataset and prints a few statistics about its first graph,
2. plots a 2-D t-SNE projection of the untrained model's outputs,
3. trains the model with Adam and cross-entropy on the training mask,
4. plots the per-epoch training loss,
5. reports accuracy on the test mask and plots the trained outputs.
"""
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn.functional as F
from sklearn.manifold import TSNE
from torch.nn import Linear
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import SAGEConv
from torch_geometric.transforms import NormalizeFeatures

# Number of optimisation steps; also the length of the plotted loss curve.
NUM_EPOCHS = 200

# 1. Load the dataset (the NormalizeFeatures transform is applied on access).
dataset = Planetoid(root='C:/dataset/Cora/processed', name='Cora',
                    transform=NormalizeFeatures())

print()
print(f'Dataset: {dataset}:')
print('======================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

data = dataset[0]  # Get the first graph object.

print()
print(data)
print('======================')

# Gather some statistics about the graph.
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Number of training nodes: {data.train_mask.sum()}')
print(f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.2f}')
print(f'Contains isolated nodes: {data.has_isolated_nodes()}')
print(f'Contains self-loops: {data.has_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')


# 2. Visualising the distribution of node representations.
def visualize(h, color):
    """Scatter-plot a 2-D t-SNE projection of the node representations.

    Args:
        h: Tensor of node representations; it is detached, moved to the CPU
            and converted to NumPy before being projected.
        color: Per-node values passed to ``plt.scatter`` as ``c`` to colour
            the points (the script passes the node labels ``data.y``).
    """
    z = TSNE(n_components=2).fit_transform(h.detach().cpu().numpy())

    plt.figure(figsize=(10, 10))
    plt.xticks([])
    plt.yticks([])

    plt.scatter(z[:, 0], z[:, 1], s=70, c=color, cmap="Set2")
    plt.show()


# 3. Model definition.
#
# For comparison, the equivalent GCN model only swaps the convolution layer:
#
# from torch_geometric.nn import GCNConv
#
# class GCN(torch.nn.Module):
#     def __init__(self, hidden_channels):
#         super(GCN, self).__init__()
#         torch.manual_seed(12345)
#         self.conv1 = GCNConv(dataset.num_features, hidden_channels)
#         self.conv2 = GCNConv(hidden_channels, dataset.num_classes)
#
#     def forward(self, x, edge_index):
#         x = self.conv1(x, edge_index)
#         x = x.relu()
#         x = F.dropout(x, p=0.5, training=self.training)
#         x = self.conv2(x, edge_index)
#         return x


class SAGE(torch.nn.Module):
    """Two-layer GraphSAGE network producing one score per class and node.

    Input and output sizes are read from the module-level ``dataset``
    (``num_features`` and ``num_classes``).

    Args:
        hidden_channels: Output size of the first ``SAGEConv`` layer and
            input size of the second one.
    """

    def __init__(self, hidden_channels):
        super(SAGE, self).__init__()
        # Seed before the layers are created so that every instantiation
        # starts from the same torch RNG state.
        torch.manual_seed(12345)
        self.conv1 = SAGEConv(dataset.num_features, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, dataset.num_classes)

    def forward(self, x, edge_index):
        """Return the output of conv1 -> ReLU -> dropout(p=0.5) -> conv2."""
        x = self.conv1(x, edge_index)
        x = x.relu()
        # Dropout is only active while the module is in training mode.
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)
        return x


model = SAGE(hidden_channels=16)
print(model)

# 4. Visualise the node representations produced by the untrained network.
model.eval()
with torch.no_grad():  # Inference only: no autograd graph is needed.
    out = model(data.x, data.edge_index)
visualize(out, color=data.y)

# 5. Training.
model = SAGE(hidden_channels=16)  # Fresh model for the actual training run.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()


def train():
    """Run one full-graph optimisation step and return the training loss.

    Returns:
        The cross-entropy loss over the nodes selected by
        ``data.train_mask``, as a detached 0-dim tensor.
    """
    model.train()
    optimizer.zero_grad()  # Clear gradients.
    out = model(data.x, data.edge_index)  # Perform a single forward pass.
    # Compute the loss solely based on the training nodes.
    loss = criterion(out[data.train_mask], data.y[data.train_mask])
    loss.backward()  # Derive gradients.
    optimizer.step()  # Update parameters based on gradients.
    # Detach so that callers holding the value do not keep the graph alive.
    return loss.detach()


# Losses are recorded during this single training run so that the plotted
# curve corresponds exactly to the epochs printed here and the model is
# trained for NUM_EPOCHS epochs in total.
loss_history = []
for epoch in range(1, NUM_EPOCHS + 1):
    loss_value = train().item()
    loss_history.append(loss_value)
    print(f'Epoch: {epoch:03d}, Loss: {loss_value:.4f}')

# 6. Line plot of the training loss ("Epoch" index, "Loss" column).
df = pd.DataFrame(
    {"Loss": loss_history},
    index=pd.RangeIndex(1, NUM_EPOCHS + 1, name="Epoch"),
)
df.plot()
plt.show()


# 7. Testing.
@torch.no_grad()
def test():
    """Return the fraction of ``data.test_mask`` nodes classified correctly."""
    model.eval()
    out = model(data.x, data.edge_index)
    pred = out.argmax(dim=1)  # Use the class with the highest score.
    # Check against ground-truth labels.
    test_correct = pred[data.test_mask] == data.y[data.test_mask]
    # Derive ratio of correct predictions.
    test_acc = int(test_correct.sum()) / int(data.test_mask.sum())
    return test_acc


test_acc = test()
print(f'Test Accuracy: {test_acc:.4f}')

# 8. Visualise the node representations produced by the trained network.
model.eval()
with torch.no_grad():
    out = model(data.x, data.edge_index)
visualize(out, color=data.y)
打印出的结果为:

Dataset: Cora():
======================
Number of graphs: 1
Number of features: 1433
Number of classes: 7

Data(
  x=[2708, 1433],
  edge_index=[2, 10556],
  y=[2708],
  train_mask=[2708],
  val_mask=[2708],
  test_mask=[2708]
)
======================
Number of nodes: 2708
Number of edges: 10556
Average node degree: 3.90
Number of training nodes: 140
Training node label rate: 0.05
Contains isolated nodes: False
Contains self-loops: False
Is undirected: True
SAGE(
  (conv1): SAGEConv(1433, 16)
  (conv2): SAGEConv(16, 7)
)
可视化的图如上所示,也可以可视化loss的200个epoch的折线图:
Reference
(1)https://github.com/twjiang/graphSAGE-pytorch/tree/master/src
(2)https://zhuanlan.zhihu.com/p/410407148
(3)https://blog.csdn.net/weixin_44027006/article/details/116888648
(4)GraphSAGE 代码解析(二) - layers.py