# 图神经网络入门示例：使用PyTorch Geometric 进行节点分类

## 数据集

 wget https://snap.stanford.edu/data/facebook_large.zip


## 创建PyTorch同构数据对象

 from torch.nn.utils.rnn import pad_sequence

# NOTE(review): incomplete extract — the enclosing `def load_node_csv(...)`
# line, the `pd.read_csv` call defining `df`, the body of the `with` block
# (presumably `features_data = json.load(json_file)`), and the
# pad_sequence / mean / std computations were lost during scraping.
# Indentation was also stripped; treat as illustrative, not runnable.

# Map positional index -> original node id, in first-appearance order.
mapping = {i: node_id for i, node_id in enumerate(df[index_col].unique())}

with open(os.path.join(data_dir, "musae_facebook_features.json"), "r") as json_file:

# Build one variable-length feature tensor per node.
xs = []
for index, node_id in mapping.items():
# JSON keys are strings; fall back to an empty list for missing nodes.
features = features_data.get(str(index), [])
if features:
# Create tensor from feature vector
features_tensor = torch.tensor(features, dtype=torch.float)
xs.append(features_tensor)
else:
# Placeholder so every node contributes a row; padded to full width below.
xs.append(torch.zeros(1, dtype=torch.float))

# Pad features to have vectors of the same size
# (the pad_sequence call itself was lost in extraction)

# Create tensor of normalized features for nodes

x = (padded_features - mean) / (std + 1e-8)  # final x tensor with normalized features; 1e-8 avoids division by zero

return x



 def load_edge_csv(path, src_index_col, dst_index_col, **kwargs):

src = df[src_index_col].values
dst = df[dst_index_col].values
edge_index = torch.tensor([src, dst])

return edge_index



 # Create homogeneous graph using PyTorch's Data object
data = Data(x=x, edge_index=edge_index, y=y)


 >>> Data(x=[22470, 31], edge_index=[2, 171002], y=[22470])


## 分割数据

 # Calculate no. of train nodes
num_nodes = data.num_nodes
train_percentage = 0.7
num_train_nodes = int(train_percentage * num_nodes)



 >>> Data(
x=[22470, 31],
edge_index=[2, 171002],
y=[22470],
num_classes=4,
)


## 训练神经网络

1、多层感知网络(MLP)

 from torch.nn import Linear
import torch.nn.functional as F

class MLP(torch.nn.Module):
    """Five-layer multilayer perceptron for node classification.

    Uses node features only (no graph structure); layer widths taper
    32 -> 32 -> 16 -> 8 -> num_classes.

    Args:
        num_features: input feature dimension. Defaults to the
            module-level ``data.num_features``, so the original
            zero-argument ``MLP()`` construction still works.
        num_classes: output dimension. Defaults to ``data.num_classes``.

    NOTE(review): the original ``forward`` ended with ``torch.softmax``,
    but the model is trained with ``torch.nn.CrossEntropyLoss``, which
    applies log-softmax internally. Softmax-then-CrossEntropy applies the
    normalization twice and flattens gradients, so ``forward`` now returns
    raw logits. ``argmax`` predictions are unchanged (softmax is
    monotonic).
    """

    def __init__(self, num_features=None, num_classes=None):
        super().__init__()
        torch.manual_seed(123)  # reproducible weight initialization
        if num_features is None:
            num_features = data.num_features
        if num_classes is None:
            num_classes = data.num_classes
        self.lin1 = Linear(num_features, 32)
        self.lin2 = Linear(32, 32)
        self.lin3 = Linear(32, 16)
        self.lin4 = Linear(16, 8)
        self.lin5 = Linear(8, num_classes)

    def forward(self, x):
        x = F.relu(self.lin1(x))
        x = F.relu(self.lin2(x))
        x = F.relu(self.lin3(x))
        x = F.relu(self.lin4(x))
        # Raw logits — CrossEntropyLoss handles the softmax internally.
        return self.lin5(x)


# Inverse-frequency class weights to counter label imbalance:
# rarer classes get proportionally larger loss weight.
class_weights = torch.tensor([1 / i for i in df_agg_classes["proportion"].values], dtype=torch.float)
model = MLP()
# CrossEntropyLoss expects raw logits from the model's forward pass.
criterion = torch.nn.CrossEntropyLoss(weight=class_weights)


 >>> MLP(
(lin1): Linear(in_features=31, out_features=32, bias=True)
(lin2): Linear(in_features=32, out_features=32, bias=True)
(lin3): Linear(in_features=32, out_features=16, bias=True)
(lin4): Linear(in_features=16, out_features=8, bias=True)
(lin5): Linear(in_features=8, out_features=4, bias=True)
)


 def train():
model.train()
out = model(data.x)  # Perform a single forward pass
optimizer.step()  # Update parameters
return loss


# Train for 1000 epochs, logging the loss each epoch.
# NOTE(review): loop-body indentation was stripped during scraping.
for epoch in range(1, 1001):
loss = train()
print(f"Epoch: {epoch:03d}, loss: {loss:.4f}")


 def test():
model.eval()
out = model(data.x)
pred = out.argmax(dim=1)  # Select class with the highest probability
test_acc = int(test_correct.sum()) / int(data.test_mask.sum())  # Calculate fraction of correct predictions
return test_acc


 def visualize(h, color):
z = TSNE(n_components=2).fit_transform(h.detach().cpu().numpy())
plt.figure(figsize=(10, 10))
plt.xticks([])  # create an empty x axis
plt.yticks([])  # create an empty y axis
plt.scatter(z[:, 0], z[:, 1], s=70, c=color, cmap="Set2")


2、图卷积网络(GCN)

 from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    """Five-layer graph convolutional network for node classification.

    Same taper as the MLP (32 -> 32 -> 16 -> 8 -> num_classes) but each
    layer aggregates over graph neighbors via ``GCNConv``.

    Args:
        num_features: input feature dimension. Defaults to the
            module-level ``data.num_features``, so the original
            zero-argument ``GCN()`` construction still works.
        num_classes: output dimension. Defaults to ``data.num_classes``.

    NOTE(review): the original ``forward`` ended with ``F.log_softmax``
    while training uses ``torch.nn.CrossEntropyLoss``, which applies
    log-softmax internally — i.e. log-softmax was applied twice.
    ``forward`` now returns raw logits; ``argmax`` predictions are
    unchanged (log-softmax is monotonic).
    """

    def __init__(self, num_features=None, num_classes=None):
        super().__init__()
        torch.manual_seed(123)  # reproducible weight initialization
        if num_features is None:
            num_features = data.num_features
        if num_classes is None:
            num_classes = data.num_classes
        self.conv1 = GCNConv(num_features, 32)
        self.conv2 = GCNConv(32, 32)
        self.conv3 = GCNConv(32, 16)
        self.conv4 = GCNConv(16, 8)
        self.conv5 = GCNConv(8, num_classes)

    def forward(self, x, edge_index):
        x = F.relu(self.conv1(x, edge_index))
        x = F.relu(self.conv2(x, edge_index))
        x = F.relu(self.conv3(x, edge_index))
        x = F.relu(self.conv4(x, edge_index))
        # Raw logits — CrossEntropyLoss handles the normalization internally.
        return self.conv5(x, edge_index)


# Re-use the same inverse-frequency class weights as the MLP experiment.
model = GCN()
criterion = torch.nn.CrossEntropyLoss(weight=class_weights)

def train():
    """Run one full-batch optimization step of the GCN and return the loss.

    NOTE(review): like the MLP version, the scraped original returned
    ``loss`` without defining it and never called ``zero_grad``/``backward``.
    Reconstructed with the standard PyG full-batch pattern; the mask name
    ``data.train_mask`` mirrors ``data.test_mask`` used in ``test()`` —
    confirm against the original article.
    """
    model.train()
    optimizer.zero_grad()  # Clear gradients from the previous step
    out = model(data.x, data.edge_index)  # Perform a single forward pass
    loss = criterion(out[data.train_mask], data.y[data.train_mask])  # Loss on training nodes only
    loss.backward()  # Derive gradients
    optimizer.step()  # Update parameters
    return loss

# Train for 1000 epochs, logging the loss each epoch.
# NOTE(review): loop-body indentation was stripped during scraping.
for epoch in range(1, 1001):
loss = train()
print(f"Epoch: {epoch:03d}, Loss: {loss:.4f}")


 def test():
model.eval()
out = model(data.x, data.edge_index)  # Pass in features and edges
pred = out.argmax(dim=1)  # Get predicted class
test_acc = int(test_correct.sum()) / int(data.test_mask.sum())  # Get proportion of correct predictions
return test_acc


GCN模型能对测试集中80%的节点进行正确分类!让我们把它画出来:

## 总结

|
5天前
|

【从零开始学习深度学习】32. 卷积神经网络之稠密连接网络（DenseNet）介绍及其Pytorch实现
【从零开始学习深度学习】32. 卷积神经网络之稠密连接网络（DenseNet）介绍及其Pytorch实现
12 1
|
5天前
|

【从零开始学习深度学习】49.Pytorch_NLP项目实战：文本情感分类---使用循环神经网络RNN
【从零开始学习深度学习】49.Pytorch_NLP项目实战：文本情感分类---使用循环神经网络RNN
15 1
|
5天前
|

【从零开始学习深度学习】31. 卷积神经网络之残差网络（ResNet）介绍及其Pytorch实现
【从零开始学习深度学习】31. 卷积神经网络之残差网络（ResNet）介绍及其Pytorch实现
13 1
|
5天前
|

【从零开始学习深度学习】30. 神经网络中批量归一化层（batch normalization）的作用及其Pytorch实现
【从零开始学习深度学习】30. 神经网络中批量归一化层（batch normalization）的作用及其Pytorch实现
12 1
|
5天前
|

【从零开始学习深度学习】28.卷积神经网络之NiN模型介绍及其Pytorch实现【含完整代码】
【从零开始学习深度学习】28.卷积神经网络之NiN模型介绍及其Pytorch实现【含完整代码】
11 2
|
5天前
|

【从零开始学习深度学习】26.卷积神经网络之AlexNet模型介绍及其Pytorch实现【含完整代码】
【从零开始学习深度学习】26.卷积神经网络之AlexNet模型介绍及其Pytorch实现【含完整代码】
15 3
|
1天前
|

【机器学习】图神经网络：深度解析图神经网络的基本构成和原理以及关键技术
【机器学习】图神经网络：深度解析图神经网络的基本构成和原理以及关键技术
15 2
|
5天前
|

【从零开始学习深度学习】50.Pytorch_NLP项目实战：卷积神经网络textCNN在文本情感分类的运用
【从零开始学习深度学习】50.Pytorch_NLP项目实战：卷积神经网络textCNN在文本情感分类的运用
12 0
|
5天前
|

【从零开始学习深度学习】36. 门控循环神经网络之长短期记忆网络（LSTM）介绍、Pytorch实现LSTM并进行训练预测
【从零开始学习深度学习】36. 门控循环神经网络之长短期记忆网络（LSTM）介绍、Pytorch实现LSTM并进行训练预测
13 2
|
5天前
|

【从零开始学习深度学习】35. 门控循环神经网络之门控循环单元（gated recurrent unit，GRU）介绍、Pytorch实现GRU并进行训练预测
【从零开始学习深度学习】35. 门控循环神经网络之门控循环单元（gated recurrent unit，GRU）介绍、Pytorch实现GRU并进行训练预测
12 0