# Question 1. DeepSNAP异质图简介

• node_feature: 节点特征 The feature of each node (torch.tensor)
• edge_feature: 边特征 The feature of each edge (torch.tensor)
• node_label: 节点标签 The label of each node (int)
• node_type: 节点类型 The node type of each node (string)
• edge_type: 边类型 The edge type of each edge (string)

from pylab import *
import networkx as nx
from networkx.algorithms.community import greedy_modularity_communities
import matplotlib.pyplot as plt
import copy
# Load the karate club graph; its nodes carry a 'club' attribute
# (for example {'club': 'Mr. Hi'}).
G = nx.karate_club_graph()

# Map node index -> community index (0 for the "Mr. Hi" club, 1 otherwise).
# G.nodes(data=True) yields (index, attribute_dict) pairs; with the default
# data=False only the index is produced.
community_map = {
    node: 0 if attrs["club"] == "Mr. Hi" else 1
    for node, attrs in G.nodes(data=True)
}

# Community index -> scalar handed to the colormap.
color_map = {0: 0, 1: 1}
node_color = [color_map[community_map[node]] for node in G.nodes()]

# Compute the layout once; `pos` is reused by the later figures.
pos = nx.spring_layout(G)
plt.figure(figsize=(7, 7))
nx.draw(G, pos=pos, cmap=plt.get_cmap('coolwarm'), node_color=node_color)
plt.show()

## 1.1 Question 1.1：分配Node Type and Node Features

# Small demonstration of attaching a per-node attribute from a dict.
G_eg = nx.path_graph(3)
# Betweenness centrality, as a dict keyed by node. It is computed on G here,
# not on G_eg.
bb = nx.betweenness_centrality(G)
nx.set_node_attributes(G_eg, values=bb, name="betweenness")
G_eg.nodes[1]["betweenness"]

0.053936688311688304

import torch
def assign_node_types(G, community_map):
    """Attach a ``node_type`` attribute to the nodes of ``G`` in place.

    Args:
        G: NetworkX graph to annotate.
        community_map: dict mapping node -> community label (0 or 1).

    Label 0 becomes ``'n0'``; every other label becomes ``'n1'``.
    """
    node_types = {
        node: 'n0' if label == 0 else 'n1'
        for node, label in community_map.items()
    }
    nx.set_node_attributes(G, node_types, 'node_type')
def assign_node_labels(G, community_map):
    """Attach a ``node_label`` attribute to the nodes of ``G`` in place.

    Args:
        G: NetworkX graph to annotate.
        community_map: dict mapping node -> community label (0 or 1); each
            value is stored unchanged as that node's label.
    """
    nx.set_node_attributes(G, community_map, 'node_label')
def assign_node_features(G):
    """Attach the ``node_feature`` ``[1, 1, 1, 1, 1]`` to every node of ``G``.

    Args:
        G: NetworkX graph to annotate in place.
    """
    # Give each node its own list. Passing a single list object to
    # set_node_attributes stores that same object on every node, so mutating
    # one node's feature would change all of them.
    features = {node: [1, 1, 1, 1, 1] for node in G.nodes()}
    nx.set_node_attributes(G, features, 'node_feature')
# Annotate the karate club graph in place with types, labels and features.
assign_node_types(G, community_map)
assign_node_labels(G, community_map)
assign_node_features(G)

# Print only the first (node, attributes) pair as a sanity check.
for n in G.nodes(data=True):
    print(n)
    break

(0, {'club': 'Mr. Hi', 'node_type': 'n0', 'node_label': 0, 'node_feature': [1, 1, 1, 1, 1]})

## 1.2 Question 1.2：分配Edge Types

• Edges within club “Mr. Hi”: e0
• Edges within club “Officer”: e1
• Edges between clubs: e2

def assign_edge_types(G, community_map):
    """Attach an ``edge_type`` attribute to the edges of ``G`` in place.

    Args:
        G: NetworkX graph to annotate.
        community_map: dict mapping node -> community label (0 or 1).

    Edge types:
        ``'e0'``: both endpoints have label 0.
        ``'e1'``: both endpoints have label 1.
        ``'e2'``: every other edge (endpoints in different communities).
    """
    # Classify by community_map rather than by the 'club' node attribute, so
    # the function honours its argument.
    edge_types = {}
    for u, v in G.edges():
        label_u, label_v = community_map[u], community_map[v]
        if label_u == label_v == 0:
            edge_types[(u, v)] = 'e0'
        elif label_u == label_v == 1:
            edge_types[(u, v)] = 'e1'
        else:
            edge_types[(u, v)] = 'e2'
    nx.set_edge_attributes(G, edge_types, 'edge_type')
assign_edge_types(G, community_map)

# Print only the first (head, tail, attributes) triple as a sanity check.
for edge in G.edges(data=True):
    print(edge)
    break

(0, 1, {'edge_type': 'e0'})

## 1.3 NetworkX异质图可视化

# Colour each edge by the communities of its endpoints: blue inside
# community 0, red inside community 1, green otherwise.
edge_color = {}
for edge in G.edges():
    n1, n2 = edge
    if community_map[n1] == community_map[n2] and community_map[n1] == 0:
        edge_color[edge] = 'blue'
    elif community_map[n1] == community_map[n2] and community_map[n1] == 1:
        edge_color[edge] = 'red'
    else:
        edge_color[edge] = 'green'

# Snapshot the graph BEFORE the plotting-only 'color' attribute is added, so
# G_orig keeps only the type/label/feature attributes.
G_orig = copy.deepcopy(G)
nx.set_edge_attributes(G, edge_color, name='color')

# Materialise the colours as a list (in edge order) instead of a dict view.
colors = list(nx.get_edge_attributes(G, 'color').values())
labels = nx.get_node_attributes(G, 'node_type')
plt.figure(figsize=(8, 8))
nx.draw(G, pos=pos, cmap=plt.get_cmap('coolwarm'), node_color=node_color, edge_color=colors, labels=labels, font_color='white')
plt.show()

## 1.4 将NetworkX异质图转换为DeepSNAP异质图

from deepsnap.hetero_graph import HeteroGraph
# Build a DeepSNAP HeteroGraph from G_orig, the NetworkX graph copy.
hete = HeteroGraph(G_orig)

G_orig 的节点属性：

# Attribute dict of node 0 in G_orig.
G_orig.nodes(data=True)[0]

{'club': 'Mr. Hi',
'node_type': 'n0',
'node_label': 0,
'node_feature': [1, 1, 1, 1, 1]}

G_orig 的边属性：

# Print only the first edge of G_orig together with its attribute dict.
for e in G_orig.edges(data=True):
    print(e)
    break

(0, 1, {'edge_type': 'e0'})

G 的边属性：

# Print only the first edge of G together with its attribute dict.
for e in G.edges(data=True):
    print(e)
    break

(0, 1, {'edge_type': 'e0', 'color': 'blue'})

DeepSNAP中对应的代码：

def _get_edge_attributes(self, key: str):
    r"""
    Similar to the _get_node_attributes.

    Collects the edge attribute ``key`` from ``self.G`` and groups the values
    by message type ``(head node type, edge type, tail node type)``. Values
    that are tensors, floats or ints are converted to one tensor per message
    type; string values are kept as lists when ``key == "edge_type"``.

    Returns ``None`` when no edge carries ``key``.

    Raises:
        TypeError: if the attribute values have an unsupported type.
    """
    attributes = {}
    indices = None
    # TODO: suspect edge_to_tensor_mapping and edge_to_graph_mapping not useful
    if key == "edge_type":
        # Edge positions are only tracked for the edge_type attribute.
        indices = {}
    for edge_idx, (head, tail, edge_dict) in enumerate(
        self.G.edges(data=True)
    ):
        if key in edge_dict:
            head_type = self.G.nodes[head]["node_type"]
            tail_type = self.G.nodes[tail]["node_type"]
            edge_type = self._get_edge_type(edge_dict)
            # The message type combines both endpoint node types with the
            # edge type; it is the grouping key for everything below.
            message_type = (head_type, edge_type, tail_type)
            if message_type not in attributes:
                attributes[message_type] = []
            attributes[message_type].append(edge_dict[key])
            if indices is not None:
                if message_type not in indices:
                    indices[message_type] = []
                indices[message_type].append(edge_idx)
    if len(attributes) == 0:
        return None
    for message_type, val in attributes.items():
        first = val[0]
        if torch.is_tensor(first):
            attributes[message_type] = torch.stack(val, dim=0)
        elif isinstance(first, float):
            attributes[message_type] = torch.tensor(val, dtype=torch.float)
        elif isinstance(first, int):
            attributes[message_type] = torch.tensor(val, dtype=torch.long)
        elif isinstance(first, str) and key == "edge_type":
            # String edge types stay as plain Python lists.
            continue
        else:
            raise TypeError(f"Unknown type {key} in edge attributes.")
    # NOTE(review): this is a quoted excerpt and stops here; the upstream
    # DeepSNAP method presumably goes on to return the collected values.
    # Confirm against the library source before relying on the return value.

# Print every item produced by iterating over the HeteroGraph.
for hetero_feature in hete:
    print(hetero_feature)

## 1.5 Question1.3：每一node type有多少个节点

hete的node_type属性是一个字典，key为node_type值（如 n0）：如果node_type值是str，则value为类似这样的list：['n0', 'n0', 'n0', 'n0', 'n0', 'n0', 'n0', 'n0', 'n0', 'n0', 'n0', 'n0', 'n0', 'n0', 'n0', 'n0', 'n0']；如果node_type值是int，则value为Tensor。

def get_nodes_per_type(hete):
    """Count the nodes of type ``'n0'`` and ``'n1'``.

    Args:
        hete: object whose ``node_type`` attribute maps each node type to a
            sized collection holding one entry per node of that type.

    Returns:
        Tuple ``(num_nodes_n0, num_nodes_n1)``.
    """
    num_nodes_n0 = len(hete.node_type['n0'])
    num_nodes_n1 = len(hete.node_type['n1'])
    return num_nodes_n0, num_nodes_n1
# Report the node count of each of the two node types.
node_counts = get_nodes_per_type(hete)
num_nodes_n0, num_nodes_n1 = node_counts
print("Node type n0 has {} nodes".format(num_nodes_n0))
print("Node type n1 has {} nodes".format(num_nodes_n1))

Node type n0 has 17 nodes
Node type n1 has 17 nodes

## 1.6 Question 1.4：每一message type有多少条边

message type是node type和edge type的结合体。

# Message types present in the heterogeneous graph.
hete.message_types

[('n0', 'e0', 'n0'), ('n0', 'e2', 'n1'), ('n1', 'e1', 'n1')]

edge_type是键为message_type值的字典，某一元素示例：

# Entry of the edge_type dict for one message-type key.
hete.edge_type[('n0', 'e0', 'n0')]

def get_num_message_edges(hete):
    """Count the edges belonging to each message type.

    Args:
        hete: object whose ``edge_type`` attribute maps each message type to
            a sized collection holding one entry per edge of that type.

    Returns:
        List of ``(message_type, num_edges)`` tuples, in the iteration order
        of ``hete.edge_type``.
    """
    return [
        (message_type, len(edges))
        for message_type, edges in hete.edge_type.items()
    ]
# Report the edge count of every message type.
message_type_edges = get_num_message_edges(hete)
for (message_type, num_edges) in message_type_edges:
    print("Message type {} has {} edges".format(message_type, num_edges))

Message type ('n0', 'e0', 'n0') has 35 edges
Message type ('n0', 'e2', 'n1') has 11 edges
Message type ('n1', 'e1', 'n1') has 32 edges

## 1.7 Question 1.5：数据集划分：每一个split中有多少个节点？

DeepSNAP有内置的数据集划分函数。

from deepsnap.dataset import GraphDataset
def compute_dataset_split_counts(datasets):
    """Count the labelled nodes contained in each dataset split.

    Args:
        datasets: dict mapping a split name (e.g. 'train' / 'val' / 'test')
            to a dataset whose first element exposes ``node_label_index``,
            a dict from node type to an index tensor, for example
            ``{'n0': tensor([10, 8, 3, 12, 0, 13]), 'n1': tensor([...])}``.

    Returns:
        Dict mapping each split name to the total number of label indices,
        summed over every node type present in that split.
    """
    data_set_splits = {}
    for ds_name, ds in datasets.items():
        label_index = ds[0].node_label_index
        # Sum over whatever node types exist instead of hard-coding
        # 'n0' and 'n1'.
        data_set_splits[ds_name] = sum(
            idx.shape[0] for idx in label_index.values()
        )
    return data_set_splits
# Wrap the heterogeneous graph in a node-level dataset; `dataset` must exist
# before it can be split.
dataset = GraphDataset([hete], task='node')
# Splitting the dataset
dataset_train, dataset_val, dataset_test = dataset.split(transductive=True, split_ratio=[0.4, 0.3, 0.3])
datasets = {'train': dataset_train, 'val': dataset_val, 'test': dataset_test}
data_set_splits = compute_dataset_split_counts(datasets)
for dataset_name, num_nodes in data_set_splits.items():
    print("{} dataset has {} nodes".format(dataset_name, num_nodes))

train dataset has 12 nodes
val dataset has 10 nodes
test dataset has 12 nodes

HeteroGraph.node_label_index: Slicing node label to get the corresponding split G.node_label[G.node_label_index].（出自Introduction — DeepSNAP 0.2.0 documentation）

# Compare the labels and label indices of the train split with those of the
# full heterogeneous graph.
data_train=dataset_train[0]
print(data_train.node_label)
print(data_train.node_label_index)
print(hete.node_label)
print(hete.node_label_index)
# Index the full graph's 'n0' labels with the train split's 'n0' label index.
print(hete.node_label['n0'][data_train.node_label_index['n0']])

{'n0': tensor([0, 0, 0, 0, 0, 0]), 'n1': tensor([1, 1, 1, 1, 1, 1])}
{'n0': tensor([ 5, 13, 14,  9,  0,  2]), 'n1': tensor([ 6, 11,  4, 13,  9, 15])}
{'n0': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), 'n1': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])}
{'n0': tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16]), 'n1': tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16])}
tensor([0, 0, 0, 0, 0, 0])

## 1.8 DeepSNAP数据集可视化

from deepsnap.dataset import GraphDataset
# Build the dataset here so this cell does not depend on (or clobber) a
# `dataset` variable left over from another cell.
dataset = GraphDataset([hete], task='node')
# Splitting the dataset
dataset_train, dataset_val, dataset_test = dataset.split(transductive=True, split_ratio=[0.4, 0.3, 0.3])
titles = ['Train', 'Validation', 'Test']
# The loop variable is deliberately not called `dataset`, so the full dataset
# above stays intact.
for title, split in zip(titles, [dataset_train, dataset_val, dataset_test]):
    # _convert_to_graph_index() returns a Tensor. Judging from how it is used
    # here, it holds the graph node indices of this split's nodes of the
    # given type (e.g. [21, 5, 7, 8, 16, 11]).
    n0 = hete._convert_to_graph_index(split[0].node_label_index['n0'], 'n0').tolist()
    n1 = hete._convert_to_graph_index(split[0].node_label_index['n1'], 'n1').tolist()
    plt.figure(figsize=(7, 7))
    plt.title(title)
    # Draw the whole graph in grey, then overlay this split's nodes.
    nx.draw(G_orig, pos=pos, node_color="grey", edge_color=colors, labels=labels, font_color='white')
    # G.subgraph(nodes) is the subgraph induced on the given nodes.
    nx.draw_networkx_nodes(G_orig.subgraph(n0), pos=pos, node_color="blue")
    nx.draw_networkx_nodes(G_orig.subgraph(n1), pos=pos, node_color="red")
    plt.show()

# 2. 异质图节点预测任务

## 2.1 导包

import copy
import torch
import deepsnap
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch_geometric.nn as pyg_nn
from sklearn.metrics import f1_score
from deepsnap.hetero_gnn import forward_op
from deepsnap.hetero_graph import HeteroGraph
from torch_sparse import SparseTensor, matmul

## 2.2 Heterogeneous GNN Layer

class HeteroGNNConv(pyg_nn.MessagePassing):
    """Message-passing layer for a single message type.

    Source-node features are mean-aggregated onto destination nodes. The
    aggregate and the destination features are each linearly projected to
    ``out_channels``, concatenated, and projected once more.
    """

    def __init__(self, in_channels_src, in_channels_dst, out_channels):
        """
        Args:
            in_channels_src: feature size of source nodes.
            in_channels_dst: feature size of destination nodes.
            out_channels: size of the produced embeddings.
        """
        super().__init__(aggr="mean")
        self.in_channels_src = in_channels_src
        self.in_channels_dst = in_channels_dst
        self.out_channels = out_channels
        self.lin_dst = nn.Linear(in_channels_dst, out_channels)  # W_d^{(l)[m]}
        self.lin_src = nn.Linear(in_channels_src, out_channels)  # W_s^{(l)[m]}
        self.lin_update = nn.Linear(out_channels * 2, out_channels)  # W^{(l)[m]}

    def forward(
        self,
        node_feature_src,
        node_feature_dst,
        edge_index,
        size=None,
        res_n_id=None,
    ):
        """Run message passing; all feature arguments are forwarded to
        ``propagate`` so that ``message_and_aggregate`` and ``update`` can
        receive them by name."""
        return self.propagate(
            edge_index,
            size=size,
            node_feature_src=node_feature_src,
            node_feature_dst=node_feature_dst,
            res_n_id=res_n_id,
        )

    def message_and_aggregate(self, edge_index, node_feature_src):
        """Aggregate source features with one sparse-dense product."""
        # Here edge_index is a torch_sparse SparseTensor; matmul with
        # reduce=self.aggr ("mean") combines the source features selected by
        # each row of the sparse matrix.
        return matmul(edge_index, node_feature_src, reduce=self.aggr)

    def update(self, aggr_out, node_feature_dst, res_n_id):
        """Combine the aggregated messages with the destination features.

        ``res_n_id`` is accepted but not used.
        """
        aggr_out = self.lin_src(aggr_out)
        node_feature_dst = self.lin_dst(node_feature_dst)
        # Concatenate along the last (feature) dimension.
        concat_features = torch.cat((node_feature_dst, aggr_out), dim=-1)
        return self.lin_update(concat_features)

## 2.3 Heterogeneous GNN Wrapper Layer

m 是 message type，d 是 destination node type。

class HeteroGNNWrapperConv(deepsnap.hetero_gnn.HeteroConv):
    """Run one conv per message type, then merge the results per node type.

    Docs: https://snap.stanford.edu/deepsnap/modules/hetero_gnn.html
    """

    def __init__(self, convs, args, aggr="mean"):
        """
        Args:
            convs: dict mapping message type -> conv layer.
            args: dict of hyper-parameters; ``'hidden_size'`` and
                ``'attn_size'`` are read when ``aggr == "attn"``.
            aggr: ``"mean"`` or ``"attn"``; how embeddings of different
                message types with the same destination type are merged.
        """
        super().__init__(convs, None)
        self.aggr = aggr
        # Map the index and message type
        self.mapping = {}
        # A numpy array that stores the final attention probability
        self.alpha = None
        self.attn_proj = None
        if self.aggr == "attn":
            self.attn_proj = nn.Sequential(
                nn.Linear(args['hidden_size'], args['attn_size']),
                nn.Tanh(),
                nn.Linear(args['attn_size'], 1, bias=False),
            )

    def reset_parameters(self):
        """Reset the wrapped convs and, if present, the attention MLP."""
        super().reset_parameters()
        if self.aggr == "attn":
            for layer in self.attn_proj.children():
                # nn.Tanh has no parameters and no reset_parameters().
                if hasattr(layer, "reset_parameters"):
                    layer.reset_parameters()

    def forward(self, node_features, edge_indices):
        """
        Args:
            node_features: dict mapping node type -> feature tensor.
            edge_indices: dict mapping message type -> edge index.

        Returns:
            Dict mapping each destination node type to its embedding.
        """
        message_type_emb = {}
        for message_key, edge_index in edge_indices.items():
            src_type, edge_type, dst_type = message_key
            message_type_emb[message_key] = self.convs[message_key](
                node_features[src_type],
                node_features[dst_type],
                edge_index,
            )
        node_emb = {dst: [] for _, _, dst in message_type_emb.keys()}
        mapping = {}
        for (src, edge_type, dst), item in message_type_emb.items():
            # Key = position of this embedding in its destination type's list.
            mapping[len(node_emb[dst])] = (src, edge_type, dst)
            node_emb[dst].append(item)
        # mapping example:
        # {0: ('paper', 'author', 'paper'), 1: ('paper', 'subject', 'paper')}
        self.mapping = mapping
        for node_type, embs in node_emb.items():
            if len(embs) == 1:
                node_emb[node_type] = embs[0]
            else:
                node_emb[node_type] = self.aggregate(embs)
        return node_emb

    def aggregate(self, xs):
        """Merge a list of per-message-type embedding tensors into one.

        Raises:
            ValueError: if ``self.aggr`` is neither ``"mean"`` nor ``"attn"``.
        """
        if self.aggr == "mean":
            x = torch.stack(xs, dim=-1)
            return x.mean(dim=-1)
        elif self.aggr == "attn":
            N = xs[0].shape[0]  # Number of nodes for that node type
            M = len(xs)  # Number of message types for that node type
            x = torch.cat(xs, dim=0).view(M, N, -1)  # M * N * D
            z = self.attn_proj(x).view(M, N)  # M * N
            z = z.mean(1)  # M
            alpha = torch.softmax(z, dim=0)  # M
            # Store the attention result to self.alpha as np array of shape
            # (M,). It is for inspection only, not for back-propagation.
            self.alpha = alpha.view(-1).data.cpu().numpy()
            alpha = alpha.view(M, 1, 1)
            x = x * alpha
            return x.sum(dim=0)
        raise ValueError(f"Unsupported aggregation: {self.aggr!r}")

## 2.4 初始化Heterogeneous GNN Layers

def generate_convs(hetero_graph, conv, hidden_size, first_layer=False):
    """Create one conv layer per message type of ``hetero_graph``.

    Args:
        hetero_graph: object exposing ``message_types`` (an iterable of
            ``(src_type, edge_type, dst_type)`` tuples) and
            ``num_node_features(node_type)``.
        conv: callable invoked as ``conv(src_size, dst_size, hidden_size)``,
            e.g. the HeteroGNNConv class.
        hidden_size: output size of every layer; also the input size of
            every layer that is not the first.
        first_layer: when truthy, input sizes are the node feature sizes of
            the source and destination node types.

    Returns:
        Dict mapping each message type to its conv layer.
    """
    convs = {}
    for message_type in hetero_graph.message_types:
        if first_layer:
            src_size = hetero_graph.num_node_features(message_type[0])
            dst_size = hetero_graph.num_node_features(message_type[2])
        else:
            src_size = dst_size = hidden_size
        convs[message_type] = conv(src_size, dst_size, hidden_size)
    return convs

## 2.5 HeteroGNN

self.convs1 → self.bns1 → self.relus1 → self.convs2 → self.bns2 → self.relus2 → self.post_mps

class HeteroGNN(torch.nn.Module):
    """Two-layer heterogeneous GNN with a per-node-type linear head.

    Pipeline: convs1 -> bns1 -> relus1 -> convs2 -> bns2 -> relus2 -> post_mps
    """

    def __init__(self, hetero_graph, args, aggr="mean"):
        """
        Args:
            hetero_graph: DeepSNAP HeteroGraph; its message types, node types
                and per-type label counts size the layers.
            args: dict of hyper-parameters; ``'hidden_size'`` is read here.
            aggr: aggregation passed on to HeteroGNNWrapperConv.
        """
        super().__init__()
        self.aggr = aggr
        self.hidden_size = args['hidden_size']
        self.bns1 = nn.ModuleDict()
        self.bns2 = nn.ModuleDict()
        self.relus1 = nn.ModuleDict()
        self.relus2 = nn.ModuleDict()
        self.post_mps = nn.ModuleDict()
        convs1 = generate_convs(hetero_graph, HeteroGNNConv, self.hidden_size, first_layer=True)
        convs2 = generate_convs(hetero_graph, HeteroGNNConv, self.hidden_size)
        self.convs1 = HeteroGNNWrapperConv(convs1, args, aggr=self.aggr)
        self.convs2 = HeteroGNNWrapperConv(convs2, args, aggr=self.aggr)
        # One batch norm, activation and output head per node type.
        for node_type in hetero_graph.node_types:
            self.bns1[node_type] = torch.nn.BatchNorm1d(self.hidden_size, eps=1)
            self.bns2[node_type] = torch.nn.BatchNorm1d(self.hidden_size, eps=1)
            self.post_mps[node_type] = nn.Linear(self.hidden_size, hetero_graph.num_node_labels(node_type))
            self.relus1[node_type] = nn.LeakyReLU()
            self.relus2[node_type] = nn.LeakyReLU()

    def forward(self, node_feature, edge_index):
        """
        Args:
            node_feature: dict mapping node type -> feature tensor.
            edge_index: dict mapping message type -> edge index.

        Returns:
            Dict mapping node type -> output of that type's ``post_mps`` head.
        """
        x = node_feature
        x = self.convs1(x, edge_index)
        # forward_op is given a dict of tensors and a ModuleDict with the
        # same node-type keys.
        x = forward_op(x, self.bns1)
        x = forward_op(x, self.relus1)
        x = self.convs2(x, edge_index)
        x = forward_op(x, self.bns2)
        x = forward_op(x, self.relus2)
        x = forward_op(x, self.post_mps)
        return x

    def loss(self, preds, y, indices):
        """Sum the cross-entropy over node types, restricted to ``indices``.

        Args:
            preds: dict mapping node type -> prediction tensor.
            y: dict mapping node type -> label tensor.
            indices: dict mapping node type -> index of the nodes to score.
        """
        loss = 0
        loss_func = F.cross_entropy
        for node_type in preds:
            idx = indices[node_type]
            loss += loss_func(preds[node_type][idx], y[node_type][idx])
        return loss

forward_op(x, module_dict, **kwargs)：

## 2.6 构建 train() 和 test() 函数

def train(model, optimizer, hetero_graph, train_idx):
    """Run one full-graph optimisation step.

    Args:
        model: module called as ``model(node_feature, edge_index)`` that also
            provides ``loss(preds, labels, indices)``.
        optimizer: optimiser over the model's parameters.
        hetero_graph: object exposing ``node_feature``, ``edge_index`` and
            ``node_label``.
        train_idx: indices of the training nodes, passed on to ``model.loss``.

    Returns:
        The training loss of this step as a Python float.
    """
    model.train()
    # Clear the gradients left over from the previous step; otherwise
    # backward() would accumulate on top of them.
    optimizer.zero_grad()
    preds = model(hetero_graph.node_feature, hetero_graph.edge_index)
    loss = model.loss(preds, hetero_graph.node_label, train_idx)
    loss.backward()
    optimizer.step()
    return loss.item()
def test(model, graph, indices, best_model=None, best_val=0):
    """Evaluate ``model`` on several index sets and track the best model.

    Args:
        model: module called as ``model(node_feature, edge_index)``.
        graph: object exposing ``node_feature``, ``edge_index`` and
            ``node_label``.
        indices: sequence of dicts (node type -> node index), one per split.
            The entry at position 1 is the one used for model selection.
        best_model: best model found so far, or None.
        best_val: best micro-F1 seen so far on the split at position 1.

    Returns:
        ``(accs, best_model, best_val)``, where ``accs`` holds one
        ``(micro_f1, macro_f1)`` tuple per entry of ``indices``, each averaged
        over node types.
    """
    model.eval()
    # The forward pass does not depend on the split, so run it once and
    # without building an autograd graph.
    with torch.no_grad():
        preds = model(graph.node_feature, graph.edge_index)
    accs = []
    for index in indices:
        num_node_types = 0
        micro = 0
        macro = 0
        for node_type in preds:
            idx = index[node_type]
            pred = preds[node_type][idx].max(1)[1]
            label_np = graph.node_label[node_type][idx].cpu().numpy()
            pred_np = pred.cpu().numpy()
            # Accumulate so that the division below yields a mean over node
            # types. Averaging F1 scores is only a rough summary; with one
            # node type it equals that type's score.
            micro += f1_score(label_np, pred_np, average='micro')
            macro += f1_score(label_np, pred_np, average='macro')
            num_node_types += 1
        micro /= num_node_types
        macro /= num_node_types
        accs.append((micro, macro))
    if accs[1][0] > best_val:
        best_val = accs[1][0]
        # Deep copy: the live model keeps training, so a plain reference
        # would not preserve the best weights.
        best_model = copy.deepcopy(model)
    return accs, best_model, best_val

## 2.7 设置超参

# Hyper-parameters for the experiments below.
args = dict(
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
    hidden_size=64,
    epochs=100,
    weight_decay=1e-5,
    lr=0.003,
    attn_size=32,
)

## 2.8 数据集导入及预处理

print("Device: {}".format(args['device']))
# `data` is a dict whose keys are str and whose values are Tensors.
# NOTE(review): the load step was missing from these notes. "acm.pkl" is
# assumed to be the dataset file name; confirm the path before running.
data = torch.load("acm.pkl")
# Message types
message_type_1 = ("paper", "author", "paper")
message_type_2 = ("paper", "subject", "paper")
# Dictionary of edge indices
edge_index = {}
edge_index[message_type_1] = data['pap']
edge_index[message_type_2] = data['psp']
# Dictionary of node features
node_feature = {}
node_feature["paper"] = data['feature']
# Dictionary of node labels
node_label = {}
node_label["paper"] = data['label']
# Load the train, validation and test indices
train_idx = {"paper": data['train_idx'].to(args['device'])}
val_idx = {"paper": data['val_idx'].to(args['device'])}
test_idx = {"paper": data['test_idx'].to(args['device'])}
# Construct a deepsnap tensor backend HeteroGraph
hetero_graph = HeteroGraph(
    node_feature=node_feature,
    node_label=node_label,
    edge_index=edge_index,
    directed=True
)
print(f"ACM heterogeneous graph: {hetero_graph.num_nodes()} nodes, {hetero_graph.num_edges()} edges")
# Node feature and node label to device
for key in hetero_graph.node_feature:
    hetero_graph.node_feature[key] = hetero_graph.node_feature[key].to(args['device'])
for key in hetero_graph.node_label:
    hetero_graph.node_label[key] = hetero_graph.node_label[key].to(args['device'])
# Edge_index to sparse tensor and to device
num_papers = hetero_graph.num_nodes('paper')
for key in hetero_graph.edge_index:
    dense_index = hetero_graph.edge_index[key]
    adj = SparseTensor(row=dense_index[0], col=dense_index[1], sparse_sizes=(num_papers, num_papers))
    # Store the sparse matrix back on the graph; without this the conversion
    # is discarded and the prints below would still show dense indices.
    # NOTE(review): the transpose follows the usual adj_t convention for
    # SparseTensor message passing; confirm against the original notebook.
    hetero_graph.edge_index[key] = adj.t().to(args['device'])
print(hetero_graph.edge_index[message_type_1])
print(hetero_graph.edge_index[message_type_2])

Device: cuda
ACM heterogeneous graph: {'paper': 3025} nodes, {('paper', 'author', 'paper'): 26256, ('paper', 'subject', 'paper'): 2207736} edges
SparseTensor(row=tensor([   0,    0,    0,  ..., 3024, 3024, 3024], device='cuda:0'),
col=tensor([   8,   20,   51,  ..., 2948, 2983, 2991], device='cuda:0'),
size=(3025, 3025), nnz=26256, density=0.29%)
SparseTensor(row=tensor([   0,    0,    0,  ..., 3024, 3024, 3024], device='cuda:0'),
col=tensor([  75,  434,  534,  ..., 3020, 3021, 3022], device='cuda:0'),
size=(3025, 3025), nnz=2207736, density=24.13%)

## 2.9 Training the Mean Aggregation

best_model = None
best_val = 0
model = HeteroGNN(hetero_graph, args, aggr="mean").to(args['device'])
# train() needs an optimiser; build it from the learning rate and weight
# decay configured in args.
optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])
for epoch in range(args['epochs']):
    loss = train(model, optimizer, hetero_graph, train_idx)
    accs, best_model, best_val = test(model, hetero_graph, [train_idx, val_idx, test_idx], best_model, best_val)
    print(
        f"Epoch {epoch + 1}: loss {round(loss, 5)}, "
        f"train micro {round(accs[0][0] * 100, 2)}%, train macro {round(accs[0][1] * 100, 2)}%, "
        f"valid micro {round(accs[1][0] * 100, 2)}%, valid macro {round(accs[1][1] * 100, 2)}%, "
        f"test micro {round(accs[2][0] * 100, 2)}%, test macro {round(accs[2][1] * 100, 2)}%"
    )
# Re-evaluate the model that scored best on the validation split.
best_accs, _, _ = test(best_model, hetero_graph, [train_idx, val_idx, test_idx])
print(
    f"Best model: "
    f"train micro {round(best_accs[0][0] * 100, 2)}%, train macro {round(best_accs[0][1] * 100, 2)}%, "
    f"valid micro {round(best_accs[1][0] * 100, 2)}%, valid macro {round(best_accs[1][1] * 100, 2)}%, "
    f"test micro {round(best_accs[2][0] * 100, 2)}%, test macro {round(best_accs[2][1] * 100, 2)}%"
)

Best model: train micro 99.83%, train macro 99.83%, valid micro 98.33%, valid macro 98.33%, test micro 87.86%, test macro 87.78%

## 2.10 Training the Attention Aggregation

best_model = None
best_val = 0
output_size = hetero_graph.num_node_labels('paper')
model = HeteroGNN(hetero_graph, args, aggr="attn").to(args['device'])
# A fresh optimiser bound to this model's parameters, built from the
# learning rate and weight decay configured in args.
optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])
for epoch in range(args['epochs']):
    loss = train(model, optimizer, hetero_graph, train_idx)
    accs, best_model, best_val = test(model, hetero_graph, [train_idx, val_idx, test_idx], best_model, best_val)
    print(
        f"Epoch {epoch + 1}: loss {round(loss, 5)}, "
        f"train micro {round(accs[0][0] * 100, 2)}%, train macro {round(accs[0][1] * 100, 2)}%, "
        f"valid micro {round(accs[1][0] * 100, 2)}%, valid macro {round(accs[1][1] * 100, 2)}%, "
        f"test micro {round(accs[2][0] * 100, 2)}%, test macro {round(accs[2][1] * 100, 2)}%"
    )
# Re-evaluate the model that scored best on the validation split.
best_accs, _, _ = test(best_model, hetero_graph, [train_idx, val_idx, test_idx])
print(
    f"Best model: "
    f"train micro {round(best_accs[0][0] * 100, 2)}%, train macro {round(best_accs[0][1] * 100, 2)}%, "
    f"valid micro {round(best_accs[1][0] * 100, 2)}%, valid macro {round(best_accs[1][1] * 100, 2)}%, "
    f"test micro {round(best_accs[2][0] * 100, 2)}%, test macro {round(best_accs[2][1] * 100, 2)}%"
)

Best model: train micro 99.67%, train macro 99.67%, valid micro 97.67%, valid macro 97.66%, test micro 85.79%, test macro 85.27%

## 2.11 Attention for each Message Type

# alpha is only populated when attention aggregation has run, so skip the
# report if either layer has none.
if model.convs1.alpha is not None and model.convs2.alpha is not None:
    for idx, message_type in model.convs1.mapping.items():
        print(f"Layer 1 has attention {model.convs1.alpha[idx]} on message type {message_type}")
    for idx, message_type in model.convs2.mapping.items():
        print(f"Layer 2 has attention {model.convs2.alpha[idx]} on message type {message_type}")

Layer 1 has attention 0.960588812828064 on message type ('paper', 'author', 'paper')
Layer 1 has attention 0.03941113129258156 on message type ('paper', 'subject', 'paper')
Layer 2 has attention 0.30975428223609924 on message type ('paper', 'author', 'paper')
Layer 2 has attention 0.6902456879615784 on message type ('paper', 'subject', 'paper')

