3. Model Definition and Operations
An example of a simple two-layer convolutional network
# Convolutional neural network (2 convolutional layers).
class ConvNet(nn.Module):
    def __init__(self, num_classes=10):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.fc = nn.Linear(7 * 7 * 32, num_classes)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        return out

model = ConvNet(num_classes).to(device)
The computation and visualization of convolutional layer shapes can be aided by this website.
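As a rough substitute for such a tool, the standard output-size formula can be checked directly; a minimal sketch (the helper function below is my own addition, not part of the original text), applied to the layers of the ConvNet above:

def conv2d_output_size(in_size, kernel_size, stride=1, padding=0):
    # Spatial output size of a Conv2d/MaxPool2d layer (no dilation, no ceil_mode).
    return (in_size + 2 * padding - kernel_size) // stride + 1

# 28x28 MNIST input -> conv (k=5, s=1, p=2) keeps 28, pool (k=2, s=2) gives 14;
# the second block gives 7, matching the 7*7*32 input of the fully connected layer.
size = 28
for kernel, stride, padding in [(5, 1, 2), (2, 2, 0), (5, 1, 2), (2, 2, 0)]:
    size = conv2d_output_size(size, kernel, stride, padding)
print(size)  # 7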
Bilinear pooling
X = torch.reshape(X, (N, D, H * W))                   # Assume X has shape N*D*H*W
X = torch.bmm(X, torch.transpose(X, 1, 2)) / (H * W)  # Bilinear pooling
assert X.size() == (N, D, D)
X = torch.reshape(X, (N, D * D))
X = torch.sign(X) * torch.sqrt(torch.abs(X) + 1e-5)   # Signed-sqrt normalization
X = torch.nn.functional.normalize(X)                  # L2 normalization
Multi-GPU synchronized BN (batch normalization)
When torch.nn.DataParallel is used to run code on multiple GPU cards, PyTorch's BN layers by default compute the mean and standard deviation independently on each card. Synchronized BN instead computes the BN statistics from the data on all cards together, which mitigates inaccurate estimates of the mean and standard deviation when the batch size is small; it is an effective trick for improving performance in tasks such as object detection.
sync_bn = torch.nn.SyncBatchNorm(num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
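In newer PyTorch versions, an entire model can also be converted in one call with the built-in helper; a minimal sketch of using it together with DistributedDataParallel (local_rank and device here are assumed to be provided by the distributed launcher, and the process group is assumed to be initialized already):

# Replace every BatchNorm*d layer in the model with SyncBatchNorm.
model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
model = torch.nn.parallel.DistributedDataParallel(
    model.to(device), device_ids=[local_rank])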
Convert all BN layers in an existing network to synchronized BN layers
def convertBNtoSyncBN(module, process_group=None):
    '''Recursively replace all BN layers with SyncBN layers.

    Args:
        module[torch.nn.Module]: network
    '''
    if isinstance(module, torch.nn.modules.batchnorm._BatchNorm):
        sync_bn = torch.nn.SyncBatchNorm(module.num_features, module.eps, module.momentum,
                                         module.affine, module.track_running_stats,
                                         process_group)
        sync_bn.running_mean = module.running_mean
        sync_bn.running_var = module.running_var
        if module.affine:
            sync_bn.weight = torch.nn.Parameter(module.weight.clone().detach())
            sync_bn.bias = torch.nn.Parameter(module.bias.clone().detach())
        return sync_bn
    else:
        for name, child_module in module.named_children():
            setattr(module, name, convertBNtoSyncBN(child_module, process_group=process_group))
        return module

BN-style moving average
To implement a moving (running) average similar to the one in BN, use in-place operations to update the running average in the forward function.

class BN(torch.nn.Module):
    def __init__(self):
        ...
        self.register_buffer('running_mean', torch.zeros(num_features))

    def forward(self, X):
        ...
        self.running_mean += momentum * (current - self.running_mean)
Compute the total number of parameters in a model
num_parameters = sum(torch.numel(parameter) for parameter in model.parameters())
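If only the trainable parameters are of interest, a small variant (my own addition) filters on requires_grad:

num_trainable = sum(parameter.numel() for parameter in model.parameters() if parameter.requires_grad)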
Inspect the parameters in a network
You can view all current trainable parameters (including those inherited from parent classes) via model.state_dict() or model.named_parameters().
params = list(model.named_parameters())
(name, param) = params[28]
print(name)
print(param.grad)
print('-------------------------------------------------')
(name2, param2) = params[29]
print(name2)
print(param2.grad)
print('----------------------------------------------------')
(name1, param1) = params[30]
print(name1)
print(param1.grad)
Model visualization (using pytorchviz)
https://github.com/szagoruyko/pytorchviz
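A minimal usage sketch, assuming pytorchviz is installed and importable as torchviz, applied to the ConvNet defined earlier:

from torchviz import make_dot

x = torch.randn(1, 1, 28, 28).to(device)
y = model(x)
# Render the computation graph of y to convnet.pdf.
make_dot(y, params=dict(model.named_parameters())).render('convnet', format='pdf')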
Print model information similar to Keras' model.summary(), using pytorch-summary
https://github.com/sksq96/pytorch-summary
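A minimal usage sketch, assuming the package is installed and importable as torchsummary:

from torchsummary import summary

# input_size is (channels, height, width), without the batch dimension.
summary(model, input_size=(1, 28, 28))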
Model weight initialization
Note the difference between model.modules() and model.children(): model.modules() iterates recursively over all submodules of the model, whereas model.children() only iterates over the model's immediate children.
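For the ConvNet defined earlier, for example, the difference can be checked directly (the counts in the comments apply to that specific model):

# children() yields only layer1, layer2 and fc;
# modules() also yields the ConvNet itself and every layer inside the Sequential blocks.
print(len(list(model.children())))  # 3
print(len(list(model.modules())))   # 12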
# Common practise for initialization.
for layer in model.modules():
    if isinstance(layer, torch.nn.Conv2d):
        torch.nn.init.kaiming_normal_(layer.weight, mode='fan_out',
                                      nonlinearity='relu')
        if layer.bias is not None:
            torch.nn.init.constant_(layer.bias, val=0.0)
    elif isinstance(layer, torch.nn.BatchNorm2d):
        torch.nn.init.constant_(layer.weight, val=1.0)
        torch.nn.init.constant_(layer.bias, val=0.0)
    elif isinstance(layer, torch.nn.Linear):
        torch.nn.init.xavier_normal_(layer.weight)
        if layer.bias is not None:
            torch.nn.init.constant_(layer.bias, val=0.0)

# Initialization with given tensor.
layer.weight = torch.nn.Parameter(tensor)
Extract a specific layer from a model
modules() returns an iterator over all modules in the model and reaches the innermost ones, e.g. the self.layer1.conv1 module. Correspondingly, named_children() and named_modules() return not only iterators over the modules but also the names of the layers.
# Take the first two layers of the model.
new_model = nn.Sequential(*list(model.children())[:2])

# To extract all convolutional layers from the model:
conv_model = nn.Sequential()
for layer in model.named_modules():
    if isinstance(layer[1], nn.Conv2d):
        # Replace dots in nested module names, since add_module() does not allow them.
        conv_model.add_module(layer[0].replace('.', '_'), layer[1])
Use a pretrained model for some layers
Note that if the saved model was wrapped in torch.nn.DataParallel, the current model needs to be wrapped in DataParallel as well.
model.load_state_dict(torch.load('model.pth'), strict=False)
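If, conversely, a checkpoint saved from a DataParallel model (whose keys carry a 'module.' prefix) needs to be loaded into a plain, unwrapped model, one common workaround is to strip the prefix first; a minimal sketch, assuming the file stores the state dict itself:

state_dict = torch.load('model.pth', map_location='cpu')
state_dict = {(k[len('module.'):] if k.startswith('module.') else k): v
              for k, v in state_dict.items()}
model.load_state_dict(state_dict, strict=False)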
Load a model saved on GPU onto the CPU
model.load_state_dict(torch.load('model.pth', map_location='cpu'))
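Similarly, map_location can place the weights on a specific GPU, for example:

model.load_state_dict(torch.load('model.pth', map_location='cuda:0'))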
Import the matching parts of another model into a new model
When loading parameters into a model, if the two models have different structures, directly loading the parameters will raise an error. The method below imports the parts that another model has in common into the new model.
# model_new is the new model.
# model_saved is the other model, e.g. a saved model's state dict loaded with torch.load.
model_new_dict = model_new.state_dict()
model_common_dict = {k: v for k, v in model_saved.items() if k in model_new_dict.keys()}
model_new_dict.update(model_common_dict)
model_new.load_state_dict(model_new_dict)
4. Data Processing
Compute the mean and standard deviation of a dataset
import numpy as np

def compute_mean_and_std(dataset):
    # Input a PyTorch dataset, output the per-channel mean and standard deviation.
    mean_r = 0
    mean_g = 0
    mean_b = 0

    for img, _ in dataset:
        img = np.asarray(img)  # Change PIL Image to numpy array.
        mean_b += np.mean(img[:, :, 0])
        mean_g += np.mean(img[:, :, 1])
        mean_r += np.mean(img[:, :, 2])

    mean_b /= len(dataset)
    mean_g /= len(dataset)
    mean_r /= len(dataset)

    diff_r = 0
    diff_g = 0
    diff_b = 0

    N = 0
    for img, _ in dataset:
        img = np.asarray(img)
        diff_b += np.sum(np.power(img[:, :, 0] - mean_b, 2))
        diff_g += np.sum(np.power(img[:, :, 1] - mean_g, 2))
        diff_r += np.sum(np.power(img[:, :, 2] - mean_r, 2))
        N += np.prod(img[:, :, 0].shape)

    std_b = np.sqrt(diff_b / N)
    std_g = np.sqrt(diff_g / N)
    std_r = np.sqrt(diff_r / N)

    mean = (mean_b.item() / 255.0, mean_g.item() / 255.0, mean_r.item() / 255.0)
    std = (std_b.item() / 255.0, std_g.item() / 255.0, std_r.item() / 255.0)
    return mean, std
Get basic information about video data
import cv2

video = cv2.VideoCapture(mp4_path)
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
fps = int(video.get(cv2.CAP_PROP_FPS))
video.release()
TSN: sample one frame per segment
K = self._num_segments
if is_train:
    if num_frames > K:
        # Random index for each segment.
        frame_indices = torch.randint(
            high=num_frames // K, size=(K,), dtype=torch.long)
        frame_indices += num_frames // K * torch.arange(K)
    else:
        frame_indices = torch.randint(
            high=num_frames, size=(K - num_frames,), dtype=torch.long)
        frame_indices = torch.sort(torch.cat((
            torch.arange(num_frames), frame_indices)))[0]
else:
    if num_frames > K:
        # Middle index for each segment.
        frame_indices = num_frames // K // 2
        frame_indices += num_frames // K * torch.arange(K)
    else:
        frame_indices = torch.sort(torch.cat((
            torch.arange(num_frames), torch.arange(K - num_frames))))[0]
assert frame_indices.size() == (K,)
return [frame_indices[i] for i in range(K)]
Common training and validation data preprocessing
The ToTensor operation converts a PIL.Image, or an np.ndarray of shape H×W×D with values in [0, 255], into a torch.Tensor of shape D×H×W with values in [0.0, 1.0].
train_transform = torchvision.transforms.Compose([
    torchvision.transforms.RandomResizedCrop(size=224, scale=(0.08, 1.0)),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                     std=(0.229, 0.224, 0.225)),
])
val_transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize(256),
    torchvision.transforms.CenterCrop(224),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                     std=(0.229, 0.224, 0.225)),
])
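A minimal usage sketch (the dataset path and DataLoader settings here are placeholders of my own) showing how these transforms are typically attached to a torchvision dataset:

train_dataset = torchvision.datasets.ImageFolder('path/to/train', transform=train_transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True,
                                           num_workers=4, pin_memory=True)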