我们定义的网络如下所示
VGG( (features): Sequential( (conv0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu0): ReLU(inplace=True) (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): ReLU(inplace=True) (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (norm3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu3): ReLU(inplace=True) (conv4): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (norm4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu4): ReLU(inplace=True) (pool5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (conv6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (norm6): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu6): ReLU(inplace=True) (conv7): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (norm7): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu7): ReLU(inplace=True) (conv8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (norm8): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu8): ReLU(inplace=True) (pool9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (conv10): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (norm10): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu10): ReLU(inplace=True) (conv11): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (norm11): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, 
track_running_stats=True) (relu11): ReLU(inplace=True) (conv12): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (norm12): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu12): ReLU(inplace=True) (pool13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (conv14): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (norm14): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu14): ReLU(inplace=True) (conv15): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (norm15): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu15): ReLU(inplace=True) (conv16): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (norm16): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu16): ReLU(inplace=True) ) (classifier): Sequential( (linear1): Linear(in_features=512, out_features=512, bias=True) (norm1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu1): ReLU(inplace=True) (linear2): Linear(in_features=512, out_features=10, bias=True) ) )
我们进行权重初始化
# Custom weight initialization for every submodule of `net`.
# Assumes `net`, `nn` (torch.nn) and `math` are already in scope.
for m in net.modules():
    if isinstance(m, nn.Conv2d):
        # He (Kaiming) style init with fan_out = k_h * k_w * out_channels.
        # BUG FIX: in the original this line was commented out, so the
        # use of `n` below raised NameError — it must be executable code.
        n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
        m.weight.data.normal_(0, math.sqrt(2. / n))
        # Bias may be absent (bias=False convs); zero it only if present.
        if m.bias is not None:
            m.bias.data.zero_()
    elif isinstance(m, nn.BatchNorm2d):
        # BatchNorm: scale (gamma) = 0.5, shift (beta) = 0.
        m.weight.data.fill_(0.5)
        m.bias.data.zero_()
    elif isinstance(m, nn.Linear):
        # Small-variance Gaussian for fully-connected weights, zero bias.
        m.weight.data.normal_(0, 0.01)
        m.bias.data.zero_()
下面是vgg-16代码中的实现
import math
from collections import OrderedDict

import torch.nn as nn

# Kept from the original source; not used in this file as shown —
# presumably consumed by weight-init code elsewhere. TODO confirm.
norm_mean, norm_var = 0.0, 1.0

# VGG-16 layout:
#   conv3-64 x2 -> maxpool -> conv3-128 x2 -> maxpool -> conv3-256 x3
#   -> maxpool -> conv3-512 x3 -> maxpool -> conv3-512 x3
# The trailing 512 is the hidden width of the classifier head.
defaultcfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M',
              512, 512, 512, 'M', 512, 512, 512, 512]
# Module indices of the ReLU / Conv layers inside `features`
# (looks like bookkeeping for pruning code elsewhere — not used here).
relucfg = [2, 6, 9, 13, 16, 19, 23, 26, 29, 33, 36, 39]
convcfg = [0, 3, 7, 10, 14, 17, 20, 24, 27, 30, 34, 37]


class VGG(nn.Module):
    """VGG-16 with batch norm, sized for 32x32 inputs (e.g. CIFAR-10).

    Args:
        num_classes: number of output classes of the final linear layer.
        init_weights: run the custom weight initialization when True.
        cfg: layer configuration list; ``defaultcfg`` is used when None.
            All entries but the last describe the conv/pool stack
            ('M' = max-pool, int = conv output channels); the last entry
            is the classifier's hidden width.
        compress_rate: optional sequence of per-conv-layer compression
            rates; when given, each Conv2d is tagged with a ``cp_rate``
            attribute (consumed by pruning code outside this file).
    """

    def __init__(self, num_classes=10, init_weights=True, cfg=None,
                 compress_rate=None):
        super(VGG, self).__init__()
        self.features = nn.Sequential()
        if cfg is None:
            cfg = defaultcfg
        self.relucfg = relucfg
        self.covcfg = convcfg
        self.compress_rate = compress_rate
        # cfg[:-1] = conv/pool stack; True enables BatchNorm layers.
        self.features = self.make_layers(cfg[:-1], True, compress_rate)
        # Classifier head: 512 -> 512 -> num_classes.
        self.classifier = nn.Sequential(OrderedDict([
            ('linear1', nn.Linear(cfg[-2], cfg[-1])),
            ('norm1', nn.BatchNorm1d(cfg[-1])),
            ('relu1', nn.ReLU(inplace=True)),
            ('linear2', nn.Linear(cfg[-1], num_classes)),
        ]))
        if init_weights:
            self._initialize_weights()

    def make_layers(self, cfg, batch_norm=True, compress_rate=None):
        """Build the conv/BN/ReLU/pool stack described by ``cfg``.

        'M' entries become 2x2 max-pool layers; integer entries become
        3x3 same-padding convs followed by BatchNorm2d and ReLU.
        """
        layers = nn.Sequential()
        in_channels = 3
        cnt = 0  # index into compress_rate, counting conv layers only
        for i, v in enumerate(cfg):
            if v == 'M':
                layers.add_module('pool%d' % i,
                                  nn.MaxPool2d(kernel_size=2, stride=2))
            else:
                conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
                # BUG FIX: the original indexed compress_rate
                # unconditionally, raising TypeError when it is None —
                # which is the default of __init__ and vgg_16_bn().
                # Only tag the conv when rates were actually supplied.
                if compress_rate is not None:
                    conv2d.cp_rate = compress_rate[cnt]
                cnt += 1
                layers.add_module('conv%d' % i, conv2d)
                layers.add_module('norm%d' % i, nn.BatchNorm2d(v))
                # inplace=True rewrites the conv output tensor directly,
                # saving memory in the forward pass.
                layers.add_module('relu%d' % i, nn.ReLU(inplace=True))
                in_channels = v
        return layers

    def forward(self, x):
        # Conv stack, e.g. [N, 3, 32, 32] -> [N, 512, 2, 2]
        x = self.features(x)
        # 2x2 average pool: [N, 512, 2, 2] -> [N, 512, 1, 1]
        x = nn.AvgPool2d(2)(x)
        # Flatten: -> [N, 512]
        x = x.view(x.size(0), -1)
        # Classifier head: -> [N, num_classes]
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """He-style init for convs, (0.5, 0) for BN, N(0, 0.01) for FC."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # fan_out = k_h * k_w * out_channels
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(0.5)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()


def vgg_16_bn(compress_rate=None):
    """Factory for the default VGG-16-BN model."""
    return VGG(compress_rate=compress_rate)