AlexNet网络解析
亮点
1.首次引入GPU进行网络加速训练,其中,下图是使用了两块GPU进行训练
2.使用了ReLU激活函数
3.在全连接层的前两层使用了Dropout随机失活神经元的操作,减少过拟合
网络结构
经卷积后矩阵尺寸大小计算公式为:
N = (W-F+P1+P2)/S+1
W:输入图片的边长(输入为 W×W);F:卷积核的边长(卷积核为 F×F);
P1:图片一侧(上/左)padding 的个数;P2:图片另一侧(下/右)padding 的个数;S:步长
Conv1
输入:(3,224,224)
W=224,F=11,P=[1,2],S=4,N=(224-11+1+2)/4+1=55
输出:(96,55,55)
Maxpool1
输入:(96,55,55)
W=55,F=3,P=[0,0],S=2,N=(55-3)/2+1=27
输出:(96,27,27)
Conv2
输入:(96,27,27)
W=27,F=5,P=[2,2],S=1,N=(27-5+4)/1+1=27
输出:(256,27,27)
Maxpool2
输入:(256,27,27)
W=27,F=3,P=[0,0],S=2,N=(27-3)/2+1=13
输出:(256,13,13)
Conv3
输入:(256,13,13)
W=13,F=3,P=[1,1],S=1,N=(13-3+1+1)/1+1=13
输出:(384,13,13)
Conv4
输入:(384,13,13)
W=13,F=3,P=[1,1],S=1,N=(13-3+1+1)/1+1=13
输出:(384,13,13)
Conv5
输入:(384,13,13)
W=13,F=3,P=[1,1],S=1,N=(13-3+1+1)/1+1=13
输出:(256,13,13)
Maxpool3
输入:(256,13,13)
W=13,F=3,P=[0,0],S=2,N=(13-3)/2+1=6
输出:(256,6,6)(展平后即为全连接层的输入 256*6*6)
代码复现
class AlexNet(nn.Module): def __init__(self,num_classes=1000,init_weights=False): super(AlexNet, self).__init__() # nn.Sequential 简化步骤 self.features = nn.Sequential( # 特征提取层 # Input[3,224,224] output[48,55,55] nn.Conv2d(3,48,kernel_size=11,stride=4,padding=2), nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=3,stride=2), nn.Conv2d(48,128,kernel_size=5,padding=2), nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=3,stride=2), nn.Conv2d(128,192,kernel_size=3,padding=1), nn.ReLU(inplace=True), nn.Conv2d(192, 192, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.Conv2d(192, 128, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=3, stride=2), ) self.classifier = nn.Sequential( # 全连接层 nn.Dropout(p=0.5), # p:神经元随机失活的比例 nn.Linear(128*6*6,2048), nn.ReLU(inplace=True), nn.Dropout(p=0.5), nn.Linear(2048,2048), nn.ReLU(inplace=True), nn.Linear(2048,num_classes) # num_classes:分类的类别数 ) if init_weights: self._initialize_weights() def forward(self,x): x = self.features(x) x = torch.flatten(x,start_dim=1) x = self.classifier(x) return x # 初始化权重 def _initialize_weights(self): # 遍历整个modules for m in self.modules(): # 发现有nn.Conv2d这个结构 if isinstance(m,nn.Conv2d): # 凯明初始化权重 nn.init.kaiming_normal_(m.weight,mode='fan_out', nonlinearity='relu') if m.bias is not None: nn.init.constant_(m.bias,0) # 发现有nn.Linear,则使用正态分布初始化函数 elif isinstance(m,nn.Linear): nn.init.normal_(m.weight,0,0.01) nn.init.constant_(m.bias,0)