【9】一些经典CNN结构的pytorch实现

简介: 【9】一些经典CNN结构的pytorch实现

导入主要的包


import math
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import numpy as np


1.LeNet-5


image.png

# Mini-batch size used for the demo input tensors.
batch_size = 10


class LeNet5(nn.Module):
    """LeNet-5 style classifier: two conv/max-pool stages and three linear layers.

    The first linear layer takes 16*5*5 features, which is what the conv stack
    produces for a (N, 3, 32, 32) input. The network returns raw logits of
    shape (N, 10). No activation is applied between the conv and pool layers.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor. Shape comments assume a (N, 3, 32, 32) input.
        feature_stages = [
            nn.Conv2d(3, 6, kernel_size=5, stride=1, padding=0),    # -> (N, 6, 28, 28)
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),       # -> (N, 6, 14, 14)
            nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0),   # -> (N, 16, 10, 10)
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),       # -> (N, 16, 5, 5)
        ]
        self.conv_layer = nn.Sequential(*feature_stages)

        # Classifier head: 400 -> 120 -> 84 -> 10.
        head = [
            nn.Linear(16 * 5 * 5, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 10),
        ]
        self.fullconn_layer = nn.Sequential(*head)

    def forward(self, x):
        """Return logits of shape (N, 10) for the image batch ``x``."""
        feature_maps = self.conv_layer(x)
        # Flatten with the runtime batch size rather than the global constant,
        # so the model accepts any batch size.
        flattened = feature_maps.view(x.size(0), -1)
        return self.fullconn_layer(flattened)


net = LeNet5()


如果是逻辑回归,需要对每一张图片输出数值进行softmax操作,将神经网络的数值变成一个概率,然后通过相关的操作,就可以得到每一张图片最大概率的那个索引就是实际上图片的预测分类标签值


# Assume x is a batch of `batch_size` images to classify.
x = torch.randn(batch_size, 3, 32, 32)
# Raw network outputs (logits), one row of 10 scores per image.
output = net(x)
# Convert the logits into per-image class probabilities.
# `dim=1` (the class axis) is passed explicitly: calling F.softmax without
# `dim` is deprecated and emits a warning.
output = F.softmax(output, dim=1)
# Largest probability of each row and the index where it occurs.
values, index = output.max(dim=1)
# `values` holds the maximum probability of every image, e.g.
# tensor([0.1120, 0.1110, 0.1096, 0.1108, 0.1110, 0.1104, 0.1109, 0.1127, 0.1097,
#        0.1135], grad_fn=<MaxBackward0>)
# `index` holds the position of that maximum, i.e. the predicted class label, e.g.
# tensor([4, 8, 8, 4, 8, 4, 8, 4, 8, 8])
# With the arg-max label available, each image can be assigned to its predicted class.


2.AlexNet


image.png

# Mini-batch size used for the demo input tensors.
batch_size = 10


class AlexNet(nn.Module):
    """AlexNet-like classifier scaled down for (N, 3, 32, 32) inputs, 10 logits.

    Five 3x3 convolutions followed by three linear layers. In the first two
    stages the modules run in the order conv -> max-pool -> ReLU -> batch-norm.
    The first linear layer takes 128*4*4 features, which is what the conv
    stack produces for a 32x32 input.
    """

    def __init__(self):
        super().__init__()
        # Shape comments assume a (N, 3, 32, 32) input.
        conv_modules = [
            # C1: conv -> pool -> ReLU -> batch-norm
            nn.Conv2d(3, 48, kernel_size=3, stride=1, padding=1),     # (N, 48, 32, 32)
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),         # (N, 48, 16, 16)
            nn.ReLU(),
            nn.BatchNorm2d(48),
            # C2: conv -> pool -> ReLU -> batch-norm
            nn.Conv2d(48, 128, kernel_size=3, stride=1, padding=1),   # (N, 128, 16, 16)
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),         # (N, 128, 8, 8)
            nn.ReLU(),
            nn.BatchNorm2d(128),
            # C3: conv -> ReLU
            nn.Conv2d(128, 192, kernel_size=3, stride=1, padding=1),  # (N, 192, 8, 8)
            nn.ReLU(),
            # C4: conv -> ReLU
            nn.Conv2d(192, 192, kernel_size=3, stride=1, padding=1),  # (N, 192, 8, 8)
            nn.ReLU(),
            # C5: conv -> ReLU -> pool
            nn.Conv2d(192, 128, kernel_size=3, stride=1, padding=1),  # (N, 128, 8, 8)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),         # (N, 128, 4, 4)
        ]
        self.conv_layer = nn.Sequential(*conv_modules)

        flat_features = 128 * 4 * 4
        fc_modules = [
            # FC6: linear -> ReLU -> dropout
            nn.Linear(flat_features, 2048),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            # FC7: linear -> ReLU -> dropout
            nn.Linear(2048, 2048),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            # Output layer: 10 class logits
            nn.Linear(2048, 10),
        ]
        self.fullconn_layer = nn.Sequential(*fc_modules)

    def forward(self, x):
        """Return logits of shape (N, 10) for the image batch ``x``."""
        feature_maps = self.conv_layer(x)              # (N, 128, 4, 4)
        flattened = feature_maps.view(x.size(0), -1)   # (N, 128*4*4)
        return self.fullconn_layer(flattened)          # (N, 10)


net = AlexNet()


其中,原始paper中的输入图像是224*224,所以以上的Alexnet是改写的,原始的处理过程如下:


class AlexNet(nn.Module):
    """AlexNet for 224x224 RGB images, with half the channel widths of the paper.

    Args:
        num_classes: number of output logits.
        init_weights: when true, re-initialise conv and linear layers with
            ``_initialize_weights`` after construction.
    """

    def __init__(self, num_classes=1000, init_weights=False):
        super().__init__()
        # Shape comments are per sample, for a [3, 224, 224] input.
        feature_modules = [
            nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2),   # [48, 55, 55] (size is floored)
            nn.ReLU(inplace=True),                                   # in-place saves activation memory
            nn.MaxPool2d(kernel_size=3, stride=2),                   # [48, 27, 27]
            nn.Conv2d(48, 128, kernel_size=5, padding=2),            # [128, 27, 27]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                   # [128, 13, 13]
            nn.Conv2d(128, 192, kernel_size=3, padding=1),           # [192, 13, 13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=3, padding=1),           # [192, 13, 13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 128, kernel_size=3, padding=1),           # [128, 13, 13]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                   # [128, 6, 6]
        ]
        self.features = nn.Sequential(*feature_modules)

        # Fully connected head; dropout precedes the first two linear layers.
        head_modules = [
            nn.Dropout(p=0.5),
            nn.Linear(128 * 6 * 6, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, num_classes),
        ]
        self.classifier = nn.Sequential(*head_modules)

        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Return logits of shape (N, num_classes)."""
        feature_maps = self.features(x)
        # Flatten everything except the batch dimension (equivalent to view()).
        flat = torch.flatten(feature_maps, start_dim=1)
        return self.classifier(flat)

    def _initialize_weights(self):
        """Kaiming-normal init for conv weights, N(0, 0.01) for linear weights, zero biases."""
        for layer in self.modules():
            if isinstance(layer, nn.Linear):
                nn.init.normal_(layer.weight, 0, 0.01)
                nn.init.constant_(layer.bias, 0)
            elif isinstance(layer, nn.Conv2d):
                # He (Kaiming) initialisation, suited to ReLU activations.
                nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')
                if layer.bias is not None:
                    nn.init.constant_(layer.bias, 0)


3.VGG


image.png

由于VGG网络有许多个版本,这里以VGG16层版本为例,也就是图中的版本D。


其中ABCDE表示不同深度的网络配置,convx-y中x表示卷积核尺寸,y表示特征通道数,比如conv3-256表示3x3的卷积核并且通道数为256;同理conv1-512表示卷积核1x1并且通道数为512。


# Mini-batch size used for the demo input tensors.
batch_size = 10


class VGG16(nn.Module):
    """VGG-16 (configuration D) scaled down for (N, 3, 32, 32) inputs, 10 logits.

    Five blocks of 3x3 conv + ReLU layers, each closed by a 2x2 max-pool that
    halves the spatial size (32 -> 16 -> 8 -> 4 -> 2 -> 1). The first linear
    layer takes 512 features, i.e. a (N, 512, 1, 1) feature map.
    """

    def __init__(self):
        super().__init__()
        # (output channels, number of conv layers) for each of the five blocks.
        block_plan = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

        conv_modules = []
        channels = 3
        for width, depth in block_plan:
            for _ in range(depth):
                # 3x3 conv with padding 1 keeps the spatial size unchanged.
                conv_modules.append(
                    nn.Conv2d(channels, width, kernel_size=3, stride=1, padding=1)
                )
                conv_modules.append(nn.ReLU())
                channels = width
            # Each block ends by halving height and width.
            conv_modules.append(nn.MaxPool2d(kernel_size=2, stride=2, padding=0))
        self.conv_layer = nn.Sequential(*conv_modules)

        fc_modules = [
            # FC6: linear -> ReLU -> dropout
            nn.Linear(512, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            # FC7: linear -> ReLU -> dropout
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            # Output layer: 10 class logits
            nn.Linear(4096, 10),
        ]
        self.fullconn_layer = nn.Sequential(*fc_modules)

    def forward(self, x):
        """Return logits of shape (N, 10) for the image batch ``x``."""
        feature_maps = self.conv_layer(x)              # (N, 512, 1, 1) for 32x32 input
        flattened = feature_maps.view(x.size(0), -1)   # (N, 512)
        return self.fullconn_layer(flattened)          # (N, 10)
# Instantiate the 32x32-input VGG16 (rebinds the module-level `net`).
net = VGG16()
# Smoke test: push one random batch through the network.
x = torch.randn(batch_size,3,32,32)
output = net(x)
# Bare expression: only displayed in an interactive session / notebook cell.
# The final linear layer has 10 outputs, so the shape is (batch_size, 10).
output.shape


其中,原始的输入图像是224*224,所以以上的VGG16是改写的,原始的处理过程为:

参考:https://zhuanlan.zhihu.com/p/87555358


import torch.nn as nn
import torch
class SE_VGG(nn.Module):
    """VGG-16 for 224x224 RGB images.

    Five blocks of 3x3 conv + ReLU layers, each followed by a 2x2 max-pool,
    then a three-layer classifier. The first linear layer takes 512*7*7
    features, which is what the feature extractor yields for a 224x224 input.

    Args:
        num_classes: number of output logits.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.num_classes = num_classes

        # (output channels, number of conv layers) for blocks 1-5.
        block_plan = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

        feature_modules = []
        channels = 3
        for width, depth in block_plan:
            for _ in range(depth):
                feature_modules.append(
                    nn.Conv2d(in_channels=channels, out_channels=width,
                              kernel_size=3, stride=1, padding=1)
                )
                feature_modules.append(nn.ReLU())
                channels = width
            # Close the block by halving the spatial resolution.
            feature_modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
        self.extract_feature = nn.Sequential(*feature_modules)

        # Classifier: 512*7*7 -> 4096 -> 4096 -> num_classes.
        self.classifier = nn.Sequential(
            nn.Linear(in_features=512 * 7 * 7, out_features=4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=4096, out_features=self.num_classes),
        )

    def forward(self, x):
        """Return logits of shape (N, num_classes)."""
        feature_maps = self.extract_feature(x)
        flattened = feature_maps.view(x.size(0), -1)
        return self.classifier(flattened)


4.GoogLeNet


亮点:


  • 引入了Inception结构(融合不同尺度的特征信息)
  • 使用1x1的卷积核进行降维以及映射处理
  • 添加两个辅助分类器帮助训练
  • 丢弃全连接层,使用平均池化层(大大减少模型 参数)

image.pngimage.png


注意:每个分支所得的特征矩阵高和宽必须相同


# Mini-batch size used for the demo input tensors.
batch_size = 10


class Conv_ReLU(nn.Module):
    """A 2-D convolution immediately followed by a ReLU.

    Args:
        in_channels: channels of the input feature map.
        out_channels: channels produced by the convolution.
        **kwargs: forwarded unchanged to ``nn.Conv2d`` (kernel_size, stride,
            padding, ...).
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        self.model = nn.Sequential(conv, nn.ReLU())

    def forward(self, x):
        """Apply the convolution and the ReLU to ``x``."""
        return self.model(x)
class Inception(nn.Module):
    """Inception module: four parallel branches concatenated along the channel axis.

    Every branch preserves the input height and width (stride 1 with matching
    padding), so the outputs can be concatenated. The output has
    ``ch1x1 + ch3x3 + ch5x5 + poolproj`` channels.

    Args:
        in_channels: channels of the input feature map.
        ch1x1: output channels of the 1x1 branch.
        ch3x3rd: channels of the 1x1 reduction before the 3x3 conv.
        ch3x3: output channels of the 3x3 branch.
        ch5x5rd: channels of the 1x1 reduction before the 5x5 conv.
        ch5x5: output channels of the 5x5 branch.
        poolproj: output channels of the 1x1 projection after max-pooling.
    """

    def __init__(self, in_channels, ch1x1,ch3x3rd, ch3x3, ch5x5rd, ch5x5, poolproj):
        super().__init__()
        # Conv_ReLU uses nn.Conv2d's default stride of 1; the padding is chosen
        # per kernel size so all four branches keep the same height and width.
        # Branch 1: plain 1x1 convolution.
        self.branch1 = Conv_ReLU(in_channels, ch1x1, kernel_size=1)
        # Branch 2: 1x1 reduction, then 3x3 convolution.
        reduce3 = Conv_ReLU(in_channels, ch3x3rd, kernel_size=1)
        conv3 = Conv_ReLU(ch3x3rd, ch3x3, kernel_size=3, padding=1)
        self.branch2 = nn.Sequential(reduce3, conv3)
        # Branch 3: 1x1 reduction, then 5x5 convolution.
        reduce5 = Conv_ReLU(in_channels, ch5x5rd, kernel_size=1)
        conv5 = Conv_ReLU(ch5x5rd, ch5x5, kernel_size=5, padding=2)
        self.branch3 = nn.Sequential(reduce5, conv5)
        # Branch 4: 3x3 max-pool (stride 1), then 1x1 projection.
        pool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        project = Conv_ReLU(in_channels, poolproj, kernel_size=1)
        self.branch4 = nn.Sequential(pool, project)

    def forward(self,x):
        """Run all four branches on ``x`` and concatenate them on dim 1."""
        # Example, Inception(3a) in the 32x32 network: branch outputs are
        # (N, 64, 4, 4), (N, 128, 4, 4), (N, 32, 4, 4), (N, 32, 4, 4),
        # giving (N, 256, 4, 4) after concatenation.
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        branch_outputs = [branch(x) for branch in branches]
        return torch.cat(branch_outputs, dim=1)
class GoogLeNet(nn.Module):
    """GoogLeNet adapted to (N, 3, 32, 32) inputs with 10 logits, without auxiliary classifiers.

    Local response normalisation from the paper is omitted. The feature
    extractor ends with adaptive average pooling to 1x1, so the classifier
    always receives 1024 features.

    Fixes relative to the previous revision:
      * Inception(5b) uses a 3x3-reduce width of 192 (was 182, a typo). This
        changes the weight shapes inside that module, so checkpoints saved
        with the old width no longer load into it.
      * The classifier uses ``nn.Dropout`` instead of ``nn.Dropout2d``.
        Dropout2d is a channel-wise dropout meant for (N, C, H, W) feature
        maps, but here the input is already flattened to (N, 1024).
    """

    def __init__(self):
        super(GoogLeNet, self).__init__()
        # Shape comments assume a (N, 3, 32, 32) input.
        self.conv_layer = nn.Sequential(
            # Conv -> MaxPool (LocalRespNorm omitted)
            Conv_ReLU(3, 64, kernel_size=7, stride=2, padding=3),   # (N, 64, 16, 16)
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),       # (N, 64, 7, 7)
            # Conv -> Conv -> MaxPool (LocalRespNorm omitted)
            Conv_ReLU(64, 64, kernel_size=1),
            Conv_ReLU(64, 192, kernel_size=3, stride=1, padding=1), # (N, 192, 7, 7)
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),       # (N, 192, 4, 4)
            # Inception(3a)
            Inception(192, 64, 96, 128, 16, 32, 32),     # (N, 256, 4, 4)
            # Inception(3b)
            Inception(256, 128, 128, 192, 32, 96, 64),   # (N, 480, 4, 4)
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),       # (N, 480, 2, 2)
            # Inception(4a)
            Inception(480, 192, 96, 208, 16, 48, 64),    # (N, 512, 2, 2)
            # Inception(4b)
            Inception(512, 160, 112, 224, 24, 64, 64),   # (N, 512, 2, 2)
            # Inception(4c)
            Inception(512, 128, 128, 256, 24, 64, 64),   # (N, 512, 2, 2)
            # Inception(4d)
            Inception(512, 112, 144, 288, 32, 64, 64),   # (N, 528, 2, 2)
            # Inception(4e)
            Inception(528, 256, 160, 320, 32, 128, 128), # (N, 832, 2, 2)
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),       # (N, 832, 1, 1)
            # Inception(5a)
            Inception(832, 256, 160, 320, 32, 128, 128), # (N, 832, 1, 1)
            # Inception(5b): the 3x3-reduce width is 192
            Inception(832, 384, 192, 384, 48, 128, 128), # (N, 1024, 1, 1)
            # Adaptive pooling forces a 1x1 spatial output for any input size.
            nn.AdaptiveAvgPool2d((1,1)),                 # (N, 1024, 1, 1)
        )
        # Classifier head operating on flattened (N, 1024) features.
        self.fullconn_layer = nn.Sequential(
            nn.Dropout(p=0.4),
            nn.Linear(1024, 10)      # (N, 10)
        )

    def forward(self,x):
        """Return logits of shape (N, 10) for the image batch ``x``."""
        output = self.conv_layer(x)             # (N, 1024, 1, 1)
        output = output.view(x.size(0), -1)     # (N, 1024)
        output = self.fullconn_layer(output)    # (N, 10)
        return output
# Shape smoke test: push one random batch through the 32x32 GoogLeNet.
# Note: this rebinds the module-level `net`, `x` and `output`.
net = GoogLeNet()
x = torch.randn(batch_size,3,32,32)
output = net(x)
# Bare expression: only displayed in an interactive session / notebook cell.
# The final linear layer has 10 outputs, so the shape is (batch_size, 10).
output.shape


以上的GoogLeNet为32*32的图像分类网络所搭建,没有辅助分类器。可以参考 https://www.jianshu.com/p/a0bdab69b423 和 https://zhuanlan.zhihu.com/p/185025947


以1000分类为例,原始的GoogLeNet网络结构,含辅助分类器结构如下代码:


class GoogLeNet(nn.Module):
    """GoogLeNet (Inception v1) for 224x224 RGB images, with optional auxiliary heads.

    Args:
        num_classes: number of output logits.
        aux_logits: whether to build the two auxiliary classifiers. They are
            only evaluated while the module is in training mode.
        init_weights: whether to re-initialise conv and linear layers with
            ``_initialize_weights`` after construction.

    In training mode with ``aux_logits`` enabled, ``forward`` returns the
    tuple ``(main, aux2, aux1)``; otherwise it returns only the main logits.
    """

    def __init__(self, num_classes=1000, aux_logits=True, init_weights=False):
        super().__init__()
        self.aux_logits = aux_logits

        # Stem
        self.conv1 = BasicConv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)
        self.conv2 = BasicConv2d(64, 64, kernel_size=1)
        self.conv3 = BasicConv2d(64, 192, kernel_size=3, padding=1)
        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        # Stage 3
        self.inception3a = Inception(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = Inception(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        # Stage 4
        self.inception4a = Inception(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = Inception(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = Inception(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = Inception(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = Inception(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        # Stage 5
        self.inception5a = Inception(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = Inception(832, 384, 192, 384, 48, 128, 128)

        # Auxiliary classifiers hang off inception4a (512 ch) and inception4d (528 ch).
        if self.aux_logits:
            self.aux1 = InceptionAux(512, num_classes)
            self.aux2 = InceptionAux(528, num_classes)

        # Main classification head
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(0.4)
        self.fc = nn.Linear(1024, num_classes)

        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Return main logits, plus both auxiliary logits when training with aux heads."""
        # The auxiliary heads are skipped in eval mode.
        use_aux = self.training and self.aux_logits

        # Shape comments assume an N x 3 x 224 x 224 input.
        x = self.maxpool1(self.conv1(x))              # N x 64 x 56 x 56
        x = self.conv3(self.conv2(x))                 # N x 192 x 56 x 56
        x = self.maxpool2(x)                          # N x 192 x 28 x 28

        x = self.inception3b(self.inception3a(x))     # N x 480 x 28 x 28
        x = self.maxpool3(x)                          # N x 480 x 14 x 14

        x = self.inception4a(x)                       # N x 512 x 14 x 14
        if use_aux:
            aux1 = self.aux1(x)
        x = self.inception4b(x)                       # N x 512 x 14 x 14
        x = self.inception4c(x)                       # N x 512 x 14 x 14
        x = self.inception4d(x)                       # N x 528 x 14 x 14
        if use_aux:
            aux2 = self.aux2(x)
        x = self.inception4e(x)                       # N x 832 x 14 x 14
        x = self.maxpool4(x)                          # N x 832 x 7 x 7

        x = self.inception5b(self.inception5a(x))     # N x 1024 x 7 x 7

        x = self.avgpool(x)                           # N x 1024 x 1 x 1
        x = torch.flatten(x, 1)                       # N x 1024
        x = self.fc(self.dropout(x))                  # N x num_classes

        if use_aux:
            return x, aux2, aux1
        return x

    def _initialize_weights(self):
        """Kaiming-normal init for conv weights, N(0, 0.01) for linear weights, zero biases."""
        for layer in self.modules():
            if isinstance(layer, nn.Linear):
                nn.init.normal_(layer.weight, 0, 0.01)
                nn.init.constant_(layer.bias, 0)
            elif isinstance(layer, nn.Conv2d):
                nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')
                if layer.bias is not None:
                    nn.init.constant_(layer.bias, 0)
class Inception(nn.Module):
    """Inception module: four parallel branches concatenated along the channel axis.

    Args:
        in_channels: channels of the input feature map.
        ch1x1: output channels of the 1x1 branch.
        ch3x3red: channels of the 1x1 reduction before the 3x3 conv.
        ch3x3: output channels of the 3x3 branch.
        ch5x5red: channels of the 1x1 reduction before the 5x5 conv.
        ch5x5: output channels of the 5x5 branch.
        pool_proj: output channels of the 1x1 projection after max-pooling.
    """

    def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj):
        super().__init__()
        # Branch 1: plain 1x1 convolution.
        self.branch1 = BasicConv2d(in_channels, ch1x1, kernel_size=1)

        # Branch 2: 1x1 reduction, then 3x3 conv (padding=1 keeps H and W).
        reduce3 = BasicConv2d(in_channels, ch3x3red, kernel_size=1)
        conv3 = BasicConv2d(ch3x3red, ch3x3, kernel_size=3, padding=1)
        self.branch2 = nn.Sequential(reduce3, conv3)

        # Branch 3: 1x1 reduction, then 5x5 conv (padding=2 keeps H and W).
        reduce5 = BasicConv2d(in_channels, ch5x5red, kernel_size=1)
        conv5 = BasicConv2d(ch5x5red, ch5x5, kernel_size=5, padding=2)
        self.branch3 = nn.Sequential(reduce5, conv5)

        # Branch 4: size-preserving 3x3 max-pool, then 1x1 projection.
        pool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        project = BasicConv2d(in_channels, pool_proj, kernel_size=1)
        self.branch4 = nn.Sequential(pool, project)

    def forward(self, x):
        """Run all four branches on ``x`` and concatenate them on dim 1."""
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        return torch.cat([branch(x) for branch in branches], 1)
class InceptionAux(nn.Module):
    """Auxiliary classifier head used by GoogLeNet during training.

    Average-pools the feature map, reduces it to 128 channels with a 1x1
    conv, then classifies it with two linear layers. ``fc1`` takes 2048
    features, i.e. a 128 x 4 x 4 map after pooling (a 14x14 input gives 4x4).

    Args:
        in_channels: channels of the incoming feature map.
        num_classes: number of output logits.
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.averagePool = nn.AvgPool2d(kernel_size=5, stride=3)
        self.conv = BasicConv2d(in_channels, 128, kernel_size=1)  # -> [batch, 128, 4, 4]
        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        """Return auxiliary logits of shape (N, num_classes)."""
        # aux1 input: N x 512 x 14 x 14, aux2 input: N x 528 x 14 x 14
        pooled = self.averagePool(x)             # N x C x 4 x 4
        reduced = self.conv(pooled)              # N x 128 x 4 x 4
        flat = torch.flatten(reduced, 1)         # N x 2048
        # Functional dropout is only active while the module is in training mode.
        flat = F.dropout(flat, 0.5, training=self.training)
        hidden = F.relu(self.fc1(flat), inplace=True)   # N x 1024
        hidden = F.dropout(hidden, 0.5, training=self.training)
        return self.fc2(hidden)                  # N x num_classes
class BasicConv2d(nn.Module):
    """A 2-D convolution followed by an in-place ReLU.

    Args:
        in_channels: channels of the input feature map.
        out_channels: channels produced by the convolution.
        **kwargs: forwarded unchanged to ``nn.Conv2d`` (kernel_size, stride,
            padding, ...).
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return ``relu(conv(x))``."""
        return self.relu(self.conv(x))


5.ResNet


亮点:


  • 超深的网络结构


  • 提出residual模块


  • 使用Batch Normalization加速训练(丢弃dropout)


ResNet中两种基本的残差块结构:

image.png


左边的结构适用于50层以下的网络;右边的结构适用于50层及以上的网络

image.png

image.png


以下以实现Res18为例:


# Mini-batch size used for the demo input tensors.
batch_size = 10


class ResBlock(nn.Module):
    """Down-sampling residual block: two conv/BN layers plus a projected shortcut.

    The main path applies a stride-2 conv (in_channels -> in_channels), BN and
    ReLU, then a stride-1 conv (in_channels -> out_channels) and BN. The
    shortcut is a stride-2 1x1 conv with BN, so both paths agree in shape
    before they are summed and passed through a final ReLU.

    Args:
        in_channels: channels of the input feature map.
        out_channels: channels of the block output.
        **kwargs: forwarded to both main-path ``nn.Conv2d`` layers
            (e.g. kernel_size, padding, bias). ``stride`` must not be passed
            here because the block sets it itself.
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        # Main path. Shape comments: the first block of the 32x32 ResNet demo.
        main_path = [
            nn.Conv2d(in_channels, in_channels, stride=2, **kwargs),
            nn.BatchNorm2d(in_channels),     # (N, 64, 8, 8)
            nn.ReLU(),
            nn.Conv2d(in_channels, out_channels, stride=1, **kwargs),
            nn.BatchNorm2d(out_channels),    # (N, 128, 8, 8)
        ]
        self.model = nn.Sequential(*main_path)

        # Shortcut: 1x1 projection that matches the main path's channel count
        # and spatial size.
        shortcut = [
            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=2, bias=False),
            nn.BatchNorm2d(out_channels),    # (N, 128, 8, 8)
        ]
        self.extra = nn.Sequential(*shortcut)

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        residual = self.model(x)
        projected = self.extra(x)
        return F.relu(residual + projected)   # (N, 128, 8, 8)
class ResNet(nn.Module):
    """Small residual network for (N, 3, 32, 32) inputs with 10 output logits.

    A 7x7 stride-2 stem conv with batch-norm (no activation), four
    down-sampling ResBlocks, adaptive average pooling to 1x1 and a linear
    classifier.
    """

    def __init__(self):
        super().__init__()
        # Keyword arguments shared by the main-path convs of every ResBlock.
        block_conv = dict(kernel_size=3, padding=1, bias=False)
        # Shape comments assume a (N, 3, 32, 32) input.
        stages = [
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),                 # (N, 64, 16, 16)
            ResBlock(64, 128, **block_conv),    # (N, 128, 8, 8)
            ResBlock(128, 256, **block_conv),   # (N, 256, 4, 4)
            ResBlock(256, 512, **block_conv),   # (N, 512, 2, 2)
            ResBlock(512, 512, **block_conv),   # (N, 512, 1, 1)
            nn.AdaptiveAvgPool2d((1,1)),
        ]
        self.model = nn.Sequential(*stages)
        self.outlayer = nn.Linear(512, 10)

    def forward(self, x):
        """Return logits of shape (N, 10) for the image batch ``x``."""
        pooled = self.model(x)                  # (N, 512, 1, 1)
        flattened = pooled.view(x.size(0), -1)  # (N, 512)
        return self.outlayer(flattened)         # (N, 10)
# Smoke test: push one random batch through the ResNet defined above.
# Note: this rebinds the module-level `net`, `x` and `output`.
net = ResNet()
x = torch.randn(batch_size,3,32,32)
output = net(x)
# Bare expression: only displayed in an interactive session / notebook cell.
# The final linear layer has 10 outputs, so the shape is (batch_size, 10).
output.shape
目录
相关文章
|
6月前
|
机器学习/深度学习 存储 算法
【复现】尝试使用numpy对卷积神经网络中各经典结构进行改写复现
【复现】尝试使用numpy对卷积神经网络中各经典结构进行改写复现
98 0
【复现】尝试使用numpy对卷积神经网络中各经典结构进行改写复现
|
6月前
|
机器学习/深度学习 算法 PyTorch
【PyTorch实战演练】Fast R-CNN中的RoI(Region of Interest)池化详解
【PyTorch实战演练】Fast R-CNN中的RoI(Region of Interest)池化详解
185 1
|
6月前
|
机器学习/深度学习 编解码 PyTorch
Pytorch实现手写数字识别 | MNIST数据集(CNN卷积神经网络)
Pytorch实现手写数字识别 | MNIST数据集(CNN卷积神经网络)
|
2月前
|
机器学习/深度学习 PyTorch 算法框架/工具
CNN中的注意力机制综合指南:从理论到Pytorch代码实现
注意力机制已成为深度学习模型的关键组件,尤其在卷积神经网络(CNN)中发挥了重要作用。通过使模型关注输入数据中最相关的部分,注意力机制显著提升了CNN在图像分类、目标检测和语义分割等任务中的表现。本文将详细介绍CNN中的注意力机制,包括其基本概念、不同类型(如通道注意力、空间注意力和混合注意力)以及实际实现方法。此外,还将探讨注意力机制在多个计算机视觉任务中的应用效果及其面临的挑战。无论是图像分类还是医学图像分析,注意力机制都能显著提升模型性能,并在不断发展的深度学习领域中扮演重要角色。
100 10
|
3月前
|
机器学习/深度学习 人工智能 PyTorch
AI智能体研发之路-模型篇(五):pytorch vs tensorflow框架DNN网络结构源码级对比
AI智能体研发之路-模型篇(五):pytorch vs tensorflow框架DNN网络结构源码级对比
79 1
|
6月前
|
机器学习/深度学习 PyTorch 算法框架/工具
基于Pytorch通过实例详细剖析CNN
基于Pytorch通过实例详细剖析CNN
72 1
基于Pytorch通过实例详细剖析CNN
|
6月前
|
机器学习/深度学习 算法 PyTorch
【PyTorch实战演练】深入剖析MTCNN(多任务级联卷积神经网络)并使用30行代码实现人脸识别
【PyTorch实战演练】深入剖析MTCNN(多任务级联卷积神经网络)并使用30行代码实现人脸识别
567 2
|
6月前
|
机器学习/深度学习 数据采集 PyTorch
PyTorch搭建卷积神经网络(ResNet-50网络)进行图像分类实战(附源码和数据集)
PyTorch搭建卷积神经网络(ResNet-50网络)进行图像分类实战(附源码和数据集)
235 1
|
6月前
|
机器学习/深度学习 PyTorch 算法框架/工具
PyTorch深度学习中卷积神经网络(CNN)的讲解及图像处理实战(超详细 附源码)
PyTorch深度学习中卷积神经网络(CNN)的讲解及图像处理实战(超详细 附源码)
311 0
|
6月前
|
机器学习/深度学习 人工智能 自然语言处理
PyTorch搭建图卷积神经网络(GCN)完成对论文分类及预测实战(附源码和数据集)
PyTorch搭建图卷积神经网络(GCN)完成对论文分类及预测实战(附源码和数据集)
337 1

热门文章

最新文章

下一篇
无影云桌面