PyTorch中的模型创建(一)+https://developer.aliyun.com/article/1544695?spm=a2c6h.13148508.setting.26.2a1e4f0e5cwuHg
卷积层
二维卷积
torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)
- in_channels: 输入通道数
- out_channels: 输出通道数(卷积核数量)
- kernel_size: 卷积核大小
- stride: 卷积步长
- padding: 边缘补零
- dilation: 卷积核元素之间的间距(空洞卷积/膨胀卷积的膨胀率)
- groups: 分组卷积的分组数
- bias: 是否带有偏置
import torch
import torch.nn as nn

# Three ways to configure a 2-D convolution (16 input channels, 33 output
# channels). Each assignment replaces the previous one; only the last layer
# is applied to the input below.

# Square 3x3 kernel with the same stride in both spatial dimensions.
m = nn.Conv2d(16, 33, 3, stride=2)
# Non-square (H, W) = (3, 5) kernel with per-dimension stride and padding.
m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
# Same as above, plus a per-dimension dilation.
m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))

# Batch of 20 samples, 16 channels, 50x100 spatial size.
input = torch.randn(20, 16, 50, 100)
output = m(input)
print(output.shape)  # torch.Size([20, 33, 26, 100])
转置卷积也常被称为逆卷积、反卷积,但它并不是卷积的严格逆运算:它只能把特征图恢复到卷积前的形状(尺寸),不能恢复卷积前的数值。
torch.nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)
- 输入:$(N, C_{in}, H_{in}, W_{in})$ 或者 $(C_{in}, H_{in}, W_{in})$
- 输出:$(N, C_{out}, H_{out}, W_{out})$ 或者 $(C_{out}, H_{out}, W_{out})$
转置卷积是一种卷积神经网络中的操作,它的作用是将输入的特征图进行上采样,从而增加特征图的尺寸。转置卷积通常用于生成器网络中,将低分辨率的图像转换为高分辨率的图像。
import torch
import torch.nn as nn

# Transposed convolution from 3 to 64 channels that doubles the spatial size.
# With dilation=1 and output_padding=0:
#   H_out = (H_in - 1) * stride - 2 * padding + (kernel_size - 1) + 1
#         = (32 - 1) * 2 - 2 * 1 + (4 - 1) + 1 = 64
transposed_conv = nn.ConvTranspose2d(3, 64, 4, stride=2, padding=1)

# A single 3-channel 32x32 sample.
input_tensor = torch.randn(1, 3, 32, 32)
output_tensor = transposed_conv(input_tensor)

# Report the shape before and after upsampling.
for caption, tensor in (
    ("输入张量的形状:", input_tensor),
    ("输出张量的形状:", output_tensor),
):
    print(caption, tensor.shape)
搭建全卷积网络结构案例
import torch
import torch.nn as nn
import torch.nn.functional as F


class FCN(nn.Module):
    """Small fully convolutional network.

    Three unpadded 3x3 convolutions (3 -> 32 -> 64 -> 128 channels) are
    followed by three 3x3 transposed convolutions
    (128 -> 64 -> 32 -> num_classes channels). Each convolution shrinks the
    height and width by 2 and each transposed convolution grows them by 2,
    so the output has the same spatial size as the input.

    Args:
        num_classes: Number of output channels of the last layer.
        device: Device the parameters are moved to. When ``None`` (default)
            CUDA is used if it is available, otherwise the CPU, so the model
            can also be built on machines without a GPU.
    """

    def __init__(self, num_classes, device=None):
        super().__init__()
        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"

        # kernel_size=3: size of the convolution kernel.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)

        self.upsample1 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=3)
        self.upsample2 = nn.ConvTranspose2d(in_channels=64, out_channels=32, kernel_size=3)
        self.upsample3 = nn.ConvTranspose2d(in_channels=32, out_channels=num_classes, kernel_size=3)

        # Move all layers at once instead of calling .cuda() on each of them.
        self.to(device)

    def forward(self, x):
        """Apply every layer in order, each followed by ReLU.

        ReLU is also applied after the last layer, so the returned tensor is
        non-negative.
        """
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.relu(self.upsample1(x))
        x = F.relu(self.upsample2(x))
        x = F.relu(self.upsample3(x))
        return x


if __name__ == "__main__":
    # torchsummary is a third-party package that is only needed for this demo.
    from torchsummary import summary

    num_classes = 10
    fcn_model = FCN(num_classes)
    print(fcn_model)
    # summary() defaults to device="cuda"; pass the device the model is on.
    model_device = next(fcn_model.parameters()).device
    summary(fcn_model, (3, 224, 224), device=model_device.type)
输出:
FCN( (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1)) (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1)) (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1)) (upsample1): ConvTranspose2d(128, 64, kernel_size=(3, 3), stride=(1, 1)) (upsample2): ConvTranspose2d(64, 32, kernel_size=(3, 3), stride=(1, 1)) (upsample3): ConvTranspose2d(32, 10, kernel_size=(3, 3), stride=(1, 1)) ) ---------------------------------------------------------------- Layer (type) Output Shape Param # ================================================================ Conv2d-1 [-1, 32, 222, 222] 896 Conv2d-2 [-1, 64, 220, 220] 18,496 Conv2d-3 [-1, 128, 218, 218] 73,856 ConvTranspose2d-4 [-1, 64, 220, 220] 73,792 ConvTranspose2d-5 [-1, 32, 222, 222] 18,464 ConvTranspose2d-6 [-1, 10, 224, 224] 2,890 ================================================================ Total params: 188,394 Trainable params: 188,394 Non-trainable params: 0 ---------------------------------------------------------------- Input size (MB): 0.57 Forward/backward pass size (MB): 121.57 Params size (MB): 0.72 Estimated Total Size (MB): 122.86 ----------------------------------------------------------------
搭建卷积+全连接的网络结构
import torch
import torch.nn as nn
import torch.nn.functional as F


class ConvNet(nn.Module):
    """Two convolutions followed by two fully connected layers.

    The first fully connected layer expects 64 * 28 * 28 input features,
    which is what two unpadded 3x3 convolutions produce from a 3-channel
    32x32 input (32 -> 30 -> 28 per spatial dimension, 64 channels).

    Args:
        num_classes: Number of output features of the last layer.
        device: Device the parameters are moved to. When ``None`` (default)
            CUDA is used if it is available, otherwise the CPU, so the model
            can also be built on machines without a GPU.
    """

    def __init__(self, num_classes=10, device=None):
        super().__init__()
        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"

        # kernel_size=3: size of the convolution kernel.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        # Keep the batch dimension and flatten everything after it.
        self.flatten = nn.Flatten(start_dim=1)
        self.fc1 = nn.Linear(64 * 28 * 28, 50)
        self.fc2 = nn.Linear(50, num_classes)

        # Move all layers at once instead of calling .cuda() on each of them.
        self.to(device)

    def forward(self, x):
        """Apply the layers in order; no activation is applied in between."""
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.fc2(x)
        return x


if __name__ == "__main__":
    # torchsummary is a third-party package that is only needed for this demo.
    from torchsummary import summary

    num_classes = 10
    conv_net = ConvNet(num_classes)
    model_device = next(conv_net.parameters()).device

    batch_size = 4
    # The input is a batch of four 32x32 RGB images on the model's device.
    input_tensor = torch.randn(batch_size, 3, 32, 32).to(model_device)
    output = conv_net(input_tensor)
    print(output.shape)
    # summary() defaults to device="cuda"; pass the device the model is on.
    summary(conv_net, (3, 32, 32), device=model_device.type)
输出:
torch.Size([4, 10]) ---------------------------------------------------------------- Layer (type) Output Shape Param # ================================================================ Conv2d-1 [-1, 32, 30, 30] 896 Conv2d-2 [-1, 64, 28, 28] 18,496 Flatten-3 [-1, 50176] 0 Linear-4 [-1, 50] 2,508,850 Linear-5 [-1, 10] 510 ================================================================ Total params: 2,528,752 Trainable params: 2,528,752 Non-trainable params: 0 ---------------------------------------------------------------- Input size (MB): 0.01 Forward/backward pass size (MB): 0.99 Params size (MB): 9.65 Estimated Total Size (MB): 10.64 ---------------------------------------------------------------- Process finished with exit code 0
池化层
池化包含最大池化和平均池化,有一维池化,二维池化,三维池化,在这里以二维池化为例
最大池化就是求一个区域中的最大值,来代替该区域。
torch.nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)
输入参数 kernel_size, stride, padding, dilation 可以是:
- 一个 int :代表长宽使用同样的参数
- 两个int组成的元组:第一个int用在H维度,第二个int用在W维度
# Square 3x3 pooling window, stride 2 in both spatial dimensions.
m1 = nn.MaxPool2d(kernel_size=3, stride=2)
# Non-square (H, W) = (3, 2) window with per-dimension stride (2, 1).
m2 = nn.MaxPool2d(kernel_size=(3, 2), stride=(2, 1))

# Batch of 4 samples, 3 channels, 24x24 spatial size.
input = torch.randn(4, 3, 24, 24)
output1, output2 = m1(input), m2(input)

# Print the shape of the input and of both pooled results.
for label, tensor in (
    ("input.shape = ", input),
    ("output1.shape = ", output1),
    ("output2.shape = ", output2),
):
    print(label, tensor.shape)
input.shape =  torch.Size([4, 3, 24, 24])
output1.shape =  torch.Size([4, 3, 11, 11])
output2.shape =  torch.Size([4, 3, 11, 23])
平均池化
平均池化就是用一个区域中的平均数来代替本区域
torch.nn.AvgPool2d(kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True, divisor_override=None)
import torch
import torch.nn as nn

# Square 3x3 averaging window, stride 2 in both spatial dimensions.
m1 = nn.AvgPool2d(kernel_size=3, stride=2)
# Non-square (H, W) = (3, 2) window with per-dimension stride (2, 1).
m2 = nn.AvgPool2d(kernel_size=(3, 2), stride=(2, 1))

# Batch of 4 samples, 3 channels, 24x24 spatial size.
input = torch.randn(4, 3, 24, 24)
output1 = m1(input)
output2 = m2(input)

# Map each printed label to the tensor whose shape it reports.
shapes_to_report = {
    "input.shape = ": input,
    "output1.shape = ": output1,
    "output2.shape = ": output2,
}
for label, tensor in shapes_to_report.items():
    print(label, tensor.shape)
input.shape =  torch.Size([4, 3, 24, 24])
output1.shape =  torch.Size([4, 3, 11, 11])
output2.shape =  torch.Size([4, 3, 11, 23])
BN层
BN,即Batch Normalization,是对每一个batch的数据进行归一化操作,可以使得网络训练更稳定,加速网络的收敛。
# Batch normalization over 100 channels with learnable scale and shift.
m_learnable = nn.BatchNorm2d(num_features=100)
# Same normalization, but without the learnable affine parameters.
m_non_learnable = nn.BatchNorm2d(num_features=100, affine=False)

# Batch of 20 samples, 100 channels, 35x45 spatial size.
input = torch.randn(20, 100, 35, 45)
output_learnable, output_non_learnable = m_learnable(input), m_non_learnable(input)

# Batch normalization keeps the shape of its input.
for label, tensor in (
    ("input.shape = ", input),
    ("output_learnable.shape = ", output_learnable),
    ("output_non_learnable.shape = ", output_non_learnable),
):
    print(label, tensor.shape)
input.shape =  torch.Size([20, 100, 35, 45])
output_learnable.shape =  torch.Size([20, 100, 35, 45])
output_non_learnable.shape =  torch.Size([20, 100, 35, 45])