9. CNN: Convolutional Neural Networks
Bilibili video tutorials: PyTorch深度学习实践 - 卷积神经网络(基础篇) and PyTorch深度学习实践 - 卷积神经网络(高级篇)
9.1 Revision
Fully Connected Neural Network: a network built entirely from Linear layers connected in series, so that every input node takes part in the computation of every output node of the next layer.
import torch
import torch.nn.functional as F

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.l1 = torch.nn.Linear(784, 512)
        self.l2 = torch.nn.Linear(512, 256)
        self.l3 = torch.nn.Linear(256, 128)
        self.l4 = torch.nn.Linear(128, 64)
        self.l5 = torch.nn.Linear(64, 10)

    def forward(self, x):
        x = x.view(-1, 784)        # flatten each image into a 784-dim vector
        x = F.relu(self.l1(x))
        x = F.relu(self.l2(x))
        x = F.relu(self.l3(x))
        x = F.relu(self.l4(x))
        return self.l5(x)          # raw logits; CrossEntropyLoss applies softmax itself

model = Net()
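Note that x.view(-1, 784) flattens each 1 × 28 × 28 MNIST image into a 784-dimensional vector; this is exactly the step that throws away the image's spatial layout.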
9.2 Introduction
Convolutional Neural Network
Note: whereas a fully connected network flattens the image and discards its spatial layout, a CNN keeps the input as a C × H × W tensor, extracting features with convolution and subsampling layers before a final fully connected classifier.
9.3 Convolution
9.3.1 Channel
- Single Input Channel: the kernel slides across the input; each output value is the sum of the element-wise products between the kernel and the patch it currently covers.
- 3 Input Channels: each input channel is convolved with its own channel of the kernel, and the three per-channel results are summed into a single output channel.

The C × H × W shapes change as follows: a 3 × H × W input convolved with one 3 × K × K kernel yields a 1 × (H − K + 1) × (W − K + 1) output.

- N Input Channels: in general the kernel must have as many channels as the input, i.e. an N × K × K kernel, and it still produces a single output channel.
- N Input Channels and M Output Channels

To obtain an M-channel output, M such kernels are needed; stacking them gives one weight tensor of shape M × N × K × K, as summarized below.
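In general, with stride 1 and no padding, the shapes obey

    (N, H, W) input ∗ (M, N, K, K) weight tensor → (M, H − K + 1, W − K + 1) output

so the number of output channels is determined solely by the number of kernels M.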
9.3.2 Layer
import torch

in_channels, out_channels = 5, 10
width, height = 100, 100
kernel_size = 3
batch_size = 1

# PyTorch expects input of shape (N, C, H, W); width == height here, so the order is harmless
input = torch.randn(batch_size, in_channels, width, height)
conv_layer = torch.nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size)
output = conv_layer(input)

print(input.shape)              # (N, C_in, H, W)
print(conv_layer.weight.shape)  # (C_out, C_in, K, K)
print(output.shape)             # (N, C_out, H - K + 1, W - K + 1)
torch.Size([1, 5, 100, 100])
torch.Size([10, 5, 3, 3])
torch.Size([1, 10, 98, 98])
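The spatial size drops from 100 to 98 because a 3 × 3 kernel can be placed at only 100 − 3 + 1 = 98 positions along each axis.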
9.3.3 Padding
With padding=1, the 5 × 5 input is first zero-padded to 7 × 7, so after convolving with a 3 × 3 kernel the output is again 5 × 5.
import torch

input = [3, 4, 6, 5, 7,
         2, 4, 6, 8, 2,
         1, 6, 7, 8, 4,
         9, 7, 4, 6, 2,
         3, 7, 5, 4, 1]
input = torch.Tensor(input).view(1, 1, 5, 5)  # (B, C, H, W)

conv_layer = torch.nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3,
                             padding=1, bias=False)

# hand-set the kernel; weight shape is (C_out, C_in, K, K)
kernel = torch.Tensor([1, 2, 3, 4, 5, 6, 7, 8, 9]).view(1, 1, 3, 3)
conv_layer.weight.data = kernel.data

output = conv_layer(input)
print(output)
tensor([[[[ 91., 168., 224., 215., 127.],
          [114., 211., 295., 262., 149.],
          [192., 259., 282., 214., 122.],
          [194., 251., 253., 169.,  86.],
          [ 96., 112., 110.,  68.,  31.]]]], grad_fn=<ConvolutionBackward0>)
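In general, with kernel size K, padding P and stride 1, the output width is W + 2P − K + 1; choosing P = (K − 1)/2 for odd K (here K = 3, P = 1) leaves the spatial size unchanged.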
9.3.4 Stride
The stride parameter is the step size: with stride=2, the kernel moves two cells at a time, both horizontally and vertically, which effectively reduces the output's width and height.
import torch

input = [3, 4, 6, 5, 7,
         2, 4, 6, 8, 2,
         1, 6, 7, 8, 4,
         9, 7, 4, 6, 2,
         3, 7, 5, 4, 1]
input = torch.Tensor(input).view(1, 1, 5, 5)  # (B, C, H, W)

conv_layer = torch.nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3,
                             stride=2, bias=False)

kernel = torch.Tensor([1, 2, 3, 4, 5, 6, 7, 8, 9]).view(1, 1, 3, 3)  # (C_out, C_in, K, K)
conv_layer.weight.data = kernel.data

output = conv_layer(input)
print(output)
tensor([[[[211., 262.],
          [251., 169.]]]], grad_fn=<ConvolutionBackward0>)
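The general output-size formula is ⌊(W + 2P − K) / S⌋ + 1; here ⌊(5 + 0 − 3) / 2⌋ + 1 = 2, which matches the 2 × 2 result above.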
9.4 Max Pooling
Max Pooling: with kernel_size=2, the stride defaults to the kernel size (2), so the input is split into non-overlapping 2 × 2 blocks and the maximum of each block is kept:
import torch

input = [3, 4, 6, 5,
         2, 4, 6, 8,
         1, 6, 7, 8,
         9, 7, 4, 6]
input = torch.Tensor(input).view(1, 1, 4, 4)

maxpooling_layer = torch.nn.MaxPool2d(kernel_size=2)  # stride defaults to kernel_size
output = maxpooling_layer(input)
print(output)
tensor([[[[4., 8.],
          [9., 8.]]]])
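Pooling operates on each channel independently, so it halves the width and height while leaving the number of channels unchanged.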
9.5 A Simple CNN
The network below stacks two convolution + max-pooling stages and ends with a fully connected layer. For a MNIST batch of shape (batch, 1, 28, 28), the shapes evolve as:

(1, 28, 28) → conv 5 × 5 → (10, 24, 24) → maxpool 2 × 2 → (10, 12, 12) → conv 5 × 5 → (20, 8, 8) → maxpool 2 × 2 → (20, 4, 4) → flatten → 320 → Linear → 10

The code:
import torch
import torch.nn.functional as F

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = torch.nn.Conv2d(10, 20, kernel_size=5)
        self.pooling = torch.nn.MaxPool2d(2)
        self.fc = torch.nn.Linear(320, 10)

    def forward(self, x):
        batch_size = x.size(0)
        x = F.relu(self.pooling(self.conv1(x)))  # (n, 1, 28, 28) -> (n, 10, 12, 12)
        x = F.relu(self.pooling(self.conv2(x)))  # -> (n, 20, 4, 4)
        x = x.view(batch_size, -1)               # flatten to (n, 320)
        x = self.fc(x)
        return x

model = Net()
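To double-check where the 320 in self.fc comes from, one can push a dummy batch through the convolution and pooling layers; below is a minimal sketch using the same layer shapes as above:

import torch

x = torch.randn(1, 1, 28, 28)                  # one dummy MNIST-sized image
conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)
conv2 = torch.nn.Conv2d(10, 20, kernel_size=5)
pool = torch.nn.MaxPool2d(2)

x = pool(conv1(x))   # (1, 10, 24, 24) -> (1, 10, 12, 12)
x = pool(conv2(x))   # (1, 20, 8, 8)  -> (1, 20, 4, 4)
print(x.shape)       # torch.Size([1, 20, 4, 4]); 20 * 4 * 4 = 320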
9.5.1 GPU
Prerequisite for training on a GPU: install a CUDA-enabled build of PyTorch.
- Move Model to GPU: add the following right after creating the model
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)
- Move Tensors to GPU: add the following in both the training and test loops
inputs, target = inputs.to(device), target.to(device)
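Note that the model's parameters and the data tensors must end up on the same device; feeding CPU tensors to a model on cuda:0 raises a runtime error.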
9.5.2 Code 1
import torch
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision import datasets
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt

batch_size = 64
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))  # MNIST mean and std
])

train_dataset = datasets.MNIST(root='../data/mnist', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_dataset = datasets.MNIST(root='../data/mnist', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = torch.nn.Conv2d(10, 20, kernel_size=5)
        self.pooling = torch.nn.MaxPool2d(2)
        self.fc = torch.nn.Linear(320, 10)

    def forward(self, x):
        batch_size = x.size(0)
        x = F.relu(self.pooling(self.conv1(x)))  # (n, 1, 28, 28) -> (n, 10, 12, 12)
        x = F.relu(self.pooling(self.conv2(x)))  # -> (n, 20, 4, 4)
        x = x.view(batch_size, -1)               # flatten to (n, 320)
        x = self.fc(x)
        return x

model = Net()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  # GPU
model.to(device)

criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

def train(epoch):
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, target = data
        inputs, target = inputs.to(device), target.to(device)  # GPU
        optimizer.zero_grad()

        # forward + backward + update
        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # prints every 300 mini-batches; note the loss is divided by 2000, not 300
        if batch_idx % 300 == 299:
            print('[%d, %3d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / 2000))
            running_loss = 0.0

accuracy = []

def test():
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients needed during evaluation
        for data in test_loader:
            inputs, target = data
            inputs, target = inputs.to(device), target.to(device)  # GPU
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, dim=1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    print('Accuracy on test set: %d %% [%d/%d]' % (100 * correct / total, correct, total))
    accuracy.append(100 * correct / total)

if __name__ == '__main__':
    for epoch in range(10):
        train(epoch)
        test()
    print(accuracy)
    plt.plot(range(10), accuracy)
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.grid()
    plt.show()
[1, 300] loss: 0.091
[1, 600] loss: 0.027
[1, 900] loss: 0.020
Accuracy on test set: 97 % [9700/10000]
[2, 300] loss: 0.017
[2, 600] loss: 0.014
[2, 900] loss: 0.013
Accuracy on test set: 97 % [9799/10000]
[3, 300] loss: 0.012
[3, 600] loss: 0.011
[3, 900] loss: 0.011
Accuracy on test set: 98 % [9813/10000]
[4, 300] loss: 0.010
[4, 600] loss: 0.009
[4, 900] loss: 0.009
Accuracy on test set: 98 % [9838/10000]
[5, 300] loss: 0.008
[5, 600] loss: 0.008
[5, 900] loss: 0.008
Accuracy on test set: 98 % [9846/10000]
[6, 300] loss: 0.007
[6, 600] loss: 0.008
[6, 900] loss: 0.007
Accuracy on test set: 98 % [9858/10000]
[7, 300] loss: 0.006
[7, 600] loss: 0.007
[7, 900] loss: 0.007
Accuracy on test set: 98 % [9869/10000]
[8, 300] loss: 0.006
[8, 600] loss: 0.006
[8, 900] loss: 0.006
Accuracy on test set: 98 % [9869/10000]
[9, 300] loss: 0.006
[9, 600] loss: 0.006
[9, 900] loss: 0.006
Accuracy on test set: 98 % [9849/10000]
[10, 300] loss: 0.005
[10, 600] loss: 0.005
[10, 900] loss: 0.005
Accuracy on test set: 98 % [9849/10000]
[97.0, 97.99, 98.13, 98.38, 98.46, 98.58, 98.69, 98.69, 98.49, 98.49]
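Test accuracy reaches 98% within three epochs and then plateaus around 98.5%, peaking at 98.69% in epochs 7 and 8.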