深度学习之目标检测--Pytorch实战 2

简介: 深度学习之目标检测--Pytorch实战

四、神经网络的搭建

# nn_module.py
# @Time: 2022/1/13 21:42
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
from torch import nn


class NeuralNetwork(nn.Module):
    """Minimal nn.Module demo: forward simply adds one to its input."""

    def __init__(self) -> None:
        super().__init__()

    def forward(self, input):
        # Works on a tensor of any shape; returns input + 1 elementwise.
        return input + 1


neuralnetwork = NeuralNetwork()
x = torch.tensor(1.0)
output = neuralnetwork(x)
print(output)

fc2efbae29d24acf8c5e0cd5c6e14b71.png

二维卷积层Convolution Layers

1b061bcb765f42dfac128920b1398bde.png

# nn_conv.py
# @Time: 2022/1/13 22:08
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
import torch.nn.functional as F

# A 5x5 single-channel input and a 3x3 kernel, written as plain integers.
input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]])
kernel = torch.tensor([[1, 2, 1],
                       [0, 1, 0],
                       [2, 1, 0]])

# F.conv2d expects (batch, channels, H, W), so add two leading singleton dims.
input = input.reshape(1, 1, 5, 5)
kernel = kernel.reshape(1, 1, 3, 3)
print(input.shape)
print(kernel.shape)

# Stride 1: kernel visits every position -> 3x3 output.
output = F.conv2d(input, kernel, stride=1)
print(output)

# Stride 2: kernel moves two cells at a time -> 2x2 output.
output2 = F.conv2d(input, kernel, stride=2)
print(output2)

# Stride 1 with one ring of zero padding -> output keeps the 5x5 size.
output3 = F.conv2d(input, kernel, stride=1, padding=1)
print(output3)

62d89c872cf448fc9b6a570948fbc113.png

# src/nn_conv2d.py
# @Time: 2022/1/13 22:37
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
import torchvision
from torch import nn
from torch.nn import Conv2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
# CIFAR-10 test split as tensors; downloaded on first run.
dataset = torchvision.datasets.CIFAR10("../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64)
class NeuralNetwork(nn.Module):
    """Single conv layer: 3 -> 6 channels, 3x3 kernel, stride 1, no padding."""
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.conv1 = Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)
    def forward(self,x):
        x = self.conv1(x)
        return x
neuralnetwork = NeuralNetwork()
# print(neuralnetwork)
writer = SummaryWriter("../logs")
step = 0
for data in dataloader:
    imgs, targets = data
    output = neuralnetwork(imgs)
    print(imgs.shape)
    print(output.shape)
    # torch.Size([64, 3, 32, 32])
    writer.add_images("input", imgs, step)
    # torch.Size([64, 6, 30, 30])
    # add_images cannot render 6 channels, so fold the extra channels into the
    # batch dimension: (64, 6, 30, 30) -> (128, 3, 30, 30).
    output = torch.reshape(output, (-1, 3, 30, 30))
    writer.add_images("output", output, step)
    step += 1
writer.close()

95cb5dabe74443749f7f83f34f77c1f4.png7e2181ca4d564f6ab2f2980c0938e1f6.png

池化层 Pooling layers

463f337e141b416fb752d8cef779f8d5.png


226385d9e099402bae530c47bb0843e0.png


# nn_maxpool.py
# @Time: 2022/1/14 9:53
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
from torch import nn
from torch.nn import MaxPool2d

# dtype=float32 is required: max_pool2d is not implemented for integer tensors.
input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]], dtype=torch.float32)
# Reshape to (batch, channels, H, W) for the module API.
input = torch.reshape(input, (-1, 1, 5, 5))
print(input.shape)


class NueralNetwork(nn.Module):
    """Single 3x3 max-pool; ceil_mode=True keeps partially covered windows."""

    def __init__(self):
        super(NueralNetwork, self).__init__()
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=True)

    def forward(self, input):
        output = self.maxpool1(input)
        return output


nueralnetwork = NueralNetwork()
output = nueralnetwork(input)
print(output)
# BUGFIX: the expected console output below was pasted as bare code, which made
# the script raise NameError on `tensor`; it is kept here as a comment only.
# tensor([[[[2., 3.],
#           [5., 1.]]]])


e2fd77ebceec4bb89d0f325816ff9f63.png

ceil_mode=False
tensor([[[[2.]]]])

5cc0783aa3a642d8a790e11819d1d7d6.png

# @Time: 2022/1/14 9:53
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
import torchvision
from torch import nn
from torch.nn import MaxPool2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
# CIFAR-10 test split; downloaded on first run.
dataset = torchvision.datasets.CIFAR10("../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64)
# Earlier hand-made example, kept for reference:
# input = torch.tensor([[1, 2, 0, 3, 1],
#                       [0, 1, 2, 3, 1],
#                       [1, 2, 1, 0, 0],
#                       [5, 2, 3, 1, 1],
#                       [2, 1, 0, 1, 1]], dtype=torch.float32)
#
# input = torch.reshape(input, (-1, 1, 5, 5))
# print(input.shape)
class NueralNetwork(nn.Module):
    """Single 3x3 max-pool; ceil_mode=True keeps partially covered windows."""
    def __init__(self):
        super(NueralNetwork, self).__init__()
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=True)
    def forward(self, input):
        output = self.maxpool1(input)
        return output
nueralnetwork = NueralNetwork()
writer = SummaryWriter("../logs")
step = 0
for data in dataloader:
    imgs, targets = data
    writer.add_images("input", imgs, step)
    # Pooling keeps the channel count (3), so the result can be logged directly.
    output = nueralnetwork(imgs)
    writer.add_images("output", output, step)
    step += 1
writer.close()
# output = nueralnetwork(input)
#
# print(output)

eee40e838e3e467ba67b932c3d9a4a47.png


1bd84e5c7b1e4af199aeda3e90a82df3.png

非线性激活Non-linear Activations (weighted sum, nonlinearity)

nn.ReLU

493e82a4bf85493b81ae84c54ff9e03c.png

import torch
from torch import nn
from torch.nn import ReLU

# 2x2 example holding both signs; reshape to (N, C, H, W) for the module API.
# (BUGFIX: dropped the torchvision/DataLoader imports — unused in this snippet.)
input = torch.tensor([[1, -0.5],
                      [-1, 3]])
input = torch.reshape(input, (-1, 1, 2, 2))
print(input.shape)
print(input)
# BUGFIX: the second line of this pasted repr lacked a leading '#', which made
# the whole snippet a SyntaxError; both lines are now proper comments.
# tensor([[[[ 1.0000, -0.5000],
#           [-1.0000,  3.0000]]]])


class NerualNetwork(nn.Module):
    """Applies ReLU: negative entries clamp to 0, non-negative pass through."""

    def __init__(self):
        super(NerualNetwork, self).__init__()
        self.relu1 = ReLU()

    def forward(self, input):
        output = self.relu1(input)
        return output


nerualnetwork = NerualNetwork()
output = nerualnetwork(input)
print(output)
# BUGFIX: expected output was pasted as bare code (NameError); kept as comment.
# tensor([[[[1., 0.],
#           [0., 3.]]]])

05896672021948e0b6edee74155a7607.png

nn.Sigmoid

24f242a23148415485065f308fc238f7.png

tensorboard里面 step 不从0 开始,是图片显示的问题, 用命令:

tensorboard --logdir=logs --samples_per_plugin images=1000
# @Time: 2022/1/14 10:34
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
import torchvision
from torch import nn
from torch.nn import ReLU, Sigmoid
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
# Small hand-made tensor kept from the ReLU demo (printed but otherwise unused).
input = torch.tensor([[1, -0.5],
                      [-1, 3]])
input = torch.reshape(input, (-1, 1, 2, 2))
print(input.shape)
print(input)
dataset = torchvision.datasets.CIFAR10("../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64, )
class NerualNetwork(nn.Module):
    """Applies Sigmoid to its input (relu1 is constructed but never used)."""
    def __init__(self):
        super(NerualNetwork, self).__init__()
        self.relu1 = ReLU()
        # NOTE(review): attribute name looks like a typo for "sigmoid1"; kept as-is.
        self.sigmod1 = Sigmoid()
    def forward(self, input):
        output = self.sigmod1(input)
        return output
nerualnetwork = NerualNetwork()
writer = SummaryWriter("../logs")
step = 0
for data in dataloader:
    imgs, target = data
    writer.add_images("input", imgs, global_step=step)
    # Sigmoid squashes every pixel into (0, 1), visibly washing out the images.
    output = nerualnetwork(imgs)
    writer.add_images("output", output, global_step=step)
    step += 1
writer.close()

e1a825ad930848cf85a9047c79d04737.png


994c2e2c69154747989639525aac84e0.png

归一化层 Normalization Layers(注:Normalization 是归一化,不同于正则化 Regularization)

nn.BatchNorm2d

Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift .

d9ea1fccfcd440e49747a707abc6365d.png

Recurrent Layers

nn.RNN

nn.LSTM

Sparse Layers

nn.Embedding

A simple lookup table that stores embeddings of a fixed dictionary and size.

Linear Layers

# nn_linear.py
# @Time: 2022/1/14 11:48
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
import torchvision
from torch import nn
from torch.nn import Linear
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10("../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
# BUGFIX: drop_last=True. The Linear layer below expects exactly
# 64 * 3 * 32 * 32 = 196608 inputs, but CIFAR-10's test split has 10000 images,
# so the final batch of 10000 % 64 = 16 images crashed the loop.
dataloader = DataLoader(dataset, batch_size=64, drop_last=True)


class NerualNetwork(nn.Module):
    """One fully-connected layer mapping a flattened 64-image batch to 10 values."""

    def __init__(self):
        super(NerualNetwork, self).__init__()
        self.linear1 = Linear(196608, 10)

    def forward(self, input):
        output = self.linear1(input)
        return output


nerualnetwork = NerualNetwork()
for data in dataloader:
    imgs, targets = data
    print(imgs.shape)
    # torch.Size([64, 3, 32, 32])
    # output = torch.reshape(imgs, (1, 1, 1, -1))
    # flatten collapses the entire batch into one 1-D vector of length 196608.
    output = torch.flatten(imgs)
    print(output.shape)
    # torch.Size([196608])
    output = nerualnetwork(output)
    print(output.shape)
    # torch.Size([10])
85a860d57a204242ad75530555e1d1a6.png

SEQUENTIAL

# nn_seq.py
# @Time: 2022/1/14 12:57
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.tensorboard import SummaryWriter
class NueralNetwork(nn.Module):
    """CIFAR-10 classifier: three conv+pool stages, then two linear layers."""

    def __init__(self):
        super(NueralNetwork, self).__init__()
        # nn.Sequential replaces the nine separate attributes of the long-hand
        # version. Each 5x5 conv keeps the 32x32 spatial size (padding=2) while
        # each pool halves it: 32 -> 16 -> 8 -> 4, so Flatten yields
        # 64 * 4 * 4 = 1024 features for the classifier head.
        self.model1 = Sequential(
            Conv2d(in_channels=3, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Flatten(),
            Linear(in_features=1024, out_features=64),
            Linear(in_features=64, out_features=10),
        )

    def forward(self, x):
        return self.model1(x)
nueralnetwork = NueralNetwork()
print(nueralnetwork)
# Smoke-test with a dummy batch: 64 all-ones images of shape 3x32x32.
input = torch.ones((64, 3, 32, 32))
output = nueralnetwork(input)
print(output.shape)
# Export the computation graph so TensorBoard can render the model structure.
writer = SummaryWriter("../logs")
writer.add_graph(nueralnetwork, input)
writer.close()

输出结果是

NueralNetwork(
  (model1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Flatten(start_dim=1, end_dim=-1)
    (7): Linear(in_features=1024, out_features=64, bias=True)
    (8): Linear(in_features=64, out_features=10, bias=True)
  )
)
torch.Size([64, 10])

3d62fa7cd9054e58966110e2f6740eb3.png793b60c6be104169a31ceece93d6def1.pnge223dadc91bf495da959bd88206202ef.png

损失函数Loss Functions

nn.L1Loss

28cb6dd32d5d44a69c8f64d5b9fd87a1.png
nn.MSELoss

# nn_loss_network.py
import torchvision
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.data import DataLoader
dataset = torchvision.datasets.CIFAR10("../dataset", train=False, download=True,
                                       transform=torchvision.transforms.ToTensor())
# batch_size=1: inspect the loss for one image at a time.
dataloader = DataLoader(dataset, batch_size=1)
class NueralNetwork(nn.Module):
    """CIFAR-10 classifier: three conv+pool stages, flatten, two linear layers."""
    def __init__(self):
        super(NueralNetwork, self).__init__()
        self.model1 = Sequential(
            Conv2d(in_channels=3, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Flatten(),
            Linear(in_features=1024, out_features=64),
            Linear(in_features=64, out_features=10)
        )
    def forward(self, x):
        x = self.model1(x)
        return x
# CrossEntropyLoss takes raw logits plus integer class targets.
loss = nn.CrossEntropyLoss()
nueralnetwork = NueralNetwork()
for data in dataloader:
    imgs, targets = data
    outputs = nueralnetwork(imgs)
    result_loss = loss(outputs, targets)
    print(result_loss)
    print(outputs)
    print(targets)

b43d7005acb2438db38d42d89dfce382.png

nn.CrossEntropyLoss

优化器TORCH.OPTIM

# nn_optim.py
# @Time: 2022/1/14 20:21
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
import torchvision
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10("../dataset", train=False, download=True,
                                       transform=torchvision.transforms.ToTensor())
dataloader = DataLoader(dataset, batch_size=1)


class NueralNetwork(nn.Module):
    """CIFAR-10 classifier used to demonstrate a full SGD optimisation loop."""

    def __init__(self):
        super(NueralNetwork, self).__init__()
        self.model1 = Sequential(
            Conv2d(in_channels=3, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Flatten(),
            Linear(in_features=1024, out_features=64),
            Linear(in_features=64, out_features=10)
        )

    def forward(self, x):
        x = self.model1(x)
        return x


loss = nn.CrossEntropyLoss()
nueralnetwork = NueralNetwork()
# Define the optimizer.
optim = torch.optim.SGD(nueralnetwork.parameters(), lr=0.01)
for epoch in range(20):
    running_loss = 0.0
    for data in dataloader:
        imgs, targets = data
        outputs = nueralnetwork(imgs)
        result_loss = loss(outputs, targets)
        # Clear stale gradients before backprop.
        optim.zero_grad()
        result_loss.backward()
        optim.step()
        # BUGFIX: accumulate the detached Python float. Summing the loss tensor
        # itself kept every batch's autograd graph alive, growing memory
        # steadily over the epoch.
        running_loss += result_loss.item()
    print(running_loss)

88aa589c9d244d38915c3190a12eaf2f.png

断点调试

0a0521858b55499fb5a4b646e06f4871.png595e0af8bd5040edbee945a2e75018de.png03aeec62f05b4885aef1ae60e06defb7.png

网络模型的增删改

# model_pretrained.py
import torchvision
from torch import nn

# train_data = torchvision.datasets.ImageNet("../data_image_net", split='train', download=True,
#                                            transform=torchvision.transforms.ToTensor())
# BUGFIX: the error message below was pasted as bare text, breaking the script
# with a SyntaxError. Attempting the download above fails with:
# RuntimeError: The dataset is no longer publicly accessible. You need to download the archives externally and place them in the root directory.
vgg16_false = torchvision.models.vgg16(pretrained=False)
vgg16_true = torchvision.models.vgg16(pretrained=True)
print(vgg16_true)

96c624a099b24ff6adb2d42ec985986e.png

train_data = torchvision.datasets.CIFAR10('../data', train=True, transform=torchvision.transforms.ToTensor(),
                                          download=True)
# Append an extra 1000 -> 10 linear layer so the ImageNet VGG16 head can serve
# CIFAR-10's 10 classes.
vgg16_true.classifier.add_module('add_linear', nn.Linear(1000, 10))
print(vgg16_true)
print(vgg16_false) 

6424f732c3174785b43ae65ac588a858.png

# Alternative to appending: replace classifier[6] in place, swapping the
# 1000-class output layer for a 10-class one.
vgg16_false.classifier[6] = nn.Linear(4096, 10)
print(vgg16_false)

bd97f494fb30448ea4d2908f6b9c9d1a.png

数据集下载的位置:Downloading:

https://download.pytorch.org/models/vgg16-397923af.pth

to C:\Users\15718/.cache\torch\hub\checkpoints\vgg16-397923af.pth


***如果遇到数据集较大,网络不通畅,建议:复制下载链接,用迅雷下 ***

模型的保存与加载

# model_save.py
import torch
import torchvision
from torch import nn
vgg16 = torchvision.models.vgg16(pretrained=False)
# Save way 1: the whole model (structure + parameters).
torch.save(vgg16, "vgg16_method1.pth")
# Save way 2: parameters only, as a state_dict (officially recommended).
torch.save(vgg16.state_dict(), "vgg16_method2.pth")
# Trap: a custom class saved via way 1 can only be loaded where the class
# definition is importable.
class NeuralNetwork(nn.Module):
    """Minimal custom model used to demonstrate the way-1 loading trap."""
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)
    def forward(self,x):
        x = self.conv1(x)
        return x
neuralnetwork = NeuralNetwork()
torch.save(neuralnetwork, "neuralnetwork_method1.pth")
# model_load.py
import torch
from model_save import *
# Way 1 -- pairs with save way 1: load structure + parameters in one call.
model = torch.load("vgg16_method1.pth")
# print(model)

ef52101fc1334db79d305d117d11fffa.png

# Way 2: loading the state_dict file yields an OrderedDict of tensors, not a model.
model = torch.load("vgg16_method2.pth")
print(model)

308aa6fbd7c8410aaab154410df7eea4.png

# Way 2 properly: rebuild the VGG skeleton, then load the saved parameters into it.
vgg16 = torchvision.models.vgg16(pretrained=False)
vgg16.load_state_dict(torch.load("vgg16_method2.pth"))
print(vgg16)

ddf89b82c1ae41a482d0c3a75eaf70b3.png

# Trap: loading a way-1 custom model needs the class definition in scope
# (provided here by `from model_save import *`); otherwise torch.load fails.
model = torch.load("neuralnetwork_method1.pth")
print(model)


6e42c4d4ae8048db838c0042f64e6d84.png

模型训练套路

# model.py
# @Time: 2022/1/15 9:03
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
from torch import nn
# Build the CIFAR-10 network: three conv/pool stages plus two linear layers.
class NurealNetwork(nn.Module):
    """CIFAR-10 classifier; maps input (N, 3, 32, 32) to logits (N, 10)."""

    def __init__(self):
        super(NurealNetwork, self).__init__()
        # Spatial size shrinks 32 -> 16 -> 8 -> 4; final features 64*4*4 = 1024.
        self.model = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
            nn.Linear(64 * 4 * 4, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        return self.model(x)


if __name__ == '__main__':
    # Quick sanity check: a dummy batch of 64 images must yield shape (64, 10).
    net = NurealNetwork()
    print(net(torch.ones((64, 3, 32, 32))).shape)
# train.py
# @Time: 2022/1/15 8:43
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
# Prepare the datasets.
import torch.optim
import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from model import *
train_data = torchvision.datasets.CIFAR10(root="../dataset", train=True,transform=torchvision.transforms.ToTensor(),
                                          download=True)
test_data = torchvision.datasets.CIFAR10(root="../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                         download=True)
train_data_size = len(train_data)
test_data_size = len(test_data)
# str.format interpolation: with train_data_size == 10 this would print
# "训练数据集的长度为:10" ("length of the training set: 10").
print("训练数据集的长度为:{}".format(train_data_size))
print("测试数据集的长度为:{}".format(test_data_size))
# Wrap the datasets in DataLoaders.
train_dataloader = DataLoader(train_data,batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)
# Build the network (NurealNetwork is defined in model.py).
nurealnetwork = NurealNetwork()
# Loss function.
loss_fn = nn.CrossEntropyLoss()
# Optimizer.
# learning_rate = 0.01
learning_rate = 1e-2
optimizer = torch.optim.SGD(nurealnetwork.parameters(), lr=learning_rate)
# Bookkeeping for the run:
# number of optimisation steps taken so far,
total_train_step = 0
# number of completed evaluation passes,
total_test_step = 0
# number of epochs.
epoch = 10
# TensorBoard writer for scalar curves.
writer = SummaryWriter("../logs_train")
for i in range(epoch):
    print("------第 {} 轮训练开始------".format(i + 1))
    # Training mode (affects Dropout/BatchNorm-style layers).
    nurealnetwork.train()
    for data in train_dataloader:
        imgs, targets = data
        outputs = nurealnetwork(imgs)
        loss = loss_fn(outputs, targets)
        # Optimise: clear old gradients,
        optimizer.zero_grad()
        # backpropagate to compute fresh gradients,
        loss.backward()
        # then update the parameters.
        optimizer.step()
        # One training step finished.
        total_train_step += 1
        # Log only every 100 steps to keep the console readable.
        if total_train_step % 100 == 0:
            print("训练次数:{} , Loss: {}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)
    # Evaluation pass over the test split.
    nurealnetwork.eval()
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad():
        for data in test_dataloader:
            imgs, targets = data
            outputs = nurealnetwork(imgs)
            # Compare prediction against the ground truth.
            loss = loss_fn(outputs, targets)
            total_test_loss += loss.item()
            # argmax(1) picks the predicted class for each image in the batch.
            accuracy = (outputs.argmax(1) == targets).sum()
            total_accuracy += accuracy
    print("整体测试集上的Loss:{}".format(total_test_loss))
    print("整体测试集上的正确率:{}".format(total_accuracy / test_data_size))
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accracy", total_accuracy / test_data_size, total_test_step)
    total_test_step += 1
    # Save a checkpoint (whole model) after every epoch.
    torch.save(nurealnetwork, "nurealnetwork_{}.pth".format(i))
    # Officially recommended alternative: save only the state_dict.
    # torch.save(nurealnetwork.state_dict(), "nurealnetwork_{}.pth".format(i))
    print("模型已保存")
writer.close()

96fc9a5a34a14c03aeb32aadf3d69a77.png


43239704ec424d93b67896e4a0714e7f.png


eeb153f9f03944b2899b419d502d2918.png

GPU训练模型

CPU训练 vs GPU训练

CPU 用时

77d016ebbb5b4b9887c86d203ed884af.png

GPU 用时

51cd43269bd845efbdfe018ac850ead6.png

用任意的模型测试神经网络能否预测成功

# test.py
# @Time: 2022/1/15 15:40
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
import torchvision
from PIL import Image
from torch import nn
image_path = "../images/dog.png"
image = Image.open(image_path)
print(image)
# PNGs may carry an alpha channel; force plain 3-channel RGB.
image = image.convert("RGB")
# Match the training pipeline: resize to 32x32, then convert to a tensor.
transform =torchvision.transforms.Compose([torchvision.transforms.Resize((32, 32)),
                                           torchvision.transforms.ToTensor()])
image =transform(image)
print(image.shape)
class Tudui(nn.Module):
    """Same architecture as the training-time network (see model.py)."""
    def __init__(self):
        super(Tudui, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
            nn.Linear(64*4*4, 64),
            nn.Linear(64, 10)
        )
    def forward(self, x):
        x = self.model(x)
        return x
# NOTE(review): this checkpoint was saved from a GPU run; on a CPU-only machine
# torch.load would need map_location — confirm the target environment has CUDA.
model = torch.load("tudui_29_gpu.pth")
print(model)
# Add the batch dimension expected by the network.
image = torch.reshape(image, (1, 3, 32, 32))
model.eval()
with torch.no_grad():
    # Move the input onto the GPU to match the model's device.
    image = image.cuda()
    output = model(image)
print(output)
# Index of the highest logit = predicted CIFAR-10 class.
print(output.argmax(1))

364c6467ef664d96b33f32ae1a7807a6.png

五、项目实战

YOLO v5训练

下载代码:https://github.com/ultralytics/yolov5/tree/v6.0

安装,配置环境

pip install -r requirements.txt

如果安装依赖库太慢了或者失败了,建议在Anaconda命令行里,进入项目的对应仓库里,安装依赖库,命令如下

pip install 依赖库 -i https://pypi.tuna.tsinghua.edu.cn/simple


d418b89817064b148dd93922cf334d8a.png

def parse_opt():
    """Build and parse the command-line options for YOLOv5 detection."""
    parser = argparse.ArgumentParser()
    # Pretrained weights; yolov5s/m/l/x.pt grow in size and training time, with better accuracy.
    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)')
    # Input path (file/dir/URL/glob); '0' selects the webcam.
    parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam')
    # Inference image size; defaults to 640 px.
    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
    parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
    # Choose CPU or GPU(s); multiple GPU ids may be listed.
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    # Preview images (or video frames) while detecting.
    parser.add_argument('--view-img', action='store_true', help='show results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--visualize', action='store_true', help='visualize features')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
    parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
    parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
    opt = parser.parse_args()
    # A single size value is duplicated to serve as both height and width.
    opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1  # expand
    print_args(FILE.stem, opt)
    return opt

b5fc6247e5d848ad99bc203ff90d0762.png

5fb55308ae9847b3a353d36163d94818.png

效果如图:

image.jpeg



网络异常,图片无法展示
|


image.jpeg



image.jpeg


image.jpeg

视频检测


image.png

将batch_size = 32 调为4,不然显卡会带不动

测试集跑完整个视频,大概花了半个小时

本地跑不动的可以用Google colab 云端服务器跑

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)')
    # Path of the input video (file/folder; 0 selects the webcam).
    parser.add_argument('--source', type=str, default='data/video/movie.mp4', help='source')  # file/folder, 0 for webcam
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default='runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    opt = parser.parse_args()
    print(opt)
    check_requirements(exclude=('pycocotools', 'thop'))
    # Inference only — no gradients needed.
    with torch.no_grad():
        if opt.update:  # update all models (to fix SourceChangeWarning)
            for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
                detect()
                strip_optimizer(opt.weights)
        else:
            detect()

视频截图:

image.png

训练coco128数据集

设置参数

40937eb6df4c4b569335931f518583ce.png

4a17415166164eeb968675c6963eaa4d.png73e4ed159fcd4ac3aabc9e0477607813.png


f695b1c03b9a473ebaa24e2c6405b8ea.png

e5abae92b92e4b4bb4ec4979a5dcf5ed.png

image.png

image.png


image.png

训练结果:



image.png


image.png



image.png


image.png

VOC2007(Visual Object Classes)数据集


image.png

Annotations:包含了xml文件,描述了图片的各种信息,特别是标注出了目标的位置坐标

ImageSets:主要关注下main文件夹的内容,里面的文件包含了不同类别的训练/验证数据集图片名称

JPEGImages:原图片

SegmentationClass、SegmentationObject:语义分割

coco数据集(Common Objects in Context)

待更新

自制数据集,训练

待更新

FaceMaskDetection 人脸面罩检测

源码https://github.com/AIZOOTech/FaceMaskDetection

# -*- coding:utf-8 -*-
import cv2
import time
import argparse
import numpy as np
from PIL import Image
from utils.anchor_generator import generate_anchors
from utils.anchor_decode import decode_bbox
from utils.nms import single_class_non_max_suppression
from load_model.pytorch_loader import load_pytorch_model, pytorch_inference
# model = load_pytorch_model('models/face_mask_detection.pth');
model = load_pytorch_model('models/model360.pth')
# anchor configuration
# Feature-map sizes for the 360x360 model; the commented row presumably belongs
# to the 160x160 face_mask_detection model above — verify against the repo.
#feature_map_sizes = [[33, 33], [17, 17], [9, 9], [5, 5], [3, 3]]
feature_map_sizes = [[45, 45], [23, 23], [12, 12], [6, 6], [4, 4]]
anchor_sizes = [[0.04, 0.056], [0.08, 0.11], [0.16, 0.22], [0.32, 0.45], [0.64, 0.72]]
anchor_ratios = [[1, 0.62, 0.42]] * 5
# generate anchors
anchors = generate_anchors(feature_map_sizes, anchor_sizes, anchor_ratios)
# for inference , the batch size is 1, the model output shape is [1, N, 4],
# so we expand dim for anchors to [1, anchor_num, 4]
anchors_exp = np.expand_dims(anchors, axis=0)
# Class-id -> label mapping used when drawing detection results.
id2class = {0: 'Mask', 1: 'NoMask'}
def inference(image,
              conf_thresh=0.5,
              iou_thresh=0.4,
              target_shape=(160, 160),
              draw_result=True,
              show_result=True
              ):
    '''
    Main function of detection inference.
    :param image: 3D numpy array of image (H, W, 3).
    :param conf_thresh: the min threshold of classification probability.
    :param iou_thresh: the IOU threshold of NMS.
    :param target_shape: the model input size.
    :param draw_result: whether to draw bounding boxes onto the image.
    :param show_result: whether to display the image.
    :return: list of [class_id, conf, xmin, ymin, xmax, ymax] for each kept box.
    '''
    # image = np.copy(image)
    output_info = []
    height, width, _ = image.shape
    # Resize to the network input size and scale pixels to 0~1.
    image_resized = cv2.resize(image, target_shape)
    image_np = image_resized / 255.0  # normalise to 0~1
    # Add batch dim and go HWC -> CHW, as the PyTorch model expects NCHW.
    image_exp = np.expand_dims(image_np, axis=0)
    image_transposed = image_exp.transpose((0, 3, 1, 2))
    y_bboxes_output, y_cls_output = pytorch_inference(model, image_transposed)
    # remove the batch dimension, for batch is always 1 for inference.
    y_bboxes = decode_bbox(anchors_exp, y_bboxes_output)[0]
    y_cls = y_cls_output[0]
    # To speed up, do single class NMS, not multiple classes NMS.
    bbox_max_scores = np.max(y_cls, axis=1)
    bbox_max_score_classes = np.argmax(y_cls, axis=1)
    # keep_idx is the alive bounding box after nms.
    keep_idxs = single_class_non_max_suppression(y_bboxes,
                                                 bbox_max_scores,
                                                 conf_thresh=conf_thresh,
                                                 iou_thresh=iou_thresh,
                                                 )
    for idx in keep_idxs:
        conf = float(bbox_max_scores[idx])
        class_id = bbox_max_score_classes[idx]
        bbox = y_bboxes[idx]
        # clip the coordinate, avoid the value exceed the image boundary.
        # (bbox values are fractions of the image, hence the * width / * height)
        xmin = max(0, int(bbox[0] * width))
        ymin = max(0, int(bbox[1] * height))
        xmax = min(int(bbox[2] * width), width)
        ymax = min(int(bbox[3] * height), height)
        if draw_result:
            # Green box for Mask (class 0), red for NoMask.
            if class_id == 0:
                color = (0, 255, 0)
            else:
                color = (255, 0, 0)
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, 2)
            cv2.putText(image, "%s: %.2f" % (id2class[class_id], conf), (xmin + 2, ymin - 2),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, color)
        output_info.append([class_id, conf, xmin, ymin, xmax, ymax])
    if show_result:
        Image.fromarray(image).show()
    return output_info
def run_on_video(video_path, output_video_name, conf_thresh):
    """Run mask detection frame-by-frame on a video source and display results.

    Parameters
    ----------
    video_path : str or int
        Path to a video file, or 0 for the default camera device.
    output_video_name : str
        Output file for the annotated video (writer is currently disabled).
    conf_thresh : float
        Minimum detection confidence passed through to ``inference``.

    Raises
    ------
    ValueError
        If the video source cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    # Fail fast BEFORE querying stream properties on a capture that never opened.
    if not cap.isOpened():
        raise ValueError("Video open failed.")
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    fps = cap.get(cv2.CAP_PROP_FPS)
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    # writer = cv2.VideoWriter(output_video_name, fourcc, int(fps), (int(width), int(height)))
    total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    status = True
    idx = 0
    while status:
        start_stamp = time.time()
        status, img_raw = cap.read()
        read_frame_stamp = time.time()
        if status:
            # Convert only after a successful read: cap.read() returns
            # (False, None) at end-of-stream, and cvtColor(None, ...) raises.
            img_raw = cv2.cvtColor(img_raw, cv2.COLOR_BGR2RGB)
            # inference draws boxes in-place on img_raw (draw_result=True).
            inference(img_raw,
                      conf_thresh,
                      iou_thresh=0.5,
                      target_shape=(360, 360),
                      draw_result=True,
                      show_result=False)
            # imshow expects BGR, so flip the channel axis back for display.
            cv2.imshow('image', img_raw[:, :, ::-1])
            cv2.waitKey(1)
            inference_stamp = time.time()
            # writer.write(img_raw)
            write_frame_stamp = time.time()
            idx += 1
            print("%d of %d" % (idx, total_frames))
            print("read_frame:%f, infer time:%f, write time:%f" % (read_frame_stamp - start_stamp,
                                                                   inference_stamp - read_frame_stamp,
                                                                   write_frame_stamp - inference_stamp))
    # writer.release()
if __name__ == "__main__":
    # CLI entry point: choose between single-image mode and video/camera mode.
    parser = argparse.ArgumentParser(description="Face Mask Detection")
    parser.add_argument('--img-mode', type=int, default=1, help='set 1 to run on image, 0 to run on video.')
    parser.add_argument('--img-path', type=str, default='img_test/test5.jpg', help='path to your image.')
    parser.add_argument('--video-path', type=str, default='0', help='path to your video, `0` means to use camera.')
    # parser.add_argument('--hdf5', type=str, help='keras hdf5 file')
    args = parser.parse_args()
    if args.img_mode:
        # Image mode: load the file, convert OpenCV's BGR to RGB, run once.
        frame = cv2.imread(args.img_path)
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        inference(frame, show_result=True, target_shape=(360, 360))
    else:
        # Video mode: the literal string '0' selects the default camera.
        source = 0 if args.video_path == '0' else args.video_path
        run_on_video(source, '', conf_thresh=0.5)

网络异常,图片无法展示
|

图片检测结果:

python pytorch_infer.py  --img-path /path/to/your/img

前:

网络异常,图片无法展示
|


网络异常,图片无法展示
|

视频流检测

python pytorch_infer.py --img-mode 0 --video-path /path/to/video  
# 如果要打开本地摄像头, video_path填写0就可以了,如下
python pytorch_infer.py --img-mode 0 --video-path 0


网络异常,图片无法展示
|

–end–

相关实践学习
基于阿里云DeepGPU实例,用AI画唯美国风少女
本实验基于阿里云DeepGPU实例,使用aiacctorch加速stable-diffusion-webui,用AI画唯美国风少女,可提升性能至高至原性能的2.6倍。
相关文章
|
11天前
|
机器学习/深度学习 监控 自动驾驶
深度学习之2D目标检测
2D目标检测是深度学习中的一个关键任务,旨在识别图像中的目标对象,并在每个目标对象周围生成一个边界框。该任务在自动驾驶、视频监控、机器人视觉等领域具有广泛应用。以下是对深度学习中2D目标检测的详细介绍,包括其基本概念、主要方法、常见模型、应用场景、优势和挑战。
18 4
|
17天前
|
机器学习/深度学习 传感器 算法
基于Mediapipe深度学习算法的手势识别系统【含python源码+PyqtUI界面+原理详解】-python手势识别 深度学习实战项目
基于Mediapipe深度学习算法的手势识别系统【含python源码+PyqtUI界面+原理详解】-python手势识别 深度学习实战项目
|
17天前
|
机器学习/深度学习 存储 计算机视觉
基于YOLOv8深度学习的PCB板缺陷检测系统【python源码+Pyqt5界面+数据集+训练代码】目标检测
基于YOLOv8深度学习的PCB板缺陷检测系统【python源码+Pyqt5界面+数据集+训练代码】目标检测
|
16天前
|
机器学习/深度学习 算法 PyTorch
《PyTorch深度学习实践》--3梯度下降算法
《PyTorch深度学习实践》--3梯度下降算法
|
16天前
|
机器学习/深度学习 PyTorch 算法框架/工具
《PyTorch深度学习实践》--2线性模型
《PyTorch深度学习实践》--2线性模型
|
16天前
|
机器学习/深度学习 自然语言处理 前端开发
深度学习-[源码+数据集]基于LSTM神经网络黄金价格预测实战
深度学习-[源码+数据集]基于LSTM神经网络黄金价格预测实战
|
17天前
|
机器学习/深度学习 存储 算法
基于YOLOv8与ByteTrack的车辆行人多目标检测与追踪系统【python源码+Pyqt5界面+数据集+训练代码】深度学习实战、目标追踪、运动物体追踪
基于YOLOv8与ByteTrack的车辆行人多目标检测与追踪系统【python源码+Pyqt5界面+数据集+训练代码】深度学习实战、目标追踪、运动物体追踪
|
17天前
|
机器学习/深度学习 存储 安全
基于YOLOv8深度学习的行人跌倒检测系统【python源码+Pyqt5界面+数据集+训练代码】目标检测
基于YOLOv8深度学习的行人跌倒检测系统【python源码+Pyqt5界面+数据集+训练代码】目标检测
|
17天前
|
机器学习/深度学习 算法 数据库
【功能超全】基于OpenCV车牌识别停车场管理系统软件开发【含python源码+PyqtUI界面+功能详解】-车牌识别python 深度学习实战项目
【功能超全】基于OpenCV车牌识别停车场管理系统软件开发【含python源码+PyqtUI界面+功能详解】-车牌识别python 深度学习实战项目
|
17天前
|
机器学习/深度学习 算法 数据可视化
【深度学习实战】基于深度学习的图片风格快速迁移软件(Python源码+UI界面)
【深度学习实战】基于深度学习的图片风格快速迁移软件(Python源码+UI界面)