深度学习之目标检测--Pytorch实战 2

简介: 深度学习之目标检测--Pytorch实战

四、神经网络的搭建

# nn_module.py
# @Time: 2022/1/13 21:42
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
from torch import nn


class NeuralNetwork(nn.Module):
    """Minimal nn.Module demo: forward simply adds one to its input."""

    def __init__(self) -> None:
        super().__init__()

    def forward(self, input):
        # Works on a tensor of any shape; returns input + 1 elementwise.
        return input + 1


neuralnetwork = NeuralNetwork()
x = torch.tensor(1.0)
output = neuralnetwork(x)
print(output)

fc2efbae29d24acf8c5e0cd5c6e14b71.png

二维卷积层Convolution Layers

1b061bcb765f42dfac128920b1398bde.png

# nn_conv.py
# @Time: 2022/1/13 22:08
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
import torch.nn.functional as F

# A 5x5 single-channel input and a 3x3 kernel, written as plain integers.
input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]])
kernel = torch.tensor([[1, 2, 1],
                       [0, 1, 0],
                       [2, 1, 0]])

# F.conv2d expects (batch, channels, H, W), so add two leading singleton dims.
input = input.reshape(1, 1, 5, 5)
kernel = kernel.reshape(1, 1, 3, 3)
print(input.shape)
print(kernel.shape)

# Stride 1: kernel visits every position -> 3x3 output.
output = F.conv2d(input, kernel, stride=1)
print(output)

# Stride 2: kernel moves two cells at a time -> 2x2 output.
output2 = F.conv2d(input, kernel, stride=2)
print(output2)

# Stride 1 with one ring of zero padding -> output keeps the 5x5 size.
output3 = F.conv2d(input, kernel, stride=1, padding=1)
print(output3)

62d89c872cf448fc9b6a570948fbc113.png

# src/nn_conv2d.py
# @Time: 2022/1/13 22:37
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
import torchvision
from torch import nn
from torch.nn import Conv2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
# CIFAR-10 test split as tensors; downloaded on first run.
dataset = torchvision.datasets.CIFAR10("../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64)
class NeuralNetwork(nn.Module):
    """Single conv layer: 3 -> 6 channels, 3x3 kernel, stride 1, no padding."""
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.conv1 = Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)
    def forward(self,x):
        x = self.conv1(x)
        return x
neuralnetwork = NeuralNetwork()
# print(neuralnetwork)
writer = SummaryWriter("../logs")
step = 0
for data in dataloader:
    imgs, targets = data
    output = neuralnetwork(imgs)
    print(imgs.shape)
    print(output.shape)
    # torch.Size([64, 3, 32, 32])
    writer.add_images("input", imgs, step)
    # torch.Size([64, 6, 30, 30])
    # add_images cannot render 6 channels, so fold the extra channels into the
    # batch dimension: (64, 6, 30, 30) -> (128, 3, 30, 30).
    output = torch.reshape(output, (-1, 3, 30, 30))
    writer.add_images("output", output, step)
    step += 1
writer.close()

95cb5dabe74443749f7f83f34f77c1f4.png7e2181ca4d564f6ab2f2980c0938e1f6.png

池化层 Pooling layers

463f337e141b416fb752d8cef779f8d5.png


226385d9e099402bae530c47bb0843e0.png


# nn_maxpool.py
# @Time: 2022/1/14 9:53
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
from torch import nn
from torch.nn import MaxPool2d

# dtype=float32 is required: max_pool2d is not implemented for integer tensors.
input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]], dtype=torch.float32)
# Reshape to (batch, channels, H, W) for the module API.
input = torch.reshape(input, (-1, 1, 5, 5))
print(input.shape)


class NueralNetwork(nn.Module):
    """Single 3x3 max-pool; ceil_mode=True keeps partially covered windows."""

    def __init__(self):
        super(NueralNetwork, self).__init__()
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=True)

    def forward(self, input):
        output = self.maxpool1(input)
        return output


nueralnetwork = NueralNetwork()
output = nueralnetwork(input)
print(output)
# BUGFIX: the expected console output below was pasted as bare code, which made
# the script raise NameError on `tensor`; it is kept here as a comment only.
# tensor([[[[2., 3.],
#           [5., 1.]]]])


e2fd77ebceec4bb89d0f325816ff9f63.png

ceil_mode=False
tensor([[[[2.]]]])

5cc0783aa3a642d8a790e11819d1d7d6.png

# @Time: 2022/1/14 9:53
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
import torchvision
from torch import nn
from torch.nn import MaxPool2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
# CIFAR-10 test split; downloaded on first run.
dataset = torchvision.datasets.CIFAR10("../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64)
# Earlier hand-made example, kept for reference:
# input = torch.tensor([[1, 2, 0, 3, 1],
#                       [0, 1, 2, 3, 1],
#                       [1, 2, 1, 0, 0],
#                       [5, 2, 3, 1, 1],
#                       [2, 1, 0, 1, 1]], dtype=torch.float32)
#
# input = torch.reshape(input, (-1, 1, 5, 5))
# print(input.shape)
class NueralNetwork(nn.Module):
    """Single 3x3 max-pool; ceil_mode=True keeps partially covered windows."""
    def __init__(self):
        super(NueralNetwork, self).__init__()
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=True)
    def forward(self, input):
        output = self.maxpool1(input)
        return output
nueralnetwork = NueralNetwork()
writer = SummaryWriter("../logs")
step = 0
for data in dataloader:
    imgs, targets = data
    writer.add_images("input", imgs, step)
    # Pooling keeps the channel count (3), so the result can be logged directly.
    output = nueralnetwork(imgs)
    writer.add_images("output", output, step)
    step += 1
writer.close()
# output = nueralnetwork(input)
#
# print(output)

eee40e838e3e467ba67b932c3d9a4a47.png


1bd84e5c7b1e4af199aeda3e90a82df3.png

非线性激活Non-linear Activations (weighted sum, nonlinearity)

nn.ReLU

493e82a4bf85493b81ae84c54ff9e03c.png

import torch
from torch import nn
from torch.nn import ReLU

# 2x2 example holding both signs; reshape to (N, C, H, W) for the module API.
# (BUGFIX: dropped the torchvision/DataLoader imports — unused in this snippet.)
input = torch.tensor([[1, -0.5],
                      [-1, 3]])
input = torch.reshape(input, (-1, 1, 2, 2))
print(input.shape)
print(input)
# BUGFIX: the second line of this pasted repr lacked a leading '#', which made
# the whole snippet a SyntaxError; both lines are now proper comments.
# tensor([[[[ 1.0000, -0.5000],
#           [-1.0000,  3.0000]]]])


class NerualNetwork(nn.Module):
    """Applies ReLU: negative entries clamp to 0, non-negative pass through."""

    def __init__(self):
        super(NerualNetwork, self).__init__()
        self.relu1 = ReLU()

    def forward(self, input):
        output = self.relu1(input)
        return output


nerualnetwork = NerualNetwork()
output = nerualnetwork(input)
print(output)
# BUGFIX: expected output was pasted as bare code (NameError); kept as comment.
# tensor([[[[1., 0.],
#           [0., 3.]]]])

05896672021948e0b6edee74155a7607.png

nn.Sigmoid

24f242a23148415485065f308fc238f7.png

tensorboard里面 step 不从0 开始,是图片显示的问题, 用命令:

tensorboard --logdir=logs --samples_per_plugin images=1000
# @Time: 2022/1/14 10:34
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
import torchvision
from torch import nn
from torch.nn import ReLU, Sigmoid
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
# Small hand-made tensor kept from the ReLU demo (printed but otherwise unused).
input = torch.tensor([[1, -0.5],
                      [-1, 3]])
input = torch.reshape(input, (-1, 1, 2, 2))
print(input.shape)
print(input)
dataset = torchvision.datasets.CIFAR10("../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64, )
class NerualNetwork(nn.Module):
    """Applies Sigmoid to its input (relu1 is constructed but never used)."""
    def __init__(self):
        super(NerualNetwork, self).__init__()
        self.relu1 = ReLU()
        # NOTE(review): attribute name looks like a typo for "sigmoid1"; kept as-is.
        self.sigmod1 = Sigmoid()
    def forward(self, input):
        output = self.sigmod1(input)
        return output
nerualnetwork = NerualNetwork()
writer = SummaryWriter("../logs")
step = 0
for data in dataloader:
    imgs, target = data
    writer.add_images("input", imgs, global_step=step)
    # Sigmoid squashes every pixel into (0, 1), visibly washing out the images.
    output = nerualnetwork(imgs)
    writer.add_images("output", output, global_step=step)
    step += 1
writer.close()

e1a825ad930848cf85a9047c79d04737.png


994c2e2c69154747989639525aac84e0.png

归一化层 Normalization Layers(注:Normalization 是归一化,不同于正则化 Regularization)

nn.BatchNorm2d

Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift .

d9ea1fccfcd440e49747a707abc6365d.png

Recurrent Layers

nn.RNN

nn.LSTM

Sparse Layers

nn.Embedding

A simple lookup table that stores embeddings of a fixed dictionary and size.

Linear Layers

# nn_linear.py
# @Time: 2022/1/14 11:48
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
import torchvision
from torch import nn
from torch.nn import Linear
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10("../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
# BUGFIX: drop_last=True. The Linear layer below expects exactly
# 64 * 3 * 32 * 32 = 196608 inputs, but CIFAR-10's test split has 10000 images,
# so the final batch of 10000 % 64 = 16 images crashed the loop.
dataloader = DataLoader(dataset, batch_size=64, drop_last=True)


class NerualNetwork(nn.Module):
    """One fully-connected layer mapping a flattened 64-image batch to 10 values."""

    def __init__(self):
        super(NerualNetwork, self).__init__()
        self.linear1 = Linear(196608, 10)

    def forward(self, input):
        output = self.linear1(input)
        return output


nerualnetwork = NerualNetwork()
for data in dataloader:
    imgs, targets = data
    print(imgs.shape)
    # torch.Size([64, 3, 32, 32])
    # output = torch.reshape(imgs, (1, 1, 1, -1))
    # flatten collapses the entire batch into one 1-D vector of length 196608.
    output = torch.flatten(imgs)
    print(output.shape)
    # torch.Size([196608])
    output = nerualnetwork(output)
    print(output.shape)
    # torch.Size([10])
85a860d57a204242ad75530555e1d1a6.png

SEQUENTIAL

# nn_seq.py
# @Time: 2022/1/14 12:57
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.tensorboard import SummaryWriter
class NueralNetwork(nn.Module):
    """CIFAR-10 classifier: three conv+pool stages, then two linear layers."""

    def __init__(self):
        super(NueralNetwork, self).__init__()
        # nn.Sequential replaces the nine separate attributes of the long-hand
        # version. Each 5x5 conv keeps the 32x32 spatial size (padding=2) while
        # each pool halves it: 32 -> 16 -> 8 -> 4, so Flatten yields
        # 64 * 4 * 4 = 1024 features for the classifier head.
        self.model1 = Sequential(
            Conv2d(in_channels=3, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Flatten(),
            Linear(in_features=1024, out_features=64),
            Linear(in_features=64, out_features=10),
        )

    def forward(self, x):
        return self.model1(x)
nueralnetwork = NueralNetwork()
print(nueralnetwork)
# Smoke-test with a dummy batch: 64 all-ones images of shape 3x32x32.
input = torch.ones((64, 3, 32, 32))
output = nueralnetwork(input)
print(output.shape)
# Export the computation graph so TensorBoard can render the model structure.
writer = SummaryWriter("../logs")
writer.add_graph(nueralnetwork, input)
writer.close()

输出结果是

NueralNetwork(
  (model1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Flatten(start_dim=1, end_dim=-1)
    (7): Linear(in_features=1024, out_features=64, bias=True)
    (8): Linear(in_features=64, out_features=10, bias=True)
  )
)
torch.Size([64, 10])

3d62fa7cd9054e58966110e2f6740eb3.png793b60c6be104169a31ceece93d6def1.pnge223dadc91bf495da959bd88206202ef.png

损失函数Loss Functions

nn.L1Loss

28cb6dd32d5d44a69c8f64d5b9fd87a1.png
nn.MSELoss

# nn_loss_network.py
import torchvision
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.data import DataLoader
dataset = torchvision.datasets.CIFAR10("../dataset", train=False, download=True,
                                       transform=torchvision.transforms.ToTensor())
# batch_size=1: inspect the loss for one image at a time.
dataloader = DataLoader(dataset, batch_size=1)
class NueralNetwork(nn.Module):
    """CIFAR-10 classifier: three conv+pool stages, flatten, two linear layers."""
    def __init__(self):
        super(NueralNetwork, self).__init__()
        self.model1 = Sequential(
            Conv2d(in_channels=3, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Flatten(),
            Linear(in_features=1024, out_features=64),
            Linear(in_features=64, out_features=10)
        )
    def forward(self, x):
        x = self.model1(x)
        return x
# CrossEntropyLoss takes raw logits plus integer class targets.
loss = nn.CrossEntropyLoss()
nueralnetwork = NueralNetwork()
for data in dataloader:
    imgs, targets = data
    outputs = nueralnetwork(imgs)
    result_loss = loss(outputs, targets)
    print(result_loss)
    print(outputs)
    print(targets)

b43d7005acb2438db38d42d89dfce382.png

nn.CrossEntropyLoss

优化器TORCH.OPTIM

# nn_optim.py
# @Time: 2022/1/14 20:21
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
import torchvision
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10("../dataset", train=False, download=True,
                                       transform=torchvision.transforms.ToTensor())
dataloader = DataLoader(dataset, batch_size=1)


class NueralNetwork(nn.Module):
    """CIFAR-10 classifier used to demonstrate a full SGD optimisation loop."""

    def __init__(self):
        super(NueralNetwork, self).__init__()
        self.model1 = Sequential(
            Conv2d(in_channels=3, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Flatten(),
            Linear(in_features=1024, out_features=64),
            Linear(in_features=64, out_features=10)
        )

    def forward(self, x):
        x = self.model1(x)
        return x


loss = nn.CrossEntropyLoss()
nueralnetwork = NueralNetwork()
# Define the optimizer.
optim = torch.optim.SGD(nueralnetwork.parameters(), lr=0.01)
for epoch in range(20):
    running_loss = 0.0
    for data in dataloader:
        imgs, targets = data
        outputs = nueralnetwork(imgs)
        result_loss = loss(outputs, targets)
        # Clear stale gradients before backprop.
        optim.zero_grad()
        result_loss.backward()
        optim.step()
        # BUGFIX: accumulate the detached Python float. Summing the loss tensor
        # itself kept every batch's autograd graph alive, growing memory
        # steadily over the epoch.
        running_loss += result_loss.item()
    print(running_loss)

88aa589c9d244d38915c3190a12eaf2f.png

断点调试

0a0521858b55499fb5a4b646e06f4871.png595e0af8bd5040edbee945a2e75018de.png03aeec62f05b4885aef1ae60e06defb7.png

网络模型的增删改

# model_pretrained.py
import torchvision
from torch import nn

# train_data = torchvision.datasets.ImageNet("../data_image_net", split='train', download=True,
#                                            transform=torchvision.transforms.ToTensor())
# BUGFIX: the error message below was pasted as bare text, breaking the script
# with a SyntaxError. Attempting the download above fails with:
# RuntimeError: The dataset is no longer publicly accessible. You need to download the archives externally and place them in the root directory.
vgg16_false = torchvision.models.vgg16(pretrained=False)
vgg16_true = torchvision.models.vgg16(pretrained=True)
print(vgg16_true)

96c624a099b24ff6adb2d42ec985986e.png

train_data = torchvision.datasets.CIFAR10('../data', train=True, transform=torchvision.transforms.ToTensor(),
                                          download=True)
# Append an extra 1000 -> 10 linear layer so the ImageNet VGG16 head can serve
# CIFAR-10's 10 classes.
vgg16_true.classifier.add_module('add_linear', nn.Linear(1000, 10))
print(vgg16_true)
print(vgg16_false) 

6424f732c3174785b43ae65ac588a858.png

# Alternative to appending: replace classifier[6] in place, swapping the
# 1000-class output layer for a 10-class one.
vgg16_false.classifier[6] = nn.Linear(4096, 10)
print(vgg16_false)

bd97f494fb30448ea4d2908f6b9c9d1a.png

数据集下载的位置:Downloading:

https://download.pytorch.org/models/vgg16-397923af.pth

to C:\Users\15718/.cache\torch\hub\checkpoints\vgg16-397923af.pth


***如果遇到数据集较大,网络不通畅,建议:复制下载链接,用迅雷下 ***

模型的保存与加载

# model_save.py
import torch
import torchvision
from torch import nn
vgg16 = torchvision.models.vgg16(pretrained=False)
# Save way 1: the whole model (structure + parameters).
torch.save(vgg16, "vgg16_method1.pth")
# Save way 2: parameters only, as a state_dict (officially recommended).
torch.save(vgg16.state_dict(), "vgg16_method2.pth")
# Trap: a custom class saved via way 1 can only be loaded where the class
# definition is importable.
class NeuralNetwork(nn.Module):
    """Minimal custom model used to demonstrate the way-1 loading trap."""
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)
    def forward(self,x):
        x = self.conv1(x)
        return x
neuralnetwork = NeuralNetwork()
torch.save(neuralnetwork, "neuralnetwork_method1.pth")
# model_load.py
import torch
from model_save import *
# Way 1 -- pairs with save way 1: load structure + parameters in one call.
model = torch.load("vgg16_method1.pth")
# print(model)

ef52101fc1334db79d305d117d11fffa.png

# Way 2: loading the state_dict file yields an OrderedDict of tensors, not a model.
model = torch.load("vgg16_method2.pth")
print(model)

308aa6fbd7c8410aaab154410df7eea4.png

# Way 2 properly: rebuild the VGG skeleton, then load the saved parameters into it.
vgg16 = torchvision.models.vgg16(pretrained=False)
vgg16.load_state_dict(torch.load("vgg16_method2.pth"))
print(vgg16)

ddf89b82c1ae41a482d0c3a75eaf70b3.png

# Trap: loading a way-1 custom model needs the class definition in scope
# (provided here by `from model_save import *`); otherwise torch.load fails.
model = torch.load("neuralnetwork_method1.pth")
print(model)


6e42c4d4ae8048db838c0042f64e6d84.png

模型训练套路

# model.py
# @Time: 2022/1/15 9:03
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
from torch import nn
# Build the CIFAR-10 network: three conv/pool stages plus two linear layers.
class NurealNetwork(nn.Module):
    """CIFAR-10 classifier; maps input (N, 3, 32, 32) to logits (N, 10)."""

    def __init__(self):
        super(NurealNetwork, self).__init__()
        # Spatial size shrinks 32 -> 16 -> 8 -> 4; final features 64*4*4 = 1024.
        self.model = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
            nn.Linear(64 * 4 * 4, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        return self.model(x)


if __name__ == '__main__':
    # Quick sanity check: a dummy batch of 64 images must yield shape (64, 10).
    net = NurealNetwork()
    print(net(torch.ones((64, 3, 32, 32))).shape)
# train.py
# @Time: 2022/1/15 8:43
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
# Prepare the datasets.
import torch.optim
import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from model import *
train_data = torchvision.datasets.CIFAR10(root="../dataset", train=True,transform=torchvision.transforms.ToTensor(),
                                          download=True)
test_data = torchvision.datasets.CIFAR10(root="../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                         download=True)
train_data_size = len(train_data)
test_data_size = len(test_data)
# str.format interpolation: with train_data_size == 10 this would print
# "训练数据集的长度为:10" ("length of the training set: 10").
print("训练数据集的长度为:{}".format(train_data_size))
print("测试数据集的长度为:{}".format(test_data_size))
# Wrap the datasets in DataLoaders.
train_dataloader = DataLoader(train_data,batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)
# Build the network (NurealNetwork is defined in model.py).
nurealnetwork = NurealNetwork()
# Loss function.
loss_fn = nn.CrossEntropyLoss()
# Optimizer.
# learning_rate = 0.01
learning_rate = 1e-2
optimizer = torch.optim.SGD(nurealnetwork.parameters(), lr=learning_rate)
# Bookkeeping for the run:
# number of optimisation steps taken so far,
total_train_step = 0
# number of completed evaluation passes,
total_test_step = 0
# number of epochs.
epoch = 10
# TensorBoard writer for scalar curves.
writer = SummaryWriter("../logs_train")
for i in range(epoch):
    print("------第 {} 轮训练开始------".format(i + 1))
    # Training mode (affects Dropout/BatchNorm-style layers).
    nurealnetwork.train()
    for data in train_dataloader:
        imgs, targets = data
        outputs = nurealnetwork(imgs)
        loss = loss_fn(outputs, targets)
        # Optimise: clear old gradients,
        optimizer.zero_grad()
        # backpropagate to compute fresh gradients,
        loss.backward()
        # then update the parameters.
        optimizer.step()
        # One training step finished.
        total_train_step += 1
        # Log only every 100 steps to keep the console readable.
        if total_train_step % 100 == 0:
            print("训练次数:{} , Loss: {}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)
    # Evaluation pass over the test split.
    nurealnetwork.eval()
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad():
        for data in test_dataloader:
            imgs, targets = data
            outputs = nurealnetwork(imgs)
            # Compare prediction against the ground truth.
            loss = loss_fn(outputs, targets)
            total_test_loss += loss.item()
            # argmax(1) picks the predicted class for each image in the batch.
            accuracy = (outputs.argmax(1) == targets).sum()
            total_accuracy += accuracy
    print("整体测试集上的Loss:{}".format(total_test_loss))
    print("整体测试集上的正确率:{}".format(total_accuracy / test_data_size))
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accracy", total_accuracy / test_data_size, total_test_step)
    total_test_step += 1
    # Save a checkpoint (whole model) after every epoch.
    torch.save(nurealnetwork, "nurealnetwork_{}.pth".format(i))
    # Officially recommended alternative: save only the state_dict.
    # torch.save(nurealnetwork.state_dict(), "nurealnetwork_{}.pth".format(i))
    print("模型已保存")
writer.close()

96fc9a5a34a14c03aeb32aadf3d69a77.png


43239704ec424d93b67896e4a0714e7f.png


eeb153f9f03944b2899b419d502d2918.png

GPU训练模型

CPU训练 vs GPU训练

CPU 用时

77d016ebbb5b4b9887c86d203ed884af.png

GPU 用时

51cd43269bd845efbdfe018ac850ead6.png

用任意的模型测试神经网络能否预测成功

# test.py
# @Time: 2022/1/15 15:40
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
import torchvision
from PIL import Image
from torch import nn
image_path = "../images/dog.png"
image = Image.open(image_path)
print(image)
# PNGs may carry an alpha channel; force plain 3-channel RGB.
image = image.convert("RGB")
# Match the training pipeline: resize to 32x32, then convert to a tensor.
transform =torchvision.transforms.Compose([torchvision.transforms.Resize((32, 32)),
                                           torchvision.transforms.ToTensor()])
image =transform(image)
print(image.shape)
class Tudui(nn.Module):
    """Same architecture as the training-time network (see model.py)."""
    def __init__(self):
        super(Tudui, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
            nn.Linear(64*4*4, 64),
            nn.Linear(64, 10)
        )
    def forward(self, x):
        x = self.model(x)
        return x
# NOTE(review): this checkpoint was saved from a GPU run; on a CPU-only machine
# torch.load would need map_location — confirm the target environment has CUDA.
model = torch.load("tudui_29_gpu.pth")
print(model)
# Add the batch dimension expected by the network.
image = torch.reshape(image, (1, 3, 32, 32))
model.eval()
with torch.no_grad():
    # Move the input onto the GPU to match the model's device.
    image = image.cuda()
    output = model(image)
print(output)
# Index of the highest logit = predicted CIFAR-10 class.
print(output.argmax(1))

364c6467ef664d96b33f32ae1a7807a6.png

五、项目实战

YOLO v5训练

下载代码:https://github.com/ultralytics/yolov5/tree/v6.0

安装,配置环境

pip install -r requirements.txt

如果安装依赖库太慢了或者失败了,建议在Anaconda命令行里,进入项目的对应仓库里,安装依赖库,命令如下

pip install 依赖库 -i https://pypi.tuna.tsinghua.edu.cn/simple


d418b89817064b148dd93922cf334d8a.png

def parse_opt():
    """Build and parse the command-line options for YOLOv5 detection."""
    parser = argparse.ArgumentParser()
    # Pretrained weights; yolov5s/m/l/x.pt grow in size and training time, with better accuracy.
    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)')
    # Input path (file/dir/URL/glob); '0' selects the webcam.
    parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam')
    # Inference image size; defaults to 640 px.
    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
    parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
    # Choose CPU or GPU(s); multiple GPU ids may be listed.
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    # Preview images (or video frames) while detecting.
    parser.add_argument('--view-img', action='store_true', help='show results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--visualize', action='store_true', help='visualize features')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
    parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
    parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
    opt = parser.parse_args()
    # A single size value is duplicated to serve as both height and width.
    opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1  # expand
    print_args(FILE.stem, opt)
    return opt

b5fc6247e5d848ad99bc203ff90d0762.png

5fb55308ae9847b3a353d36163d94818.png

效果如图:

image.jpeg



网络异常,图片无法展示
|


image.jpeg



image.jpeg


image.jpeg

视频检测


image.png

将batch_size = 32 调为4,不然显卡会带不动

测试集跑完整个视频,大概花了半个小时

本地跑不动的可以用Google colab 云端服务器跑

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)')
    # Path of the input video (file/folder; 0 selects the webcam).
    parser.add_argument('--source', type=str, default='data/video/movie.mp4', help='source')  # file/folder, 0 for webcam
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default='runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    opt = parser.parse_args()
    print(opt)
    check_requirements(exclude=('pycocotools', 'thop'))
    # Inference only — no gradients needed.
    with torch.no_grad():
        if opt.update:  # update all models (to fix SourceChangeWarning)
            for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
                detect()
                strip_optimizer(opt.weights)
        else:
            detect()

视频截图:

image.png

训练coco128数据集

设置参数

40937eb6df4c4b569335931f518583ce.png

4a17415166164eeb968675c6963eaa4d.png73e4ed159fcd4ac3aabc9e0477607813.png


f695b1c03b9a473ebaa24e2c6405b8ea.png

e5abae92b92e4b4bb4ec4979a5dcf5ed.png

image.png

image.png


image.png

训练结果:



image.png


image.png



image.png


image.png

VOC2007(Visual Object Classes)数据集


image.png

Annotations:包含了xml文件,描述了图片的各种信息,特别是标注出了目标的位置坐标

ImageSets:主要关注下main文件夹的内容,里面的文件包含了不同类别的训练/验证数据集图片名称

JPEGImages:原图片

SegmentationClass、SegmentationObject:语义分割

coco数据集(Common Objects in Context)

待更新

自制数据集,训练

待更新

FaceMaskDetection 人脸面罩检测

源码https://github.com/AIZOOTech/FaceMaskDetection

# -*- coding:utf-8 -*-
import cv2
import time
import argparse
import numpy as np
from PIL import Image
from utils.anchor_generator import generate_anchors
from utils.anchor_decode import decode_bbox
from utils.nms import single_class_non_max_suppression
from load_model.pytorch_loader import load_pytorch_model, pytorch_inference
# model = load_pytorch_model('models/face_mask_detection.pth');
model = load_pytorch_model('models/model360.pth')
# anchor configuration
# Feature-map sizes for the 360x360 model; the commented row presumably belongs
# to the 160x160 face_mask_detection model above — verify against the repo.
#feature_map_sizes = [[33, 33], [17, 17], [9, 9], [5, 5], [3, 3]]
feature_map_sizes = [[45, 45], [23, 23], [12, 12], [6, 6], [4, 4]]
anchor_sizes = [[0.04, 0.056], [0.08, 0.11], [0.16, 0.22], [0.32, 0.45], [0.64, 0.72]]
anchor_ratios = [[1, 0.62, 0.42]] * 5
# generate anchors
anchors = generate_anchors(feature_map_sizes, anchor_sizes, anchor_ratios)
# for inference , the batch size is 1, the model output shape is [1, N, 4],
# so we expand dim for anchors to [1, anchor_num, 4]
anchors_exp = np.expand_dims(anchors, axis=0)
# Class-id -> label mapping used when drawing detection results.
id2class = {0: 'Mask', 1: 'NoMask'}
def inference(image,
              conf_thresh=0.5,
              iou_thresh=0.4,
              target_shape=(160, 160),
              draw_result=True,
              show_result=True
              ):
    '''
    Main function of detection inference.
    :param image: 3D numpy array of image (H, W, 3).
    :param conf_thresh: the min threshold of classification probability.
    :param iou_thresh: the IOU threshold of NMS.
    :param target_shape: the model input size.
    :param draw_result: whether to draw bounding boxes onto the image.
    :param show_result: whether to display the image.
    :return: list of [class_id, conf, xmin, ymin, xmax, ymax] for each kept box.
    '''
    # image = np.copy(image)
    output_info = []
    height, width, _ = image.shape
    # Resize to the network input size and scale pixels to 0~1.
    image_resized = cv2.resize(image, target_shape)
    image_np = image_resized / 255.0  # normalise to 0~1
    # Add batch dim and go HWC -> CHW, as the PyTorch model expects NCHW.
    image_exp = np.expand_dims(image_np, axis=0)
    image_transposed = image_exp.transpose((0, 3, 1, 2))
    y_bboxes_output, y_cls_output = pytorch_inference(model, image_transposed)
    # remove the batch dimension, for batch is always 1 for inference.
    y_bboxes = decode_bbox(anchors_exp, y_bboxes_output)[0]
    y_cls = y_cls_output[0]
    # To speed up, do single class NMS, not multiple classes NMS.
    bbox_max_scores = np.max(y_cls, axis=1)
    bbox_max_score_classes = np.argmax(y_cls, axis=1)
    # keep_idx is the alive bounding box after nms.
    keep_idxs = single_class_non_max_suppression(y_bboxes,
                                                 bbox_max_scores,
                                                 conf_thresh=conf_thresh,
                                                 iou_thresh=iou_thresh,
                                                 )
    for idx in keep_idxs:
        conf = float(bbox_max_scores[idx])
        class_id = bbox_max_score_classes[idx]
        bbox = y_bboxes[idx]
        # clip the coordinate, avoid the value exceed the image boundary.
        # (bbox values are fractions of the image, hence the * width / * height)
        xmin = max(0, int(bbox[0] * width))
        ymin = max(0, int(bbox[1] * height))
        xmax = min(int(bbox[2] * width), width)
        ymax = min(int(bbox[3] * height), height)
        if draw_result:
            # Green box for Mask (class 0), red for NoMask.
            if class_id == 0:
                color = (0, 255, 0)
            else:
                color = (255, 0, 0)
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, 2)
            cv2.putText(image, "%s: %.2f" % (id2class[class_id], conf), (xmin + 2, ymin - 2),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, color)
        output_info.append([class_id, conf, xmin, ymin, xmax, ymax])
    if show_result:
        Image.fromarray(image).show()
    return output_info
def run_on_video(video_path, output_video_name, conf_thresh):
    """Run mask detection frame-by-frame on a video source and display results.

    Parameters
    ----------
    video_path : str or int
        Path to a video file, or 0 for the default camera device.
    output_video_name : str
        Output file for the annotated video (writer is currently disabled).
    conf_thresh : float
        Minimum detection confidence passed through to ``inference``.

    Raises
    ------
    ValueError
        If the video source cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    # Fail fast BEFORE querying stream properties on a capture that never opened.
    if not cap.isOpened():
        raise ValueError("Video open failed.")
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    fps = cap.get(cv2.CAP_PROP_FPS)
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    # writer = cv2.VideoWriter(output_video_name, fourcc, int(fps), (int(width), int(height)))
    total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    status = True
    idx = 0
    while status:
        start_stamp = time.time()
        status, img_raw = cap.read()
        read_frame_stamp = time.time()
        if status:
            # Convert only after a successful read: cap.read() returns
            # (False, None) at end-of-stream, and cvtColor(None, ...) raises.
            img_raw = cv2.cvtColor(img_raw, cv2.COLOR_BGR2RGB)
            # inference draws boxes in-place on img_raw (draw_result=True).
            inference(img_raw,
                      conf_thresh,
                      iou_thresh=0.5,
                      target_shape=(360, 360),
                      draw_result=True,
                      show_result=False)
            # imshow expects BGR, so flip the channel axis back for display.
            cv2.imshow('image', img_raw[:, :, ::-1])
            cv2.waitKey(1)
            inference_stamp = time.time()
            # writer.write(img_raw)
            write_frame_stamp = time.time()
            idx += 1
            print("%d of %d" % (idx, total_frames))
            print("read_frame:%f, infer time:%f, write time:%f" % (read_frame_stamp - start_stamp,
                                                                   inference_stamp - read_frame_stamp,
                                                                   write_frame_stamp - inference_stamp))
    # writer.release()
if __name__ == "__main__":
    # CLI entry point: choose between single-image mode and video/camera mode.
    parser = argparse.ArgumentParser(description="Face Mask Detection")
    parser.add_argument('--img-mode', type=int, default=1, help='set 1 to run on image, 0 to run on video.')
    parser.add_argument('--img-path', type=str, default='img_test/test5.jpg', help='path to your image.')
    parser.add_argument('--video-path', type=str, default='0', help='path to your video, `0` means to use camera.')
    # parser.add_argument('--hdf5', type=str, help='keras hdf5 file')
    args = parser.parse_args()
    if args.img_mode:
        # Image mode: load the file, convert OpenCV's BGR to RGB, run once.
        frame = cv2.imread(args.img_path)
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        inference(frame, show_result=True, target_shape=(360, 360))
    else:
        # Video mode: the literal string '0' selects the default camera.
        source = 0 if args.video_path == '0' else args.video_path
        run_on_video(source, '', conf_thresh=0.5)

网络异常,图片无法展示
|

图片检测结果:

python pytorch_infer.py  --img-path /path/to/your/img

前:

网络异常,图片无法展示
|


网络异常,图片无法展示
|

视频流检测

python pytorch_infer.py --img-mode 0 --video-path /path/to/video  
# 如果要打开本地摄像头, video_path填写0就可以了,如下
python pytorch_infer.py --img-mode 0 --video-path 0


网络异常,图片无法展示
|

–end–

相关实践学习
基于阿里云DeepGPU实例,用AI画唯美国风少女
本实验基于阿里云DeepGPU实例,使用aiacctorch加速stable-diffusion-webui,用AI画唯美国风少女,可提升性能至高至原性能的2.6倍。
相关文章
|
11天前
|
机器学习/深度学习 监控 自动驾驶
深度学习之2D目标检测
2D目标检测是深度学习中的一个关键任务,旨在识别图像中的目标对象,并在每个目标对象周围生成一个边界框。该任务在自动驾驶、视频监控、机器人视觉等领域具有广泛应用。以下是对深度学习中2D目标检测的详细介绍,包括其基本概念、主要方法、常见模型、应用场景、优势和挑战。
18 4
|
17天前
|
机器学习/深度学习 传感器 算法
基于Mediapipe深度学习算法的手势识别系统【含python源码+PyqtUI界面+原理详解】-python手势识别 深度学习实战项目
基于Mediapipe深度学习算法的手势识别系统【含python源码+PyqtUI界面+原理详解】-python手势识别 深度学习实战项目
|
17天前
|
机器学习/深度学习 存储 计算机视觉
基于YOLOv8深度学习的PCB板缺陷检测系统【python源码+Pyqt5界面+数据集+训练代码】目标检测
基于YOLOv8深度学习的PCB板缺陷检测系统【python源码+Pyqt5界面+数据集+训练代码】目标检测
|
16天前
|
机器学习/深度学习 算法 PyTorch
《PyTorch深度学习实践》--3梯度下降算法
《PyTorch深度学习实践》--3梯度下降算法
|
16天前
|
机器学习/深度学习 PyTorch 算法框架/工具
《PyTorch深度学习实践》--2线性模型
《PyTorch深度学习实践》--2线性模型
|
16天前
|
机器学习/深度学习 自然语言处理 前端开发
深度学习-[源码+数据集]基于LSTM神经网络黄金价格预测实战
深度学习-[源码+数据集]基于LSTM神经网络黄金价格预测实战
|
17天前
|
机器学习/深度学习 存储 算法
基于YOLOv8与ByteTrack的车辆行人多目标检测与追踪系统【python源码+Pyqt5界面+数据集+训练代码】深度学习实战、目标追踪、运动物体追踪
基于YOLOv8与ByteTrack的车辆行人多目标检测与追踪系统【python源码+Pyqt5界面+数据集+训练代码】深度学习实战、目标追踪、运动物体追踪
|
17天前
|
机器学习/深度学习 存储 安全
基于YOLOv8深度学习的行人跌倒检测系统【python源码+Pyqt5界面+数据集+训练代码】目标检测
基于YOLOv8深度学习的行人跌倒检测系统【python源码+Pyqt5界面+数据集+训练代码】目标检测
|
17天前
|
机器学习/深度学习 算法 数据库
【功能超全】基于OpenCV车牌识别停车场管理系统软件开发【含python源码+PyqtUI界面+功能详解】-车牌识别python 深度学习实战项目
【功能超全】基于OpenCV车牌识别停车场管理系统软件开发【含python源码+PyqtUI界面+功能详解】-车牌识别python 深度学习实战项目
|
17天前
|
机器学习/深度学习 算法 数据可视化
【深度学习实战】基于深度学习的图片风格快速迁移软件(Python源码+UI界面)
【深度学习实战】基于深度学习的图片风格快速迁移软件(Python源码+UI界面)