深度学习之目标检测--Pytorch实战 2

简介: 深度学习之目标检测--Pytorch实战

四、神经网络的搭建

# nn_module.py
# @Time: 2022/1/13 21:42
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
from torch import nn
class NeuralNetwork(nn.Module):
    """Minimal ``nn.Module`` example: the forward pass adds one to its input."""

    def __init__(self) -> None:
        super().__init__()

    def forward(self, input):
        """Return ``input + 1``."""
        return input + 1
neuralnetwork = NeuralNetwork()
x = torch.tensor(1.0)
output = neuralnetwork(x)
print(output)

fc2efbae29d24acf8c5e0cd5c6e14b71.png

二维卷积层Convolution Layers

1b061bcb765f42dfac128920b1398bde.png

# nn_conv.py
# @Time: 2022/1/13 22:08
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
import torch.nn.functional as F
input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]])
kernel = torch.tensor([[1, 2, 1],
                       [0, 1, 0],
                       [2, 1, 0]])
input = torch.reshape(input, (1, 1, 5, 5))
kernel = torch.reshape(kernel, (1, 1, 3, 3))
print(input.shape)
print(kernel.shape)
output = F.conv2d(input, kernel, stride=1)
print(output)
output2 = F.conv2d(input, kernel, stride=2)
print(output2)
output3 = F.conv2d(input, kernel, stride=1, padding=1)
print(output3)

62d89c872cf448fc9b6a570948fbc113.png

# src/nn_conv2d.py
# @Time: 2022/1/13 22:37
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
import torchvision
from torch import nn
from torch.nn import Conv2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
dataset = torchvision.datasets.CIFAR10("../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64)
class NeuralNetwork(nn.Module):
    """Network with a single 3x3 convolution mapping 3 input channels to 6."""

    def __init__(self):
        super().__init__()
        # No padding and stride 1: each spatial dimension shrinks by 2.
        self.conv1 = Conv2d(3, 6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        """Return ``x`` passed through the convolution layer."""
        return self.conv1(x)
neuralnetwork = NeuralNetwork()
# print(neuralnetwork)
writer = SummaryWriter("../logs")
step = 0
for data in dataloader:
    imgs, targets = data
    output = neuralnetwork(imgs)
    print(imgs.shape)
    print(output.shape)
    # torch.Size([64, 3, 32, 32])
    writer.add_images("input", imgs, step)
    # torch.Size([64, 6, 30, 30])
    output = torch.reshape(output, (-1, 3, 30, 30))
    writer.add_images("output", output, step)
    step += 1
writer.close()

95cb5dabe74443749f7f83f34f77c1f4.png7e2181ca4d564f6ab2f2980c0938e1f6.png

池化层 Pooling layers

463f337e141b416fb752d8cef779f8d5.png


226385d9e099402bae530c47bb0843e0.png


# nn_maxpool.py
# @Time: 2022/1/14 9:53
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
from torch import nn
from torch.nn import MaxPool2d
input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]], dtype=torch.float32)
input = torch.reshape(input, (-1, 1, 5, 5))
print(input.shape)
class NueralNetwork(nn.Module):
    """Single-layer network applying 3x3 max pooling with ``ceil_mode=True``."""

    def __init__(self):
        super().__init__()
        # ceil_mode=True keeps the partial windows at the border instead of
        # discarding them.
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=True)

    def forward(self, input):
        """Return the max-pooled ``input``."""
        return self.maxpool1(input)
nueralnetwork = NueralNetwork()
output = nueralnetwork(input)
print(output)
# tensor([[[[2., 3.],
#           [5., 1.]]]])


e2fd77ebceec4bb89d0f325816ff9f63.png

ceil_mode=False
tensor([[[[2.]]]])

5cc0783aa3a642d8a790e11819d1d7d6.png

# @Time: 2022/1/14 9:53
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
import torchvision
from torch import nn
from torch.nn import MaxPool2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
dataset = torchvision.datasets.CIFAR10("../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64)
# input = torch.tensor([[1, 2, 0, 3, 1],
#                       [0, 1, 2, 3, 1],
#                       [1, 2, 1, 0, 0],
#                       [5, 2, 3, 1, 1],
#                       [2, 1, 0, 1, 1]], dtype=torch.float32)
#
# input = torch.reshape(input, (-1, 1, 5, 5))
# print(input.shape)
class NueralNetwork(nn.Module):
    def __init__(self):
        super(NueralNetwork, self).__init__()
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=True)
    def forward(self, input):
        output = self.maxpool1(input)
        return output
nueralnetwork = NueralNetwork()
writer = SummaryWriter("../logs")
step = 0
for data in dataloader:
    imgs, targets = data
    writer.add_images("input", imgs, step)
    output = nueralnetwork(imgs)
    writer.add_images("output", output, step)
    step += 1
writer.close()
# output = nueralnetwork(input)
#
# print(output)

eee40e838e3e467ba67b932c3d9a4a47.png


1bd84e5c7b1e4af199aeda3e90a82df3.png

非线性激活Non-linear Activations (weighted sum, nonlinearity)

nn. ReLU

493e82a4bf85493b81ae84c54ff9e03c.png

import torch
import torchvision
from torch import nn
from torch.nn import ReLU
from torch.utils.data import DataLoader
input = torch.tensor([[1, -0.5],
                      [-1, 3]])
input = torch.reshape(input, (-1, 1, 2, 2))
print(input.shape)
print(input)
# tensor([[[[ 1.0000, -0.5000],
#           [-1.0000,  3.0000]]]])
class NerualNetwork(nn.Module):
    """Single-layer network that applies ReLU to its input."""

    def __init__(self):
        super().__init__()
        self.relu1 = ReLU()

    def forward(self, input):
        """Return ``input`` with negative entries replaced by zero."""
        return self.relu1(input)
nerualnetwork = NerualNetwork()
output = nerualnetwork(input)
print(output)
# tensor([[[[1., 0.],
#           [0., 3.]]]])

05896672021948e0b6edee74155a7607.png

nn.Sigmoid

24f242a23148415485065f308fc238f7.png

TensorBoard 里 step 看起来不是从 0 开始(或不连续),并不是数据有问题,而是图片面板默认只采样显示一部分 step。可以用下面的命令把图片的采样上限调大(这里设为 1000):

tensorboard --logdir=logs --samples_per_plugin images=1000
# @Time: 2022/1/14 10:34
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
import torchvision
from torch import nn
from torch.nn import ReLU, Sigmoid
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
input = torch.tensor([[1, -0.5],
                      [-1, 3]])
input = torch.reshape(input, (-1, 1, 2, 2))
print(input.shape)
print(input)
dataset = torchvision.datasets.CIFAR10("../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64, )
class NerualNetwork(nn.Module):
    """Network holding a ReLU and a Sigmoid layer; ``forward`` uses only the Sigmoid."""

    def __init__(self):
        super().__init__()
        self.relu1 = ReLU()
        self.sigmod1 = Sigmoid()

    def forward(self, input):
        """Return the element-wise sigmoid of ``input``."""
        activated = self.sigmod1(input)
        return activated
nerualnetwork = NerualNetwork()
writer = SummaryWriter("../logs")
step = 0
for data in dataloader:
    imgs, target = data
    writer.add_images("input", imgs, global_step=step)
    output = nerualnetwork(imgs)
    writer.add_images("output", output, global_step=step)
    step += 1
writer.close()

e1a825ad930848cf85a9047c79d04737.png


994c2e2c69154747989639525aac84e0.png

归一化层 Normalization Layers

nn.BatchNorm2d

Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift .

d9ea1fccfcd440e49747a707abc6365d.png

Recurrent Layers

nn.RNN

nn.LSTM

Sparse Layers

nn.Embedding

A simple lookup table that stores embeddings of a fixed dictionary and size.

Linear Layers

# nn_linear.py
# @Time: 2022/1/14 11:48
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
import torchvision
from torch import nn
from torch.nn import Linear
from torch.utils.data import DataLoader
# CIFAR-10 test split, with each image converted to a tensor.
dataset = torchvision.datasets.CIFAR10("../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
# drop_last=True discards a final batch that holds fewer than 64 images.
# The loop below flattens the whole batch into one vector, so a smaller batch
# would produce fewer than 196608 values and no longer match Linear(196608, 10).
dataloader = DataLoader(dataset, batch_size=64, drop_last=True)
class NerualNetwork(nn.Module):
    """Single fully connected layer mapping 196608 input features to 10 outputs."""

    def __init__(self):
        super().__init__()
        self.linear1 = Linear(in_features=196608, out_features=10)

    def forward(self, input):
        """Return ``input`` passed through the linear layer."""
        return self.linear1(input)
nerualnetwork = NerualNetwork()
for data in dataloader:
    imgs, targets =data
    print(imgs.shape)
    # torch.Size([64, 3, 32, 32])
    # output = torch.reshape(imgs, (1, 1, 1, -1))
    output = torch.flatten(imgs)
    print(output.shape)
    # torch.Size([196608])
    output = nerualnetwork(output)
    print(output.shape)
    # torch.Size([10])

85a860d57a204242ad75530555e1d1a6.png

SEQUENTIAL

# nn_seq.py
# @Time: 2022/1/14 12:57
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.tensorboard import SummaryWriter
class NueralNetwork(nn.Module):
    """Small CNN built as one ``Sequential``: three conv + max-pool stages, then two linear layers.

    Wrapping the layers in ``Sequential`` is the compact equivalent of
    declaring each layer as its own attribute and chaining them by hand in
    ``forward``.
    """

    def __init__(self):
        super().__init__()
        layers = [
            Conv2d(3, 32, kernel_size=5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, kernel_size=5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, kernel_size=5, padding=2),
            MaxPool2d(2),
            Flatten(),
            # 1024 = 64 channels * 4 * 4, the flattened size for a 32x32 input
            # after three 2x poolings.
            Linear(1024, 64),
            Linear(64, 10),
        ]
        self.model1 = Sequential(*layers)

    def forward(self, x):
        """Return the 10 output scores for each sample in ``x``."""
        return self.model1(x)
nueralnetwork = NueralNetwork()
print(nueralnetwork)
input = torch.ones((64, 3, 32, 32))
output = nueralnetwork(input)
print(output.shape)
writer = SummaryWriter("../logs")
writer.add_graph(nueralnetwork, input)
writer.close()

输出结果是

NueralNetwork(
  (model1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Flatten(start_dim=1, end_dim=-1)
    (7): Linear(in_features=1024, out_features=64, bias=True)
    (8): Linear(in_features=64, out_features=10, bias=True)
  )
)
torch.Size([64, 10])

3d62fa7cd9054e58966110e2f6740eb3.png793b60c6be104169a31ceece93d6def1.pnge223dadc91bf495da959bd88206202ef.png

损失函数Loss Functions

nn.L1Loss

28cb6dd32d5d44a69c8f64d5b9fd87a1.png
nn.MSELoss

# nn_loss_network.py
import torchvision
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.data import DataLoader
dataset = torchvision.datasets.CIFAR10("../dataset", train=False, download=True,
                                       transform=torchvision.transforms.ToTensor())
dataloader = DataLoader(dataset, batch_size=1)
class NueralNetwork(nn.Module):
    def __init__(self):
        super(NueralNetwork, self).__init__()
        self.model1 = Sequential(
            Conv2d(in_channels=3, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Flatten(),
            Linear(in_features=1024, out_features=64),
            Linear(in_features=64, out_features=10)
        )
    def forward(self, x):
        x = self.model1(x)
        return x
loss = nn.CrossEntropyLoss()
nueralnetwork = NueralNetwork()
for data in dataloader:
    imgs, targets = data
    outputs = nueralnetwork(imgs)
    result_loss = loss(outputs, targets)
    print(result_loss)
    print(outputs)
    print(targets)

b43d7005acb2438db38d42d89dfce382.png

nn.CrossEntropyLoss

优化器TORCH.OPTIM

# nn_optim.py
# @Time: 2022/1/14 20:21
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
import torchvision
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.data import DataLoader
dataset = torchvision.datasets.CIFAR10("../dataset", train=False, download=True,
                                       transform=torchvision.transforms.ToTensor())
dataloader = DataLoader(dataset, batch_size=1)
class NueralNetwork(nn.Module):
    def __init__(self):
        super(NueralNetwork, self).__init__()
        self.model1 = Sequential(
            Conv2d(in_channels=3, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Flatten(),
            Linear(in_features=1024, out_features=64),
            Linear(in_features=64, out_features=10)
        )
    def forward(self, x):
        x = self.model1(x)
        return x
# Cross-entropy loss over the network's 10 output scores.
loss = nn.CrossEntropyLoss()
nueralnetwork = NueralNetwork()
# Plain SGD over all model parameters.
optim = torch.optim.SGD(nueralnetwork.parameters(), lr=0.01)
for epoch in range(20):
    # Sum of the per-batch losses seen during this epoch.
    running_loss = 0.0
    for data in dataloader:
        imgs, targets = data
        outputs = nueralnetwork(imgs)
        result_loss = loss(outputs, targets)
        # Clear gradients left over from the previous step before back-propagating.
        optim.zero_grad()
        result_loss.backward()
        optim.step()
        # .item() converts the loss to a plain Python float. Adding the tensor
        # itself would make running_loss a tensor that carries autograd
        # history across the whole epoch.
        running_loss += result_loss.item()
    print(running_loss)

88aa589c9d244d38915c3190a12eaf2f.png

断点调试

0a0521858b55499fb5a4b646e06f4871.png595e0af8bd5040edbee945a2e75018de.png03aeec62f05b4885aef1ae60e06defb7.png

网络模型的增删改

# model_pretrained.py
import torchvision
# train_data = torchvision.datasets.ImageNet("../data_image_net", split='train', download=True,
#                                            transform=torchvision.transforms.ToTensor())
# RuntimeError: The dataset is no longer publicly accessible. You need to download the archives externally and place them in the root directory.
from torch import nn
vgg16_false = torchvision.models.vgg16(pretrained=False)
vgg16_true = torchvision.models.vgg16(pretrained=True)
print(vgg16_true)

96c624a099b24ff6adb2d42ec985986e.png

train_data = torchvision.datasets.CIFAR10('../data', train=True, transform=torchvision.transforms.ToTensor(),
                                          download=True)
vgg16_true.classifier.add_module('add_linear', nn.Linear(1000, 10))
print(vgg16_true)
print(vgg16_false) 

6424f732c3174785b43ae65ac588a858.png

vgg16_false.classifier[6] = nn.Linear(4096, 10)
print(vgg16_false)

bd97f494fb30448ea4d2908f6b9c9d1a.png

预训练模型权重的下载位置:Downloading:

https://download.pytorch.org/models/vgg16-397923af.pth

to C:\Users\15718/.cache\torch\hub\checkpoints\vgg16-397923af.pth


***如果遇到文件较大、网络不通畅,建议复制下载链接,用迅雷等下载工具下载***

模型的保存与加载

# model_save.py
import torch
import torchvision
from torch import nn
vgg16 = torchvision.models.vgg16(pretrained=False)
# 保存方式一     模型结构 + 模型参数
torch.save(vgg16, "vgg16_method1.pth")
# 保存方式二     模型参数(官方推荐)
torch.save(vgg16.state_dict(), "vgg16_method2.pth")
# 陷阱
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)
    def forward(self,x):
        x = self.conv1(x)
        return x
neuralnetwork = NeuralNetwork()
torch.save(neuralnetwork, "neuralnetwork_method1.pth")
# model_load.py
import torch
from model_save import *
# 方式一  --》 保存方式一,加载模型
model = torch.load("vgg16_method1.pth")
# print(model)

ef52101fc1334db79d305d117d11fffa.png

# 方式二   加载模型,字典格式的
model = torch.load("vgg16_method2.pth")
print(model)

308aa6fbd7c8410aaab154410df7eea4.png

# 方式二   加载模型,VGG格式的
vgg16 = torchvision.models.vgg16(pretrained=False)
vgg16.load_state_dict(torch.load("vgg16_method2.pth"))
print(vgg16)

ddf89b82c1ae41a482d0c3a75eaf70b3.png

# 陷阱
model = torch.load("neuralnetwork_method1.pth")
print(model)


6e42c4d4ae8048db838c0042f64e6d84.png

模型训练套路

# model.py
# @Time: 2022/1/15 9:03
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
from torch import nn
#搭建神经网络
class NurealNetwork(nn.Module):
    """CNN classifier: three 5x5 conv + 2x2 max-pool stages followed by two linear layers (10 outputs)."""

    def __init__(self):
        super().__init__()
        feature_layers = [
            nn.Conv2d(3, 32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(2),
        ]
        classifier_layers = [
            nn.Flatten(),
            # 64 channels * 4 * 4 is the flattened size for a 32x32 input.
            nn.Linear(64 * 4 * 4, 64),
            nn.Linear(64, 10),
        ]
        self.model = nn.Sequential(*feature_layers, *classifier_layers)

    def forward(self, x):
        """Return the 10 output scores for each sample in ``x``."""
        return self.model(x)
if __name__ == '__main__':
    nurealnetwork = NurealNetwork()
    input = torch.ones((64, 3, 32, 32))
    output = nurealnetwork(input)
    print(output.shape)
# train.py
# @Time: 2022/1/15 8:43
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
# 准备数据集
import torch.optim
import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from model import *
train_data = torchvision.datasets.CIFAR10(root="../dataset", train=True,transform=torchvision.transforms.ToTensor(),
                                          download=True)
test_data = torchvision.datasets.CIFAR10(root="../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                         download=True)
train_data_size = len(train_data)
test_data_size = len(test_data)
# python中格式化字符串的写法,如果train_data_size = 10, 输出-->训练数据集的长度为:10
print("训练数据集的长度为:{}".format(train_data_size))
print("测试数据集的长度为:{}".format(test_data_size))
# 利用DataLoader加载数据集
train_dataloader = DataLoader(train_data,batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)
# 创建神经网络
nurealnetwork = NurealNetwork()
# 损失函数
loss_fn = nn.CrossEntropyLoss()
# 优化器
# learning_rate = 0.01
learning_rate = 1e-2
optimizer = torch.optim.SGD(nurealnetwork.parameters(), lr=learning_rate)
# 设置训练网络的一些参数
# 记录训练次数
total_train_step = 0
# 记录测试次数
total_test_step = 0
# 训练的轮数
epoch = 10
# 添加Tensorboard
writer = SummaryWriter("../logs_train")
for i in range(epoch):
    print("------第 {} 轮训练开始------".format(i + 1))
    # Training phase: switch the model to training mode.
    nurealnetwork.train()
    for data in train_dataloader:
        imgs, targets = data
        outputs = nurealnetwork(imgs)
        loss = loss_fn(outputs, targets)
        # Reset gradients, back-propagate, then update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_train_step += 1
        # Report only every 100 steps to keep the console output readable.
        if total_train_step % 100 == 0:
            print("训练次数:{} , Loss: {}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)
    # Evaluation phase on the test set: eval mode, no gradient tracking.
    nurealnetwork.eval()
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad():
        for data in test_dataloader:
            imgs, targets = data
            outputs = nurealnetwork(imgs)
            loss = loss_fn(outputs, targets)
            total_test_loss += loss.item()
            # Number of correct predictions in this batch. .item() keeps the
            # running total a plain int, so the accuracy below prints as a
            # number rather than as tensor(...).
            total_accuracy += (outputs.argmax(1) == targets).sum().item()
    test_accuracy = total_accuracy / test_data_size
    print("整体测试集上的Loss:{}".format(total_test_loss))
    print("整体测试集上的正确率:{}".format(test_accuracy))
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    # Tag spelling fixed (was "test_accracy").
    writer.add_scalar("test_accuracy", test_accuracy, total_test_step)
    total_test_step += 1
    # Save the whole model object after every epoch.
    torch.save(nurealnetwork, "nurealnetwork_{}.pth".format(i))
    # Officially recommended alternative: save only the parameters.
    # torch.save(nurealnetwork.state_dict(), "nurealnetwork_{}.pth".format(i))
    print("模型已保存")
writer.close()

96fc9a5a34a14c03aeb32aadf3d69a77.png


43239704ec424d93b67896e4a0714e7f.png


eeb153f9f03944b2899b419d502d2918.png

GPU训练模型

CPU训练 vs GPU训练

CPU 用时

77d016ebbb5b4b9887c86d203ed884af.png

GPU 用时

51cd43269bd845efbdfe018ac850ead6.png

用训练好的模型测试任意一张图片,验证神经网络能否预测正确

# test.py
# @Time: 2022/1/15 15:40
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
import torchvision
from PIL import Image
from torch import nn
image_path = "../images/dog.png"
image = Image.open(image_path)
print(image)
image = image.convert("RGB")
transform =torchvision.transforms.Compose([torchvision.transforms.Resize((32, 32)),
                                           torchvision.transforms.ToTensor()])
image =transform(image)
print(image.shape)
class Tudui(nn.Module):
    def __init__(self):
        super(Tudui, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
            nn.Linear(64*4*4, 64),
            nn.Linear(64, 10)
        )
    def forward(self, x):
        x = self.model(x)
        return x
# Use the GPU when one is available; otherwise fall back to the CPU so the
# script also runs on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# The file holds a whole saved model object (it is called directly below), so
# the Tudui class defined above must be available when unpickling.
# map_location moves the stored tensors onto the selected device.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True,
# which rejects whole-model checkpoints - confirm against the installed version.
model = torch.load("tudui_29_gpu.pth", map_location=device)
print(model)
# Add the batch dimension expected by the network: (3, 32, 32) -> (1, 3, 32, 32).
image = torch.reshape(image, (1, 3, 32, 32))
model.eval()
with torch.no_grad():
    # Input and model must live on the same device.
    image = image.to(device)
    output = model(image)
print(output)
# Index of the highest-scoring class.
print(output.argmax(1))

364c6467ef664d96b33f32ae1a7807a6.png

五、项目实战

YOLO v5训练

下载代码:https://github.com/ultralytics/yolov5/tree/v6.0

安装,配置环境

pip install -r requirements.txt

如果安装依赖库太慢了或者失败了,建议在Anaconda命令行里,进入项目的对应仓库里,安装依赖库,命令如下

pip install 依赖库 -i https://pypi.tuna.tsinghua.edu.cn/simple


d418b89817064b148dd93922cf334d8a.png

def parse_opt():
    """Parse the detection script's command-line options.

    Builds the argument parser, parses the command line, expands a
    single ``--imgsz`` value into two entries, prints the options through
    ``print_args`` and returns the parsed namespace.

    :return: the ``argparse.Namespace`` holding all options.
    """
    parser = argparse.ArgumentParser()
    # Model weights to load; defaults to yolov5s.pt under ROOT.
    # NOTE(review): the larger checkpoints (yolov5m/l/x) are presumably slower but more accurate - confirm against the YOLOv5 docs.
    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)')
    # Input to run detection on: a file, directory, URL or glob, or 0 for the webcam.
    parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam')
    # Inference image size; defaults to 640.
    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
    parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
    # Device to run on: one or more CUDA device indices, or cpu.
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    # Display the results while the script runs.
    parser.add_argument('--view-img', action='store_true', help='show results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--visualize', action='store_true', help='visualize features')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
    parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
    parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
    opt = parser.parse_args()
    # A single size such as [640] becomes [640, 640]; longer lists are left unchanged.
    opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1  # expand
    print_args(FILE.stem, opt)
    return opt

b5fc6247e5d848ad99bc203ff90d0762.png

5fb55308ae9847b3a353d36163d94818.png

效果如图:

image.jpeg



网络异常,图片无法展示
|


image.jpeg



image.jpeg


image.jpeg

视频检测


image.png

将batch_size = 32 调为4,不然显卡会带不动

测试集跑完整个视频,大概花了半个小时

本地跑不动的可以用Google colab 云端服务器跑

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)')
    # 指定视频的路径
    parser.add_argument('--source', type=str, default='data/video/movie.mp4', help='source')  # file/folder, 0 for webcam
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default='runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    opt = parser.parse_args()
    print(opt)
    check_requirements(exclude=('pycocotools', 'thop'))
    with torch.no_grad():
        if opt.update:  # update all models (to fix SourceChangeWarning)
            for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
                detect()
                strip_optimizer(opt.weights)
        else:
            detect()

视频截图:

image.png

训练coco128数据集

设置参数

40937eb6df4c4b569335931f518583ce.png

4a17415166164eeb968675c6963eaa4d.png73e4ed159fcd4ac3aabc9e0477607813.png


f695b1c03b9a473ebaa24e2c6405b8ea.png

e5abae92b92e4b4bb4ec4979a5dcf5ed.png

image.png

image.png


image.png

训练结果:



image.png


image.png



image.png


image.png

VOC2007(Visual Object Classes)数据集


image.png

Annotations:包含了xml文件,描述了图片的各种信息,特别是标注出了目标的位置坐标

ImageSets:主要关注下main文件夹的内容,里面的文件包含了不同类别的训练/验证数据集图片名称

JPEGImages:原图片

SegmentationClass、SegmentationObject:语义分割与实例分割的标注图

coco数据集(Common Objects in Context)

待更新

自制数据集,训练

待更新

FaceMaskDetection 人脸口罩检测

源码https://github.com/AIZOOTech/FaceMaskDetection

# -*- coding:utf-8 -*-
import cv2
import time
import argparse
import numpy as np
from PIL import Image
from utils.anchor_generator import generate_anchors
from utils.anchor_decode import decode_bbox
from utils.nms import single_class_non_max_suppression
from load_model.pytorch_loader import load_pytorch_model, pytorch_inference
# model = load_pytorch_model('models/face_mask_detection.pth');
model = load_pytorch_model('models/model360.pth')
# anchor configuration
#feature_map_sizes = [[33, 33], [17, 17], [9, 9], [5, 5], [3, 3]]
feature_map_sizes = [[45, 45], [23, 23], [12, 12], [6, 6], [4, 4]]
anchor_sizes = [[0.04, 0.056], [0.08, 0.11], [0.16, 0.22], [0.32, 0.45], [0.64, 0.72]]
anchor_ratios = [[1, 0.62, 0.42]] * 5
# generate anchors
anchors = generate_anchors(feature_map_sizes, anchor_sizes, anchor_ratios)
# for inference , the batch size is 1, the model output shape is [1, N, 4],
# so we expand dim for anchors to [1, anchor_num, 4]
anchors_exp = np.expand_dims(anchors, axis=0)
id2class = {0: 'Mask', 1: 'NoMask'}
def inference(image,
              conf_thresh=0.5,
              iou_thresh=0.4,
              target_shape=(160, 160),
              draw_result=True,
              show_result=True
              ):
    """
    Run the detector on one image and return the detections kept by NMS.

    :param image: 3D numpy array of the image (height, width, channels);
        boxes and labels are drawn onto it in place when ``draw_result`` is true.
    :param conf_thresh: minimum classification score, passed to NMS.
    :param iou_thresh: IoU threshold, passed to NMS.
    :param target_shape: size the image is resized to before inference.
    :param draw_result: whether to draw bounding boxes and labels on the image.
    :param show_result: whether to display the image with PIL.
    :return: list of ``[class_id, conf, xmin, ymin, xmax, ymax]`` entries, one
        per kept box, with coordinates scaled to the input image size.
    """
    img_h, img_w, _ = image.shape

    # Resize to the model input size and scale the pixel values by 1/255.
    scaled = cv2.resize(image, target_shape) / 255.0
    # Add a batch axis, then move channels first: (1, H, W, C) -> (1, C, H, W).
    batch = np.expand_dims(scaled, axis=0).transpose((0, 3, 1, 2))

    raw_boxes, raw_scores = pytorch_inference(model, batch)
    # Only one image is passed in, so take element 0 of each output.
    boxes = decode_bbox(anchors_exp, raw_boxes)[0]
    scores = raw_scores[0]

    # Single-class NMS on each box's best score (faster than per-class NMS).
    top_scores = np.max(scores, axis=1)
    top_classes = np.argmax(scores, axis=1)
    kept = single_class_non_max_suppression(boxes,
                                            top_scores,
                                            conf_thresh=conf_thresh,
                                            iou_thresh=iou_thresh,
                                            )

    detections = []
    for k in kept:
        conf = float(top_scores[k])
        class_id = top_classes[k]
        box = boxes[k]
        # Scale to pixel coordinates and clip to the image boundary.
        xmin = max(0, int(box[0] * img_w))
        ymin = max(0, int(box[1] * img_h))
        xmax = min(int(box[2] * img_w), img_w)
        ymax = min(int(box[3] * img_h), img_h)
        if draw_result:
            # Class 0 is drawn with (0, 255, 0), every other class with (255, 0, 0).
            color = (0, 255, 0) if class_id == 0 else (255, 0, 0)
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, 2)
            label = "%s: %.2f" % (id2class[class_id], conf)
            cv2.putText(image, label, (xmin + 2, ymin - 2),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, color)
        detections.append([class_id, conf, xmin, ymin, xmax, ymax])

    if show_result:
        Image.fromarray(image).show()
    return detections
def run_on_video(video_path, output_video_name, conf_thresh):
    """Run detection on every frame of a video stream and display the result.

    Frames are read from ``video_path``, converted from BGR to RGB, passed to
    ``inference`` (which draws the detections onto the frame in place), and
    shown in an OpenCV window. Per-frame timing is printed to stdout.

    Args:
        video_path: Anything accepted by ``cv2.VideoCapture`` (a file path or
            a camera index such as ``0``).
        output_video_name: Currently unused; writing the annotated video to
            disk is disabled in this function.
        conf_thresh: Confidence threshold forwarded to ``inference``.

    Raises:
        ValueError: If the video source cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise ValueError("Video open failed.")
    total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    idx = 0
    try:
        while True:
            start_stamp = time.time()
            status, img_raw = cap.read()
            # Check the read status before touching the frame: when the
            # stream ends no frame is returned and cvtColor would fail.
            if not status:
                break
            img_raw = cv2.cvtColor(img_raw, cv2.COLOR_BGR2RGB)
            read_frame_stamp = time.time()
            inference(img_raw,
                      conf_thresh,
                      iou_thresh=0.5,
                      target_shape=(360, 360),
                      draw_result=True,
                      show_result=False)
            # Reverse the channel axis to get back to BGR for cv2.imshow.
            cv2.imshow('image', img_raw[:, :, ::-1])
            cv2.waitKey(1)
            inference_stamp = time.time()
            # NOTE: no frame is written to disk here, so the "write time"
            # printed below only covers the gap between the two timestamps.
            write_frame_stamp = time.time()
            idx += 1
            print("%d of %d" % (idx, total_frames))
            print("read_frame:%f, infer time:%f, write time:%f" % (read_frame_stamp - start_stamp,
                                                                   inference_stamp - read_frame_stamp,
                                                                   write_frame_stamp - inference_stamp))
    finally:
        # Always free the capture device/file, even if inference raises.
        cap.release()
if __name__ == "__main__":
    # Command-line entry point: run detection on a single image (default)
    # or on a video file / camera stream.
    parser = argparse.ArgumentParser(description="Face Mask Detection")
    parser.add_argument('--img-mode', type=int, default=1, help='set 1 to run on image, 0 to run on video.')
    parser.add_argument('--img-path', type=str, default='img_test/test5.jpg', help='path to your image.')
    parser.add_argument('--video-path', type=str, default='0', help='path to your video, `0` means to use camera.')
    args = parser.parse_args()
    if args.img_mode:
        imgPath = args.img_path
        img = cv2.imread(imgPath)
        # cv2.imread signals an unreadable/missing file by returning None
        # rather than raising; fail here with a clear message instead of
        # letting cvtColor crash on it.
        if img is None:
            raise ValueError("Image open failed: %s" % imgPath)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        inference(img, show_result=True, target_shape=(360, 360))
    else:
        video_path = args.video_path
        # The literal string '0' selects the camera: pass the integer index
        # instead of a file name.
        if args.video_path == '0':
            video_path = 0
        run_on_video(video_path, '', conf_thresh=0.5)

网络异常,图片无法展示
|

图片检测结果:

python pytorch_infer.py --img-path /path/to/your/img

检测前后对比:

网络异常,图片无法展示
|


网络异常,图片无法展示
|

视频流检测

python pytorch_infer.py --img-mode 0 --video-path /path/to/video  
# 如果要打开本地摄像头, video_path填写0就可以了,如下
python pytorch_infer.py --img-mode 0 --video-path 0


网络异常,图片无法展示
|

–end–

相关实践学习
部署Stable Diffusion玩转AI绘画(GPU云服务器)
本实验通过在ECS上从零开始部署Stable Diffusion来进行AI绘画创作,开启AIGC盲盒。
相关文章
|
1月前
|
机器学习/深度学习 数据可视化 计算机视觉
目标检测笔记(五):详细介绍并实现可视化深度学习中每层特征层的网络训练情况
这篇文章详细介绍了如何通过可视化深度学习中每层特征层来理解网络的内部运作,并使用ResNet系列网络作为例子,展示了如何在训练过程中加入代码来绘制和保存特征图。
54 1
目标检测笔记(五):详细介绍并实现可视化深度学习中每层特征层的网络训练情况
|
10天前
|
机器学习/深度学习 监控 PyTorch
深度学习工程实践:PyTorch Lightning与Ignite框架的技术特性对比分析
在深度学习框架的选择上,PyTorch Lightning和Ignite代表了两种不同的技术路线。本文将从技术实现的角度,深入分析这两个框架在实际应用中的差异,为开发者提供客观的技术参考。
29 7
|
1月前
|
机器学习/深度学习 算法 PyTorch
深度学习笔记(十三):IOU、GIOU、DIOU、CIOU、EIOU、Focal EIOU、alpha IOU、SIOU、WIOU损失函数分析及Pytorch实现
这篇文章详细介绍了多种用于目标检测任务中的边界框回归损失函数,包括IOU、GIOU、DIOU、CIOU、EIOU、Focal EIOU、alpha IOU、SIOU和WIOU,并提供了它们的Pytorch实现代码。
149 1
深度学习笔记(十三):IOU、GIOU、DIOU、CIOU、EIOU、Focal EIOU、alpha IOU、SIOU、WIOU损失函数分析及Pytorch实现
|
1月前
|
机器学习/深度学习 PyTorch 算法框架/工具
揭秘深度学习中的微调难题:如何运用弹性权重巩固(EWC)策略巧妙应对灾难性遗忘,附带实战代码详解助你轻松掌握技巧
【10月更文挑战第1天】深度学习中,模型微调虽能提升性能,但常导致“灾难性遗忘”,即模型在新任务上训练后遗忘旧知识。本文介绍弹性权重巩固(EWC)方法,通过在损失函数中加入正则项来惩罚对重要参数的更改,从而缓解此问题。提供了一个基于PyTorch的实现示例,展示如何在训练过程中引入EWC损失,适用于终身学习和在线学习等场景。
56 4
揭秘深度学习中的微调难题:如何运用弹性权重巩固(EWC)策略巧妙应对灾难性遗忘,附带实战代码详解助你轻松掌握技巧
|
2月前
|
机器学习/深度学习 PyTorch 调度
在Pytorch中为不同层设置不同学习率来提升性能,优化深度学习模型
在深度学习中,学习率作为关键超参数对模型收敛速度和性能至关重要。传统方法采用统一学习率,但研究表明为不同层设置差异化学习率能显著提升性能。本文探讨了这一策略的理论基础及PyTorch实现方法,包括模型定义、参数分组、优化器配置及训练流程。通过示例展示了如何为ResNet18设置不同层的学习率,并介绍了渐进式解冻和层适应学习率等高级技巧,帮助研究者更好地优化模型训练。
132 4
在Pytorch中为不同层设置不同学习率来提升性能,优化深度学习模型
|
30天前
|
机器学习/深度学习 人工智能 算法
揭开深度学习与传统机器学习的神秘面纱:从理论差异到实战代码详解两者间的选择与应用策略全面解析
【10月更文挑战第10天】本文探讨了深度学习与传统机器学习的区别,通过图像识别和语音处理等领域的应用案例,展示了深度学习在自动特征学习和处理大规模数据方面的优势。文中还提供了一个Python代码示例,使用TensorFlow构建多层感知器(MLP)并与Scikit-learn中的逻辑回归模型进行对比,进一步说明了两者的不同特点。
63 2
|
1月前
|
机器学习/深度学习 算法 数据可视化
如果你的PyTorch优化器效果欠佳,试试这4种深度学习中的高级优化技术吧
在深度学习领域,优化器的选择对模型性能至关重要。尽管PyTorch中的标准优化器如SGD、Adam和AdamW被广泛应用,但在某些复杂优化问题中,这些方法未必是最优选择。本文介绍了四种高级优化技术:序列最小二乘规划(SLSQP)、粒子群优化(PSO)、协方差矩阵自适应进化策略(CMA-ES)和模拟退火(SA)。这些方法具备无梯度优化、仅需前向传播及全局优化能力等优点,尤其适合非可微操作和参数数量较少的情况。通过实验对比发现,对于特定问题,非传统优化方法可能比标准梯度下降算法表现更好。文章详细描述了这些优化技术的实现过程及结果分析,并提出了未来的研究方向。
25 1
|
30天前
|
机器学习/深度学习 算法 安全
基于深度学习的目标检测的介绍(Introduction to object detection with deep learning)
基于深度学习的目标检测的介绍(Introduction to object detection with deep learning)
25 0
|
1月前
|
机器学习/深度学习 传感器 编解码
深度学习之地球观测中的目标检测
基于深度学习的地球观测中的目标检测是将深度学习技术应用于遥感数据中以自动识别和定位目标物体的过程。这一技术迅速成为遥感领域的研究热点,主要原因在于地球观测(Earth Observation, EO)平台和遥感技术的进步带来了海量的高分辨率数据,而深度学习技术在目标检测、图像识别等任务上的显著成功为其提供了强有力的支持。
27 0
|
2月前
|
机器学习/深度学习 数据挖掘 PyTorch
🎓PyTorch深度学习入门课:编程小白也能玩转的高级数据分析术
踏入深度学习领域,即使是编程新手也能借助PyTorch这一强大工具,轻松解锁高级数据分析。PyTorch以简洁的API、动态计算图及灵活性著称,成为众多学者与工程师的首选。本文将带你从零开始,通过环境搭建、构建基础神经网络到进阶数据分析应用,逐步掌握PyTorch的核心技能。从安装配置到编写简单张量运算,再到实现神经网络模型,最后应用于图像分类等复杂任务,每个环节都配有示例代码,助你快速上手。实践出真知,不断尝试和调试将使你更深入地理解这些概念,开启深度学习之旅。
35 1

热门文章

最新文章