四、神经网络的搭建
# nn_module.py
# @Time: 2022/1/13 21:42
# @Author: 金阳
# @Version: 1.0.1
import torch
from torch import nn


class NeuralNetwork(nn.Module):
    """Smallest possible nn.Module: the forward pass just adds 1 to its input."""

    def __init__(self) -> None:
        super().__init__()

    def forward(self, input):
        # Calling the module instance (neuralnetwork(x)) dispatches here
        # through nn.Module.__call__.
        return input + 1


neuralnetwork = NeuralNetwork()
x = torch.tensor(1.0)
output = neuralnetwork(x)
print(output)  # tensor(2.)
二维卷积层Convolution Layers
# nn_conv.py
# @Time: 2022/1/13 22:08
# @Author: 金阳
# @Version: 1.0.1
# Demonstrates torch.nn.functional.conv2d with different stride/padding values.
import torch
import torch.nn.functional as F

input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]])
kernel = torch.tensor([[1, 2, 1],
                       [0, 1, 0],
                       [2, 1, 0]])

# conv2d expects 4-D (minibatch, channels, H, W) tensors.
input = torch.reshape(input, (1, 1, 5, 5))
kernel = torch.reshape(kernel, (1, 1, 3, 3))
print(input.shape)
print(kernel.shape)

# stride 1: the 3x3 window slides one cell at a time -> 3x3 result.
output = F.conv2d(input, kernel, stride=1)
print(output)

# stride 2: the window jumps two cells -> 2x2 result.
output2 = F.conv2d(input, kernel, stride=2)
print(output2)

# padding 1 restores the 5x5 spatial size at stride 1.
output3 = F.conv2d(input, kernel, stride=1, padding=1)
print(output3)
# src/nn_conv2d.py
# @Time: 2022/1/13 22:37
# @Author: 金阳
# @Version: 1.0.1
import torch
import torchvision
from torch import nn
from torch.nn import Conv2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# CIFAR10 test split converted to tensors; downloaded on first run.
dataset = torchvision.datasets.CIFAR10(
    "../dataset",
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
dataloader = DataLoader(dataset, batch_size=64)


class NeuralNetwork(nn.Module):
    """A single 3x3 convolution mapping 3 input channels to 6 output channels."""

    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.conv1 = Conv2d(in_channels=3, out_channels=6,
                            kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        return self.conv1(x)


neuralnetwork = NeuralNetwork()

writer = SummaryWriter("../logs")
step = 0
for data in dataloader:
    imgs, targets = data
    output = neuralnetwork(imgs)
    print(imgs.shape)    # torch.Size([64, 3, 32, 32])
    print(output.shape)  # torch.Size([64, 6, 30, 30])
    writer.add_images("input", imgs, step)
    # add_images expects 3-channel images, so fold the 6 feature maps
    # into extra batch entries before logging.
    output = torch.reshape(output, (-1, 3, 30, 30))
    writer.add_images("output", output, step)
    step += 1
writer.close()
池化层 Pooling layers
# nn_maxpool.py
# @Time: 2022/1/14 9:53
# @Author: 金阳
# @Version: 1.0.1
# Demonstrates MaxPool2d with ceil_mode=True on a 5x5 input.
import torch
from torch import nn
from torch.nn import MaxPool2d

# MaxPool2d requires a floating-point input.
input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]], dtype=torch.float32)
input = torch.reshape(input, (-1, 1, 5, 5))
print(input.shape)


class NueralNetwork(nn.Module):
    """3x3 max-pooling; ceil_mode=True keeps the partial windows at the edges."""

    def __init__(self):
        super(NueralNetwork, self).__init__()
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=True)

    def forward(self, input):
        output = self.maxpool1(input)
        return output


nueralnetwork = NueralNetwork()
output = nueralnetwork(input)
print(output)
# Console output (was pasted into the source as bare code, which would raise
# a NameError on `tensor`; it is kept here as a comment instead):
# tensor([[[[2., 3.],
#           [5., 1.]]]])
当 ceil_mode=False 时(舍弃边缘不足 3×3 的窗口),同一输入的输出为:tensor([[[[2.]]]])
# nn_maxpool on CIFAR10
# @Time: 2022/1/14 9:53
# @Author: 金阳
# @Version: 1.0.1
import torch
import torchvision
from torch import nn
from torch.nn import MaxPool2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10(
    "../dataset",
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
dataloader = DataLoader(dataset, batch_size=64)


class NueralNetwork(nn.Module):
    """3x3 max-pool with ceil_mode=True, applied to whole CIFAR10 batches."""

    def __init__(self):
        super(NueralNetwork, self).__init__()
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=True)

    def forward(self, input):
        return self.maxpool1(input)


nueralnetwork = NueralNetwork()

writer = SummaryWriter("../logs")
step = 0
for data in dataloader:
    imgs, targets = data
    writer.add_images("input", imgs, step)
    # Pooling keeps the channel count, so the result can be logged directly.
    writer.add_images("output", nueralnetwork(imgs), step)
    step += 1
writer.close()
非线性激活Non-linear Activations (weighted sum, nonlinearity)
nn. ReLU
# nn_relu.py — element-wise ReLU on a tiny 2x2 tensor.
import torch
from torch import nn
from torch.nn import ReLU

# The original imported torchvision and DataLoader but never used them;
# they were removed so the script runs with torch alone.

input = torch.tensor([[1, -0.5],
                      [-1, 3]])
# Reshape to (N, C, H, W) as nn modules expect.
input = torch.reshape(input, (-1, 1, 2, 2))
print(input.shape)
print(input)
# tensor([[[[ 1.0000, -0.5000],
#           [-1.0000,  3.0000]]]])


class NerualNetwork(nn.Module):
    """Applies an element-wise ReLU (negative values clamped to zero)."""

    def __init__(self):
        super(NerualNetwork, self).__init__()
        self.relu1 = ReLU()

    def forward(self, input):
        output = self.relu1(input)
        return output


nerualnetwork = NerualNetwork()
output = nerualnetwork(input)
print(output)
# Console output (was pasted into the source as bare code, which would raise
# a NameError on `tensor`; kept as a comment instead):
# tensor([[[[1., 0.],
#           [0., 3.]]]])
nn.Sigmoid
如果 TensorBoard 里图片的 step 没有从 0 开始,这只是图片采样显示的问题,可以用下面的命令提高采样数:
tensorboard --logdir=logs --samples_per_plugin images=1000
# nn_sigmoid.py
# @Time: 2022/1/14 10:34
# @Author: 金阳
# @Version: 1.0.1
import torch
import torchvision
from torch import nn
from torch.nn import ReLU, Sigmoid
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

input = torch.tensor([[1, -0.5],
                      [-1, 3]])
input = torch.reshape(input, (-1, 1, 2, 2))
print(input.shape)
print(input)

dataset = torchvision.datasets.CIFAR10(
    "../dataset",
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
dataloader = DataLoader(dataset, batch_size=64, )


class NerualNetwork(nn.Module):
    """Holds ReLU and Sigmoid activations; forward applies only the Sigmoid."""

    def __init__(self):
        super(NerualNetwork, self).__init__()
        self.relu1 = ReLU()
        self.sigmod1 = Sigmoid()

    def forward(self, input):
        return self.sigmod1(input)


nerualnetwork = NerualNetwork()

writer = SummaryWriter("../logs")
step = 0
for data in dataloader:
    imgs, target = data
    writer.add_images("input", imgs, global_step=step)
    writer.add_images("output", nerualnetwork(imgs), global_step=step)
    step += 1
writer.close()
正则化Normalization Layers
nn.BatchNorm2d
Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift .
Recurrent Layers
nn.RNN
nn.LSTM
Sparse Layers
nn.Embedding
A simple lookup table that stores embeddings of a fixed dictionary and size.
Linear Layers
# nn_linear.py # @Time: 2022/1/14 11:48 # @Author: 金阳 # @Parameter: # @Version: 1.0.1 import torch import torchvision from torch import nn from torch.nn import Linear from torch.utils.data import DataLoader dataset = torchvision.datasets.CIFAR10("../dataset", train=False, transform=torchvision.transforms.ToTensor(), download=True) dataloader = DataLoader(dataset, batch_size=64) class NerualNetwork(nn.Module): def __init__(self): super(NerualNetwork, self).__init__() self.linear1 = Linear(196608, 10) def forward(self, input): output = self.linear1(input) return output nerualnetwork = NerualNetwork() for data in dataloader: imgs, targets =data print(imgs.shape) # torch.Size([64, 3, 32, 32]) # output = torch.reshape(imgs, (1, 1, 1, -1)) output = torch.flatten(imgs) print(output.shape) # torch.Size([196608]) output = nerualnetwork(output) print(output.shape) # torch.Size([10])
SEQUENTIAL
# nn_seq.py
# @Time: 2022/1/14 12:57
# @Author: 金阳
# @Version: 1.0.1
import torch
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.tensorboard import SummaryWriter


class NueralNetwork(nn.Module):
    """CIFAR10-sized CNN expressed as one nn.Sequential pipeline."""

    def __init__(self):
        super(NueralNetwork, self).__init__()
        # Sequential replaces the nine individual layer attributes the
        # verbose version would need; forward collapses to one call.
        self.model1 = Sequential(
            Conv2d(in_channels=3, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Flatten(),
            Linear(in_features=1024, out_features=64),
            Linear(in_features=64, out_features=10),
        )

    def forward(self, x):
        return self.model1(x)


nueralnetwork = NueralNetwork()
print(nueralnetwork)

# Shape sanity check: a 64-image batch must produce 64 rows of 10 logits.
input = torch.ones((64, 3, 32, 32))
output = nueralnetwork(input)
print(output.shape)

writer = SummaryWriter("../logs")
writer.add_graph(nueralnetwork, input)
writer.close()
输出结果是
NueralNetwork( (model1): Sequential( (0): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2)) (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (2): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2)) (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (4): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2)) (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (6): Flatten(start_dim=1, end_dim=-1) (7): Linear(in_features=1024, out_features=64, bias=True) (8): Linear(in_features=64, out_features=10, bias=True) ) ) torch.Size([64, 10])
损失函数Loss Functions
nn.L1Loss
nn.MSELoss
# nn_loss_network.py
import torchvision
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10("../dataset", train=False, download=True,
                                       transform=torchvision.transforms.ToTensor())
dataloader = DataLoader(dataset, batch_size=1)


class NueralNetwork(nn.Module):
    """The same Sequential CNN as nn_seq.py, used here to feed CrossEntropyLoss."""

    def __init__(self):
        super(NueralNetwork, self).__init__()
        self.model1 = Sequential(
            Conv2d(in_channels=3, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Flatten(),
            Linear(in_features=1024, out_features=64),
            Linear(in_features=64, out_features=10),
        )

    def forward(self, x):
        return self.model1(x)


loss = nn.CrossEntropyLoss()
nueralnetwork = NueralNetwork()

for data in dataloader:
    imgs, targets = data
    outputs = nueralnetwork(imgs)
    # Cross-entropy between the 10-class logits and the ground-truth label.
    result_loss = loss(outputs, targets)
    print(result_loss)
    print(outputs)
    print(targets)
nn.CrossEntropyLoss
优化器TORCH.OPTIM
# nn_optim.py
# @Time: 2022/1/14 20:21
# @Author: 金阳
# @Version: 1.0.1
import torch
import torchvision
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10("../dataset", train=False, download=True,
                                       transform=torchvision.transforms.ToTensor())
dataloader = DataLoader(dataset, batch_size=1)


class NueralNetwork(nn.Module):
    """Sequential CNN whose parameters are optimized with SGD below."""

    def __init__(self):
        super(NueralNetwork, self).__init__()
        self.model1 = Sequential(
            Conv2d(in_channels=3, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Flatten(),
            Linear(in_features=1024, out_features=64),
            Linear(in_features=64, out_features=10),
        )

    def forward(self, x):
        return self.model1(x)


loss = nn.CrossEntropyLoss()
nueralnetwork = NueralNetwork()
# SGD optimizer over all model parameters.
optim = torch.optim.SGD(nueralnetwork.parameters(), lr=0.01)

for epoch in range(20):
    running_loss = 0.0
    for data in dataloader:
        imgs, targets = data
        outputs = nueralnetwork(imgs)
        result_loss = loss(outputs, targets)
        # Zero the gradients, backpropagate, then step the optimizer.
        optim.zero_grad()
        result_loss.backward()
        optim.step()
        # Fix: accumulate the detached Python scalar. The original summed the
        # loss *tensors* (`running_loss += result_loss`), which keeps tensor
        # references alive across the whole epoch and prints a tensor rather
        # than a plain float.
        running_loss += result_loss.item()
    print(running_loss)
断点调试
网络模型的增删改
# model_pretrained.py
# Shows how to extend / modify a pretrained torchvision VGG16.
import torchvision
from torch import nn

# train_data = torchvision.datasets.ImageNet("../data_image_net", split='train', download=True,
#                                            transform=torchvision.transforms.ToTensor())
# The above raises (which is why it is disabled; the error text had been
# pasted into the source as bare text, breaking the syntax):
#   RuntimeError: The dataset is no longer publicly accessible. You need to
#   download the archives externally and place them in the root directory.

vgg16_false = torchvision.models.vgg16(pretrained=False)
vgg16_true = torchvision.models.vgg16(pretrained=True)
print(vgg16_true)

train_data = torchvision.datasets.CIFAR10('../data', train=True, transform=torchvision.transforms.ToTensor(), download=True)

# Append a new layer that maps the 1000 ImageNet logits to CIFAR10's 10 classes.
vgg16_true.classifier.add_module('add_linear', nn.Linear(1000, 10))
print(vgg16_true)
print(vgg16_false)

# Alternatively, replace the final classifier layer in place.
vgg16_false.classifier[6] = nn.Linear(4096, 10)
print(vgg16_false)
预训练模型权重的下载位置 Downloading:
https://download.pytorch.org/models/vgg16-397923af.pth
to C:\Users\15718/.cache\torch\hub\checkpoints\vgg16-397923af.pth
***如果遇到数据集较大,网络不通畅,建议:复制下载链接,用迅雷下 ***
模型的保存与加载
# model_save.py import torch import torchvision from torch import nn vgg16 = torchvision.models.vgg16(pretrained=False) # 保存方式一 模型结构 + 模型参数 torch.save(vgg16, "vgg16_method1.pth") # 保存方式二 模型参数(官方推荐) torch.save(vgg16.state_dict(), "vgg16_method2.pth") # 陷阱 class NeuralNetwork(nn.Module): def __init__(self): super(NeuralNetwork, self).__init__() self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0) def forward(self,x): x = self.conv1(x) return x neuralnetwork = NeuralNetwork() torch.save(neuralnetwork, "neuralnetwork_method1.pth")
# model_load.py import torch from model_save import * # 方式一 --》 保存方式一,加载模型 model = torch.load("vgg16_method1.pth") # print(model)
# 方式二 加载模型,字典格式的 model = torch.load("vgg16_method2.pth") print(model)
# 方式二 加载模型,VGG格式的 vgg16 = torchvision.models.vgg16(pretrained=False) vgg16.load_state_dict(torch.load("vgg16_method2.pth")) print(vgg16)
# 陷阱 model = torch.load("neuralnetwork_method1.pth") print(model)
模型训练套路
# model.py
# @Time: 2022/1/15 9:03
# @Author: 金阳
# @Version: 1.0.1
import torch
from torch import nn


class NurealNetwork(nn.Module):
    """CIFAR10 classifier: three conv/pool stages followed by two linear layers."""

    def __init__(self):
        super(NurealNetwork, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
            nn.Linear(64 * 4 * 4, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        return self.model(x)


if __name__ == '__main__':
    # Shape sanity check: a 64-image batch must map to 64 rows of 10 logits.
    net = NurealNetwork()
    print(net(torch.ones((64, 3, 32, 32))).shape)
# train.py
# @Time: 2022/1/15 8:43
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
# Full CIFAR10 training loop: data, model, loss, optimizer, logging, checkpoints.
import torch.optim
import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from model import *

# Prepare the datasets (train and test splits of CIFAR10).
train_data = torchvision.datasets.CIFAR10(root="../dataset", train=True, transform=torchvision.transforms.ToTensor(), download=True)
test_data = torchvision.datasets.CIFAR10(root="../dataset", train=False, transform=torchvision.transforms.ToTensor(), download=True)

train_data_size = len(train_data)
test_data_size = len(test_data)
# str.format example: with train_data_size = 10 this prints 训练数据集的长度为:10
print("训练数据集的长度为:{}".format(train_data_size))
print("测试数据集的长度为:{}".format(test_data_size))

# Wrap the datasets in DataLoaders.
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

# Build the network (NurealNetwork is defined in model.py).
nurealnetwork = NurealNetwork()

# Loss function.
loss_fn = nn.CrossEntropyLoss()

# Optimizer; 1e-2 is the same as 0.01.
learning_rate = 1e-2
optimizer = torch.optim.SGD(nurealnetwork.parameters(), lr=learning_rate)

# Bookkeeping: training steps, test evaluations, and number of epochs.
total_train_step = 0
total_test_step = 0
epoch = 10

# TensorBoard writer.
writer = SummaryWriter("../logs_train")

for i in range(epoch):
    print("------第 {} 轮训练开始------".format(i + 1))
    # Put the model in training mode.
    nurealnetwork.train()
    for data in train_dataloader:
        imgs, targets = data
        outputs = nurealnetwork(imgs)
        loss = loss_fn(outputs, targets)
        # Zero the gradients, backpropagate, then update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_train_step += 1
        # Log every 100 steps to limit console output.
        if total_train_step % 100 == 0:
            print("训练次数:{} , Loss: {}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    # Evaluation pass over the test set, with gradients disabled.
    nurealnetwork.eval()
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad():
        for data in test_dataloader:
            imgs, targets = data
            outputs = nurealnetwork(imgs)
            # Compare predictions with targets via the same loss function.
            loss = loss_fn(outputs, targets)
            total_test_loss += loss.item()
            # Count correct predictions in this batch.
            accuracy = (outputs.argmax(1) == targets).sum()
            total_accuracy += accuracy
    print("整体测试集上的Loss:{}".format(total_test_loss))
    print("整体测试集上的正确率:{}".format(total_accuracy / test_data_size))
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accracy", total_accuracy / test_data_size, total_test_step)
    total_test_step += 1

    # Save a checkpoint after every epoch (whole-model format).
    torch.save(nurealnetwork, "nurealnetwork_{}.pth".format(i))
    # Officially recommended alternative (state_dict only):
    # torch.save(nurealnetwork.state_dict(), "nurealnetwork_{}.pth".format(i))
    print("模型已保存")

writer.close()
GPU训练模型
CPU训练 vs GPU训练
CPU 用时
GPU 用时
用任意的模型测试神经网络能否预测成功
# test.py # @Time: 2022/1/15 15:40 # @Author: 金阳 # @Parameter: # @Version: 1.0.1 import torch import torchvision from PIL import Image from torch import nn image_path = "../images/dog.png" image = Image.open(image_path) print(image) image = image.convert("RGB") transform =torchvision.transforms.Compose([torchvision.transforms.Resize((32, 32)), torchvision.transforms.ToTensor()]) image =transform(image) print(image.shape) class Tudui(nn.Module): def __init__(self): super(Tudui, self).__init__() self.model = nn.Sequential( nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2), nn.MaxPool2d(kernel_size=2), nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=1, padding=2), nn.MaxPool2d(kernel_size=2), nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2), nn.MaxPool2d(kernel_size=2), nn.Flatten(), nn.Linear(64*4*4, 64), nn.Linear(64, 10) ) def forward(self, x): x = self.model(x) return x model = torch.load("tudui_29_gpu.pth") print(model) image = torch.reshape(image, (1, 3, 32, 32)) model.eval() with torch.no_grad(): image = image.cuda() output = model(image) print(output) print(output.argmax(1))
五、项目实战
YOLO v5训练
下载代码:https://github.com/ultralytics/yolov5/tree/v6.0
安装,配置环境
pip install -r requirements.txt
如果安装依赖库太慢了或者失败了,建议在Anaconda命令行里,进入项目的对应仓库里,安装依赖库,命令如下
pip install 依赖库 -i https://pypi.tuna.tsinghua.edu.cn/simple
def parse_opt():
    """Build and parse the command-line options for YOLOv5 v6.0 detect.py."""
    parser = argparse.ArgumentParser()
    # Pretrained weights; yolov5s/m/l/x.pt grow in size, training time and accuracy.
    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)')
    # Where the inputs to run detection on live.
    parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam')
    # Inference image size; 640px by default.
    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
    parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
    # Select CPU or GPU(s); several GPU ids may be given at once.
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    # Preview images (or the video) while detecting.
    parser.add_argument('--view-img', action='store_true', help='show results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--visualize', action='store_true', help='visualize features')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
    parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
    parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
    opt = parser.parse_args()
    # A single --imgsz value is applied to both height and width.
    opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1  # expand
    print_args(FILE.stem, opt)
    return opt
效果如图:
视频检测
将batch_size = 32 调为4,不然显卡会带不动
测试集跑完整个视频,大概花了半个小时
本地跑不动的可以用Google colab 云端服务器跑
# Entry point for the (older, v5-style) detect.py command-line interface.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)')
    # Path of the video to process.
    parser.add_argument('--source', type=str, default='data/video/movie.mp4', help='source')  # file/folder, 0 for webcam
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default='runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    opt = parser.parse_args()
    print(opt)
    check_requirements(exclude=('pycocotools', 'thop'))

    # Inference only — gradients are never needed here.
    with torch.no_grad():
        if opt.update:  # update all models (to fix SourceChangeWarning)
            for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
                detect()
                strip_optimizer(opt.weights)
        else:
            detect()
视频截图:
训练coco128数据集
设置参数
训练结果:
VOC2007(Visual Object Classes)数据集
Annotations:包含了xml文件,描述了图片的各种信息,特别是标注出了目标的位置坐标
ImageSets:主要关注下main文件夹的内容,里面的文件包含了不同类别的训练/验证数据集图片名称
JPEGImages:原图片
SegmentationClass、SegmentationObject:语义分割
coco数据集(Common Objects in Context)
待更新
自制数据集,训练
待更新
FaceMaskDetection 人脸面罩检测
源码https://github.com/AIZOOTech/FaceMaskDetection
# -*- coding:utf-8 -*-
# FaceMaskDetection inference script (image or video mode).
import cv2
import time
import argparse
import numpy as np
from PIL import Image
from utils.anchor_generator import generate_anchors
from utils.anchor_decode import decode_bbox
from utils.nms import single_class_non_max_suppression
from load_model.pytorch_loader import load_pytorch_model, pytorch_inference

# model = load_pytorch_model('models/face_mask_detection.pth');
model = load_pytorch_model('models/model360.pth')
# anchor configuration
# feature_map_sizes = [[33, 33], [17, 17], [9, 9], [5, 5], [3, 3]]
feature_map_sizes = [[45, 45], [23, 23], [12, 12], [6, 6], [4, 4]]
anchor_sizes = [[0.04, 0.056], [0.08, 0.11], [0.16, 0.22], [0.32, 0.45], [0.64, 0.72]]
anchor_ratios = [[1, 0.62, 0.42]] * 5

# generate anchors
anchors = generate_anchors(feature_map_sizes, anchor_sizes, anchor_ratios)

# for inference, the batch size is 1, the model output shape is [1, N, 4],
# so we expand dim for anchors to [1, anchor_num, 4]
anchors_exp = np.expand_dims(anchors, axis=0)
id2class = {0: 'Mask', 1: 'NoMask'}


def inference(image,
              conf_thresh=0.5,
              iou_thresh=0.4,
              target_shape=(160, 160),
              draw_result=True,
              show_result=True
              ):
    '''
    Main function of detection inference
    :param image: 3D numpy array of image
    :param conf_thresh: the min threshold of classification probabity.
    :param iou_thresh: the IOU threshold of NMS
    :param target_shape: the model input size.
    :param draw_result: whether to daw bounding box to the image.
    :param show_result: whether to display the image.
    :return: list of [class_id, conf, xmin, ymin, xmax, ymax] per kept box
    '''
    # image = np.copy(image)
    output_info = []
    height, width, _ = image.shape
    image_resized = cv2.resize(image, target_shape)
    image_np = image_resized / 255.0  # normalize to 0~1
    image_exp = np.expand_dims(image_np, axis=0)
    # NHWC -> NCHW for the PyTorch model.
    image_transposed = image_exp.transpose((0, 3, 1, 2))
    y_bboxes_output, y_cls_output = pytorch_inference(model, image_transposed)
    # remove the batch dimension, for batch is always 1 for inference.
    y_bboxes = decode_bbox(anchors_exp, y_bboxes_output)[0]
    y_cls = y_cls_output[0]
    # To speed up, do single class NMS, not multiple classes NMS.
    bbox_max_scores = np.max(y_cls, axis=1)
    bbox_max_score_classes = np.argmax(y_cls, axis=1)

    # keep_idx is the alive bounding box after nms.
    keep_idxs = single_class_non_max_suppression(y_bboxes,
                                                 bbox_max_scores,
                                                 conf_thresh=conf_thresh,
                                                 iou_thresh=iou_thresh,
                                                 )
    for idx in keep_idxs:
        conf = float(bbox_max_scores[idx])
        class_id = bbox_max_score_classes[idx]
        bbox = y_bboxes[idx]
        # clip the coordinate, avoid the value exceed the image boundary.
        xmin = max(0, int(bbox[0] * width))
        ymin = max(0, int(bbox[1] * height))
        xmax = min(int(bbox[2] * width), width)
        ymax = min(int(bbox[3] * height), height)
        if draw_result:
            # Green box for Mask, red for NoMask.
            if class_id == 0:
                color = (0, 255, 0)
            else:
                color = (255, 0, 0)
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, 2)
            cv2.putText(image, "%s: %.2f" % (id2class[class_id], conf), (xmin + 2, ymin - 2),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, color)
        output_info.append([class_id, conf, xmin, ymin, xmax, ymax])

    if show_result:
        Image.fromarray(image).show()
    return output_info


def run_on_video(video_path, output_video_name, conf_thresh):
    """Run mask detection frame-by-frame on a video file or camera stream."""
    cap = cv2.VideoCapture(video_path)
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    fps = cap.get(cv2.CAP_PROP_FPS)
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    # writer = cv2.VideoWriter(output_video_name, fourcc, int(fps), (int(width), int(height)))
    total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    if not cap.isOpened():
        # Fix: the original had an unreachable `return` after this raise.
        raise ValueError("Video open failed.")
    status = True
    idx = 0
    while status:
        start_stamp = time.time()
        status, img_raw = cap.read()
        read_frame_stamp = time.time()
        if (status):
            # Fix: convert colors only after a successful read. The original
            # called cvtColor before checking `status`, which crashes on the
            # None frame returned at end-of-stream.
            img_raw = cv2.cvtColor(img_raw, cv2.COLOR_BGR2RGB)
            inference(img_raw,
                      conf_thresh,
                      iou_thresh=0.5,
                      target_shape=(360, 360),
                      draw_result=True,
                      show_result=False)
            cv2.imshow('image', img_raw[:, :, ::-1])
            cv2.waitKey(1)
            inference_stamp = time.time()
            # writer.write(img_raw)
            write_frame_stamp = time.time()
            idx += 1
            print("%d of %d" % (idx, total_frames))
            print("read_frame:%f, infer time:%f, write time:%f" % (read_frame_stamp - start_stamp,
                                                                   inference_stamp - read_frame_stamp,
                                                                   write_frame_stamp - inference_stamp))
    # writer.release()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Face Mask Detection")
    parser.add_argument('--img-mode', type=int, default=1, help='set 1 to run on image, 0 to run on video.')
    parser.add_argument('--img-path', type=str, default='img_test/test5.jpg', help='path to your image.')
    parser.add_argument('--video-path', type=str, default='0', help='path to your video, `0` means to use camera.')
    # parser.add_argument('--hdf5', type=str, help='keras hdf5 file')
    args = parser.parse_args()
    if args.img_mode:
        imgPath = args.img_path
        img = cv2.imread(imgPath)
        # OpenCV loads BGR; the model expects RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        inference(img, show_result=True, target_shape=(360, 360))
    else:
        video_path = args.video_path
        if args.video_path == '0':
            video_path = 0
        run_on_video(video_path, '', conf_thresh=0.5)
图片检测结果:
python pytorch_infer.py --img-path /path/to/your/img
前:
视频流检测
python pytorch_infer.py --img-mode 0 --video-path /path/to/video # 如果要打开本地摄像头, video_path填写0就可以了,如下 python pytorch_infer.py --img-mode 0 --video-path 0
–end–