4、总结
1、使用在一般数据库上训练的GoogleNet,大大提升了准确率,超出第二名约10%;
2、使用classification+verification的复合损失,比单独损失提升准确率7%-9%,添加triplet loss不能提升准确率;
3、在verification中使用统一的dropout提升准确率3%;
4、分两步fine-tune提升准确率8.7%;
5、使用在ImageNet上预训练的特征提取网络,能够使得提取的特征更具有“目标”语义。
5、基于表征学习的ReID方法实践
本项目基于以上说明的论文进行实践,数据集是Market1501数据集。针对论文中的Baseline网络GoogleNet进行了替换,实践的Baseline网络为ResNet50模型,同时使用了预训练的方式对论文进行了实践。
5.1、Market1501数据集介绍
Market-1501 数据集在清华大学校园中采集,夏天拍摄,在2015年构建并公开。它包括由6个摄像头(其中5个高清摄像头和1个低清摄像头)拍摄到的1501个行人、32668个检测到的行人矩形框。每个行人至少由2个摄像头捕获到,并且在一个摄像头中可能具有多张图像。训练集有751人,包含12,936张图像,平均每个人有17.2张训练数据;测试集有750人,包含19,732张图像,平均每个人有26.3张测试数据。3368张查询图像的行人检测矩形框是人工绘制的,而gallery中的行人检测矩形框则是使用DPM检测器检测得到的。该数据集提供的固定数量的训练集和测试集均可以在single-shot或multi-shot测试设置下使用。
目录介绍
1、bounding_box_test:用于测试集的750人,包含19,732张图像,前缀为0000表示在提取这750人的过程中DPM检测错的图(可能与query是同一个人),-1表示检测出来其他人的图(不在这750人中)
2、bounding_box_train:用于训练集的751人,包含12,936张图像
3、query:为750人在每个摄像头中随机选择一张图像作为query,因此一个人的query最多有6个,共有3,368 张图像
4、gt_query:matlab格式,用于判断一个query的哪些图片是好的匹配(同一个人不同摄像头的图像)和不好的匹配(同一个人同一个摄像头的图像或非同一个人的图像)
5、gt_bbox:手工标注的bounding box,用于判断DPM检测的bounding box是不是一个好的box
命名规则:以 0001_c1s1_000151_01.jpg为例
1、0001表示每个人的标签编号,从0001到1501;
2、c1表示第一个摄像头(camera1),共有6个摄像头;
3、s1表示第一个录像片段(sequence1),每个摄像机都有数个录像段;
4、000151表示c1s1的第000151帧图片,视频帧率25 Fps;
5、01 表示c1s1_000151这一帧上的第1个检测框,由于采用DPM检测器,对于每一帧上的行人可能会框出好几个bbox。00表示手工标注框
5.2、 数据集制作
from __future__ import print_function, absolute_import

import os
import os.path as osp

import numpy as np
import torch
from PIL import Image
from torch.utils.data import DataLoader, Dataset


def read_image(img_path, max_retries=10):
    """Load the image at ``img_path`` and return it converted to RGB.

    Reading is retried when it fails with ``IOError``, but only up to
    ``max_retries`` attempts, so a permanently unreadable (e.g. corrupt)
    file can no longer spin forever.

    Args:
        img_path: Path of the image file to read.
        max_retries: Maximum number of read attempts (at least one attempt
            is always made).

    Returns:
        The image returned by ``Image.open(img_path).convert('RGB')``.

    Raises:
        IOError: If ``img_path`` does not exist, or if every read attempt
            failed.
    """
    if not osp.exists(img_path):
        raise IOError("{} does not exist".format(img_path))

    attempts = max(1, max_retries)
    last_error = None
    for attempt in range(1, attempts + 1):
        try:
            # The context manager closes the underlying file handle as soon
            # as the converted copy has been produced.
            with Image.open(img_path) as raw:
                return raw.convert('RGB')
        except IOError as err:
            last_error = err
            print("failed to read image '{}' (attempt {}/{}): {}".format(
                img_path, attempt, attempts, err))

    raise IOError("could not read image '{}' after {} attempts: {}".format(
        img_path, attempts, last_error))


class ImageDataset(Dataset):
    """Dataset over a sequence of ``(img_path, pid, camid)`` records.

    Args:
        dataset: Indexable collection whose items unpack into
            ``(img_path, pid, camid)``.
        transform: Optional callable applied to the loaded image.
    """

    def __init__(self, dataset, transform=None):
        self.dataset = dataset
        self.transform = transform

    def __len__(self):
        """Return the number of records in the wrapped collection."""
        return len(self.dataset)

    def __getitem__(self, index):
        """Return ``(img, pid, camid)`` for the record at ``index``.

        The image is read from disk with ``read_image`` and passed through
        ``self.transform`` when one was given.
        """
        img_path, pid, camid = self.dataset[index]
        img = read_image(img_path)
        if self.transform is not None:
            img = self.transform(img)
        return img, pid, camid


if __name__ == "__main__":
    # Smoke test: read the first training sample and dump it to disk.
    from data_manager import Market1501

    dataset = Market1501(root='../../data/')
    # No transform is given, so each item carries the image exactly as
    # returned by read_image, which is why ``save`` can be called on it.
    train_loader = ImageDataset(dataset.train)
    for batch_id, (imgs, pid, camid) in enumerate(train_loader):
        imgs.save('aaa.jpg')
        break
5.3、网络模型
from __future__ import absolute_import

import torch
from torch import nn
from torch.nn import functional as F
import torchvision


class ResNet50(nn.Module):
    """ResNet-50 backbone followed by a linear identity classifier.

    Args:
        num_classes: Number of outputs of the classifier layer.
        loss: Names of the losses the model is meant to be trained with.
            Only stored on ``self.loss``; ``forward`` does not read it.
            The default is an immutable set of the two names ``'softmax'``
            and ``'metric'`` (the previous default was a mutable set holding
            the single fused string ``'softmax, metric'``).
        pretrained: Forwarded to ``torchvision.models.resnet50``. Defaults
            to ``True`` as before; pass ``False`` to build the network
            without fetching pretrained weights.
        **kwargs: Accepted and ignored.
    """

    def __init__(self, num_classes, loss=frozenset({'softmax', 'metric'}),
                 pretrained=True, **kwargs):
        super(ResNet50, self).__init__()
        self.loss = loss
        resnet50 = torchvision.models.resnet50(pretrained=pretrained)
        # Drop the last two children of the torchvision model so that
        # ``self.base`` outputs a feature map instead of class scores.
        self.base = nn.Sequential(*list(resnet50.children())[:-2])
        self.classifier = nn.Linear(2048, num_classes)
        self.feat_dim = 2048  # feature dimension

    def forward(self, x):
        """Return normalised features in eval mode, classifier output in train mode.

        The backbone output is average-pooled over all remaining dimensions
        after the first two, flattened per sample and L2-normalised along
        the last dimension. When ``self.training`` is false that normalised
        feature is returned; otherwise it is fed to ``self.classifier`` and
        the classifier output is returned.
        """
        x = self.base(x)
        x = F.avg_pool2d(x, x.size()[2:])
        f = x.view(x.size(0), -1)
        # L2 normalisation; the small constant avoids division by zero.
        f = 1. * f / (torch.norm(f, 2, dim=-1, keepdim=True).expand_as(f) + 1e-12)
        if not self.training:
            return f
        y = self.classifier(f)
        return y


if __name__ == "__main__":
    # IPython is only needed for this interactive smoke test, so it is
    # imported here rather than being required to import the model.
    from IPython import embed

    # randn gives defined input values; torch.Tensor(...) would hand the
    # network uninitialised memory.
    img = torch.randn(32, 3, 256, 128)
    model = ResNet50(num_classes=751)
    f = model(img)
    embed()
5.4、主函数
from __future__ import print_function, absolute_import

import os
import sys
import time
import datetime
import argparse
import os.path as osp

import numpy as np
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader
from torch.optim import lr_scheduler

from util.losses import CrossEntropyLoss
import util.data_manager as data_manager
from util.data_loader import ImageDataset
import util.transforms as T
import models
from util.optimizers import init_optim
from util.utils import AverageMeter, Logger, save_checkpoint
from util.eval_metrics import evaluate

parser = argparse.ArgumentParser(description='Train image model with center loss')
# Datasets
parser.add_argument('--root', type=str, default='../data', help="root path to data directory")
parser.add_argument('-d', '--dataset', type=str, default='market1501',
                    choices=data_manager.get_names())
parser.add_argument('-j', '--workers', default=0, type=int,
                    help="number of data loading workers (default: 0)")
parser.add_argument('--height', type=int, default=256,
                    help="height of an image (default: 256)")
parser.add_argument('--width', type=int, default=128,
                    help="width of an image (default: 128)")
# Optimization options
# parser.add_argument('--label_smooth', action='store_true', type=int, help="label smooth")
parser.add_argument('--optim', type=str, default='adam', help="optimization algorithm")
parser.add_argument('--max-epoch', default=50, type=int, help="maximum epochs to run")
parser.add_argument('--start-epoch', default=0, type=int,
                    help="manual epoch number (useful on restarts)")
parser.add_argument('--train-batch', default=8, type=int, help="train batch size")
parser.add_argument('--test-batch', default=8, type=int, help="test batch size")
parser.add_argument('--lr', '--learning-rate', default=0.0002, type=float,
                    help="initial learning rate")
parser.add_argument('--stepsize', default=10, type=int,
                    help="stepsize to decay learning rate (>0 means this is enabled)")
parser.add_argument('--gamma', default=0.1, type=float, help="learning rate decay")
parser.add_argument('--weight-decay', default=5e-04, type=float,
                    help="weight decay (default: 5e-04)")
# Architecture
parser.add_argument('-a', '--arch', type=str, default='resnet50', choices=models.get_names())
# Miscs
parser.add_argument('--print-freq', type=int, default=10, help="print frequency")
parser.add_argument('--seed', type=int, default=1, help="manual seed")
parser.add_argument('--resume', type=str, default='', metavar='PATH')
parser.add_argument('--evaluate', action='store_true', help="evaluation only")
parser.add_argument('--eval-step', type=int, default=10,
                    help="run evaluation for every N epochs (set to -1 to test after training)")
parser.add_argument('--start-eval', type=int, default=0,
                    help="start to evaluate after specific epoch")
parser.add_argument('--save-dir', type=str, default='./log')
parser.add_argument('--use-cpu', action='store_true', help="use cpu")
parser.add_argument('--gpu-devices', default='0', type=str,
                    help='gpu device ids for CUDA_VISIBLE_DEVICES')

# ``test`` and ``train`` read this module-level namespace.
args = parser.parse_args()


def _extract_features(model, loader, use_gpu, batch_time):
    """Run ``model`` over ``loader`` and collect features, pids and camids.

    Only the forward pass of each batch is timed into ``batch_time``.
    Returns the features concatenated along dimension 0 plus the person ids
    and camera ids as numpy arrays.
    """
    feats, pids_all, camids_all = [], [], []
    for batch_idx, (imgs, pids, camids) in enumerate(loader):
        if use_gpu:
            imgs = imgs.cuda()

        end = time.time()
        features = model(imgs)
        batch_time.update(time.time() - end)

        feats.append(features.data.cpu())
        pids_all.extend(pids)
        camids_all.extend(camids)
    return torch.cat(feats, 0), np.asarray(pids_all), np.asarray(camids_all)


def test(model, queryloader, galleryloader, use_gpu, ranks=(1, 5, 10, 20)):
    """Evaluate ``model`` on the query/gallery split and print mAP and CMC.

    Args:
        model: Network called on image batches; put into eval mode here.
        queryloader: Loader yielding ``(imgs, pids, camids)`` query batches.
        galleryloader: Loader yielding ``(imgs, pids, camids)`` gallery batches.
        use_gpu: When true, image batches are moved to CUDA before the
            forward pass.
        ranks: 1-based positions of the CMC curve to print.

    Returns:
        ``cmc[0]``, the first entry of the CMC curve returned by ``evaluate``.
    """
    batch_time = AverageMeter()
    model.eval()

    with torch.no_grad():
        qf, q_pids, q_camids = _extract_features(model, queryloader, use_gpu, batch_time)
        print("Extracted features for query set, obtained {}-by-{} matrix".format(
            qf.size(0), qf.size(1)))

        gf, g_pids, g_camids = _extract_features(model, galleryloader, use_gpu, batch_time)
        print("Extracted features for gallery set, obtained {}-by-{} matrix".format(
            gf.size(0), gf.size(1)))

    print("==> BatchTime(s)/BatchSize(img): {:.4f}/{}".format(batch_time.avg, args.test_batch))

    m, n = qf.size(0), gf.size(0)
    # Pairwise squared Euclidean distance: |q|^2 + |g|^2 - 2 * q . g
    distmat = torch.pow(qf, 2).sum(dim=1, keepdim=True).expand(m, n) + \
        torch.pow(gf, 2).sum(dim=1, keepdim=True).expand(n, m).t()
    # Keyword beta/alpha form; the positional (beta, alpha, mat1, mat2)
    # overload is deprecated and rejected by newer PyTorch releases.
    distmat.addmm_(qf, gf.t(), beta=1, alpha=-2)
    distmat = distmat.numpy()

    print("Computing CMC and mAP")
    cmc, mAP = evaluate(distmat, q_pids, g_pids, q_camids, g_camids, use_metric_cuhk03=False)

    print("Results ----------")
    print("mAP: {:.1%}".format(mAP))
    print("CMC curve")
    for r in ranks:
        print("Rank-{:<3}: {:.1%}".format(r, cmc[r - 1]))
    print("------------------")

    return cmc[0]


def main():
    """Build data loaders, model and optimizer, then train or evaluate.

    With ``--evaluate`` the model is tested once and ``0`` is returned.
    Otherwise the model is trained from the start epoch up to
    ``--max-epoch``; it is tested and checkpointed at the configured
    evaluation epochs and always after the last epoch.
    """
    # Seed the CPU RNG as well, and apply the device selection before CUDA
    # is queried so that --gpu-devices actually takes effect.
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices

    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False
    pin_memory = use_gpu

    # Route everything printed from here on through the project Logger.
    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))  # training log
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))  # test log
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_img_dataset(root=args.root, name=args.dataset)

    # Training uses data augmentation.
    transform_train = T.Compose([
        T.Random2DTranslation(args.height, args.width),  # augmentation: random crop
        T.RandomHorizontalFlip(),  # augmentation: random horizontal flip
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Testing uses no augmentation, only resize and normalisation.
    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    trainloader = DataLoader(
        ImageDataset(dataset.train, transform=transform_train),
        batch_size=args.train_batch, shuffle=True, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=True,
    )

    queryloader = DataLoader(
        ImageDataset(dataset.query, transform=transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )

    galleryloader = DataLoader(
        ImageDataset(dataset.gallery, transform=transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch, num_classes=dataset.num_train_pids, loss='softmax')
    print("Model size: {:.5f}M".format(sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion_class = CrossEntropyLoss(use_gpu=use_gpu)
    optimizer = init_optim(args.optim, model.parameters(), args.lr, args.weight_decay)

    if args.stepsize > 0:
        # Decay the learning rate by ``gamma`` every ``stepsize`` epochs.
        scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma)
    start_epoch = args.start_epoch

    if args.resume:
        print("Loading checkpoint from '{}'".format(args.resume))
        # Load onto the CPU first so a checkpoint written on a GPU machine
        # can be restored on a CPU-only host; the model is moved to CUDA
        # below when a GPU is used.
        checkpoint = torch.load(args.resume, map_location='cpu')
        model.load_state_dict(checkpoint['state_dict'])
        # The checkpoint stores the 0-based index of the epoch that had
        # just finished, so training continues with the following one.
        start_epoch = checkpoint['epoch'] + 1

    if use_gpu:
        # Wrap for multi-GPU data parallelism.
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        test(model, queryloader, galleryloader, use_gpu)
        return 0

    train_time = 0
    best_rank1 = -np.inf
    best_epoch = 0
    print("==> Start training")

    for epoch in range(start_epoch, args.max_epoch):
        start_train_time = time.time()
        train(epoch, model, criterion_class, optimizer, trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)

        if args.stepsize > 0:
            scheduler.step()

        finished = epoch + 1
        periodic_eval = (finished > args.start_eval and args.eval_step > 0
                         and finished % args.eval_step == 0)
        # Always evaluate after the final epoch, whatever the eval settings.
        if periodic_eval or finished == args.max_epoch:
            print("==> Test")
            rank1 = test(model, queryloader, galleryloader, use_gpu)
            is_best = rank1 > best_rank1
            if is_best:
                best_rank1 = rank1
                best_epoch = finished

            # Save the weights of the wrapped module so the checkpoint keys
            # do not depend on DataParallel.
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint({
                'state_dict': state_dict,
                'rank1': rank1,
                'epoch': epoch,
            }, is_best, osp.join(args.save_dir, 'checkpoint_ep' + str(finished) + '.pth.tar'))

    print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(best_rank1, best_epoch))
    print("Training time (h:m:s): {}".format(datetime.timedelta(seconds=train_time)))


def train(epoch, model, criterion_class, optimizer, trainloader, use_gpu):
    """Train ``model`` for one pass over ``trainloader``.

    Args:
        epoch: 0-based epoch index, used only for the progress output.
        model: Network to optimise; put into train mode here.
        criterion_class: Callable computing the loss from ``(outputs, pids)``.
        optimizer: Optimizer stepped once per batch.
        trainloader: Loader yielding ``(imgs, pids, camids)`` batches.
        use_gpu: When true, images and labels are moved to CUDA.
    """
    model.train()

    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()

    end = time.time()
    for batch_idx, (imgs, pids, _) in enumerate(trainloader):
        if use_gpu:
            imgs, pids = imgs.cuda(), pids.cuda()

        # Time spent waiting for (and transferring) the batch.
        data_time.update(time.time() - end)

        outputs = model(imgs)
        loss = criterion_class(outputs, pids)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        losses.update(loss.item(), pids.size(0))
        end = time.time()

        if (batch_idx + 1) % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.4f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.4f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                      epoch + 1, batch_idx + 1, len(trainloader),
                      batch_time=batch_time, data_time=data_time, loss=losses))


if __name__ == '__main__':
    main()
5.5、训练结果展示
暂时只训练了10个epoch,笔记本不给力呀,,,,,
参考:
https://zhuanlan.zhihu.com/p/83411679
https://cysu.github.io/open-reid/index.html
https://blog.csdn.net/ctwy291314/article/details/83544088
https://blog.csdn.net/weixin_41427758/article/details/80372997
https://blog.csdn.net/shenxiaolu1984/article/details/53607268
https://blog.csdn.net/weixin_41526905/article/details/80341746