训练
训练主函数
首先是模型实例化,调用损失函数等,传入一些形参。然后如果要加载预训练权重,比如VGG16或者原SSD的权重,直接加载会报关于keys的错误。因此,需要将原
pretrained_dict = {k:v for k,v in pretrained_dict.items() if np.shape(model_dict[k])== np.shape(pretrained_dict[k])}
改为:
pretrained_dict = {k:v for k,v in pretrained_dict.items() if pretrained_dict.keys()==model_dict.keys()}
意思是只有键名相等才可以加载,而不是比对形状。注意:`pretrained_dict.keys()==model_dict.keys()` 比较的是两个字典的完整键集合,而不是逐个键比较,因此结果要么是全部权重都被加载,要么是一个都不加载。
if __name__ == '__main__':
    # Training entry point: build the model and loss, load whatever pretrained
    # tensors are compatible, split the annotation file into train/val, then
    # run do_train() once per epoch.

    # Fall back to CPU when CUDA is not available instead of crashing.
    cuda = torch.cuda.is_available()
    device = torch.device('cuda' if cuda else 'cpu')

    criterion = MultiBoxLoss(2, 0.5, True, 0, True, 3, 0.5, False, cuda)  # detection loss
    model = Mymodel("train", num_classes=2, confidence=0.6, nms_iou=0.5)

    model_path = r'./nets/ssd_weights.pth'
    model_dict = model.state_dict()
    pretrained_dict = torch.load(model_path, map_location=device)
    # Keep only tensors whose name exists in this model AND whose shape matches.
    # Comparing `pretrained_dict.keys() == model_dict.keys()` compares the two
    # whole key sets, so it loads either everything or nothing; indexing
    # model_dict[k] without the membership test raises KeyError for extra keys.
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items()
        if k in model_dict and np.shape(model_dict[k]) == np.shape(v)
    }
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)
    print("完成预权重的加载")
    model.to(device)

    batch_size = 2
    annotation_path = r'2007_train.txt'
    with open(annotation_path, encoding='utf-8') as f:
        lines = f.readlines()

    # Fixed seed so the train/val split is reproducible; re-seed afterwards so
    # later random operations are not deterministic.
    np.random.seed(10101)
    np.random.shuffle(lines)
    np.random.seed(None)
    val = 0.1
    num_val = int(len(lines) * val)
    num_train = len(lines) - num_val

    model.train()
    Use_Data_Loader = True
    lr = 5e-4
    # do_train() reads this module-level `optimizer`.
    optimizer = optim.Adam(model.parameters(), lr=lr)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.92)
    Init_Epoch = 0
    Freeze_Epoch = 50

    if Use_Data_Loader:
        train_dataset = MyDatasets(lines[:num_train], (300, 300), True)
        # Reshuffle the training samples every epoch (previously shuffle=False,
        # which replayed the same batch order each epoch).
        train_data = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                                num_workers=4, pin_memory=True,
                                collate_fn=my_dataset_collate)
        val_dataset = MyDatasets(lines[num_train:], (300, 300), False)
        val_data = DataLoader(val_dataset, shuffle=True, batch_size=batch_size,
                              num_workers=4, pin_memory=True, drop_last=True,
                              collate_fn=my_dataset_collate)

    epoch_size = num_train // batch_size
    epoch_size_val = num_val // batch_size

    # Train the backbone together with the rest of the network.
    for param in model.backbone.parameters():
        param.requires_grad = True

    for epoch in range(Init_Epoch, Freeze_Epoch):
        do_train(model, criterion, epoch, epoch_size, epoch_size_val,
                 train_data, val_data, Freeze_Epoch, cuda)
        lr_scheduler.step()
训练函数:
def get_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group.

    Returns ``None`` when the optimizer has no parameter groups.
    """
    return next((group['lr'] for group in optimizer.param_groups), None)


def _to_tensors(images, targets, cuda):
    """Convert a numpy batch to float tensors, moved to the GPU when ``cuda``."""
    with torch.no_grad():
        images = torch.from_numpy(images).type(torch.FloatTensor)
        targets = [torch.from_numpy(ann).type(torch.FloatTensor) for ann in targets]
        if cuda:
            images = images.cuda()
            targets = [ann.cuda() for ann in targets]
    return images, targets


def do_train(model, criterion, epoch, epoch_size, epoch_size_val, gen, genval, Epoch, cuda):
    """Run one training epoch followed by one validation pass, then save weights.

    Uses the module-level ``optimizer`` (it is not passed in as an argument).

    Args:
        model: network being trained; called as ``model(images)``.
        criterion: loss callable returning ``(loc_loss, conf_loss)``.
        epoch: zero-based index of the current epoch (used for display only).
        epoch_size: maximum number of training batches to consume.
        epoch_size_val: maximum number of validation batches to consume.
        gen: iterable of training batches ``(images, targets)`` as numpy arrays.
        genval: iterable of validation batches in the same format.
        Epoch: total number of epochs (used for display only).
        cuda: when true, batches are moved to the GPU.
    """
    import os

    loc_loss = 0
    conf_loss = 0
    loc_loss_val = 0
    conf_loss_val = 0
    train_steps = 0
    val_steps = 0

    model.train()
    with tqdm(total=epoch_size, desc=f'Epoch {epoch + 1}/{Epoch}', postfix=dict, mininterval=0.3) as pbar:
        for iteration, batch in enumerate(gen):
            if iteration >= epoch_size:
                break
            images, targets = _to_tensors(batch[0], batch[1], cuda)

            # Forward pass must stay outside no_grad so backward() can run.
            output = model(images)
            optimizer.zero_grad()
            loss_l, loss_c = criterion(output, targets)
            loss = loss_l + loss_c
            loss.backward()
            optimizer.step()

            loc_loss += loss_l.item()
            conf_loss += loss_c.item()
            train_steps += 1

            pbar.set_postfix(**{'loc_loss': loc_loss / (iteration + 1),
                                'conf_loss': conf_loss / (iteration + 1),
                                'lr': get_lr(optimizer)})
            pbar.update(1)

    model.eval()
    print('Start Validation')
    with tqdm(total=epoch_size_val, desc=f'Epoch {epoch + 1}/{Epoch}', postfix=dict, mininterval=0.3) as pbar:
        for iteration, batch in enumerate(genval):
            if iteration >= epoch_size_val:
                break
            images, targets = _to_tensors(batch[0], batch[1], cuda)

            # No gradients are needed for validation; the optimizer is untouched.
            with torch.no_grad():
                out = model(images)
                loss_l, loss_c = criterion(out, targets)

            loc_loss_val += loss_l.item()
            conf_loss_val += loss_c.item()
            val_steps += 1

            pbar.set_postfix(**{'loc_loss': loc_loss_val / (iteration + 1),
                                'conf_loss': conf_loss_val / (iteration + 1),
                                'lr': get_lr(optimizer)})
            pbar.update(1)

    total_loss = loc_loss + conf_loss
    val_loss = loc_loss_val + conf_loss_val
    print('Finish Validation')
    print('Epoch:' + str(epoch + 1) + '/' + str(Epoch))
    # Average over the batches actually processed (previously divided by
    # epoch_size + 1, which under-reported the mean loss); max(..., 1) guards
    # against an empty loader.
    print('Total Loss: %.4f || Val Loss: %.4f ' % (total_loss / max(train_steps, 1),
                                                   val_loss / max(val_steps, 1)))

    print('Saving state, iter:', str(epoch + 1))
    # Make sure the output directory exists so a full epoch is not lost on save.
    os.makedirs('./logs', exist_ok=True)
    torch.save(model.state_dict(), r'./logs/mymodel.pth')
我这里只训练了50个epoch。试试效果,是可以正常训练的。
预测
边界框解码
在预测前,我们需要对预测结果进行解码
参数说明:num_classes是类别数量
bkg_label:背景类标签
top_k:指每个类别最多保留预测概率最高的前k个结果
conf_thresh:置信度阈值,只保留得分高于该值的预测框
nms_thresh:NMS阈值
class Detect(nn.Module):
    """Decode raw SSD predictions into per-class, NMS-filtered detections.

    Args:
        num_classes: number of classes; ``forward`` iterates classes
            ``1 .. num_classes - 1`` and never fills class 0.
        bkg_label: label of the background class (stored, not otherwise used here).
        top_k: number of detection slots per class in the output tensor.
        conf_thresh: score threshold; only priors scoring above it are kept.
        nms_thresh: threshold passed to ``nms``; must be strictly positive.

    Raises:
        ValueError: if ``nms_thresh`` is not strictly positive.
    """

    def __init__(self, num_classes, bkg_label, top_k, conf_thresh, nms_thresh):
        super().__init__()
        self.num_classes = num_classes
        self.background_label = bkg_label
        self.top_k = top_k
        self.nms_thresh = nms_thresh
        if nms_thresh <= 0:
            # The check rejects 0 as well, so the message says "positive".
            raise ValueError('nms_threshold must be positive.')
        self.conf_thresh = conf_thresh
        self.variance = Config['variance']

    def forward(self, loc_data, conf_data, prior_data):
        """Return a ``(batch, num_classes, top_k, 5)`` tensor of detections.

        Each filled row is ``[score, box...]`` where the four box values come
        from ``decode``; unused rows stay zero.
        """
        # Post-processing runs on the CPU; move every input there so that
        # decode() never mixes devices.
        loc_data = loc_data.cpu()
        conf_data = conf_data.cpu()
        prior_data = prior_data.cpu()

        num = loc_data.size(0)            # batch size
        num_priors = prior_data.size(0)   # number of prior boxes
        output = torch.zeros(num, self.num_classes, self.top_k, 5)

        # (batch, num_priors, num_classes) -> (batch, num_classes, num_priors)
        conf_preds = conf_data.view(num, num_priors, self.num_classes).transpose(2, 1)

        for i in range(num):
            # Apply the regression offsets to the priors: (num_priors, 4).
            decoded_boxes = decode(loc_data[i], prior_data, self.variance)
            conf_scores = conf_preds[i].clone()

            for cl in range(1, self.num_classes):
                # Keep only the priors whose score for this class passes the threshold.
                c_mask = conf_scores[cl].gt(self.conf_thresh)
                scores = conf_scores[cl][c_mask]
                if scores.size(0) == 0:
                    continue
                boxes = decoded_boxes[c_mask]

                # Non-maximum suppression over the surviving boxes.
                ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
                output[i, cl, :count] = torch.cat(
                    (scores[ids[:count]].unsqueeze(1), boxes[ids[:count]]), 1)
        return output
输出预测结果
这里的最终预测我也用的SSD中的方法,完整代码如下:
class VGG16(object):
    """Inference wrapper: loads the trained model and draws detections on images.

    Keyword arguments passed to the constructor override the entries of
    ``_defaults`` (for example ``VGG16(confidence=0.3, cuda=False)``).
    """

    _defaults = {
        "model_path": 'logs/mymodel.pth',               # trained weights file
        "classes_path": 'model_data/new_classes.txt',   # one class name per line
        "input_shape": (300, 300, 3),                   # network input shape
        "confidence": 0.6,                              # score threshold
        "nms_iou": 0.5,                                 # NMS threshold
        "cuda": True,                                   # run on the GPU
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default for ``n``, or an error string if it is unknown."""
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        # Apply caller overrides; they were previously accepted but ignored.
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.generate()

    def _get_class(self):
        """Read the class names, one per line, from ``classes_path``."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f.readlines()]

    def generate(self):
        """Build the network, load its weights and prepare the box colours."""
        # Foreground classes plus one background class.
        self.num_classes = len(self.class_names) + 1

        # Use the GPU only when it was requested and is actually available.
        self.cuda = bool(self.cuda) and torch.cuda.is_available()

        # NOTE(review): the class count passed to the model is hard-coded to 2
        # rather than self.num_classes — confirm it matches the classes file.
        model = Mymodel("test", 2, self.confidence, self.nms_iou)
        print('Loading weights into state dict...')
        device = torch.device('cuda' if self.cuda else 'cpu')
        model.load_state_dict(torch.load(self.model_path, map_location=device))
        self.net = model.eval()

        if self.cuda:
            self.net = torch.nn.DataParallel(self.net)
            cudnn.benchmark = True
            self.net = self.net.cuda()

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # One distinct colour per class, evenly spaced in hue.
        n_names = len(self.class_names)
        hsv_tuples = [(x / n_names, 1., 1.) for x in range(n_names)]
        rgb = [colorsys.hsv_to_rgb(*hsv) for hsv in hsv_tuples]
        self.colors = [(int(r * 255), int(g * 255), int(b * 255)) for r, g, b in rgb]

    def detect_image(self, image):
        """Run detection on ``image``, draw boxes and labels on it and return it.

        The input image is returned unchanged when nothing passes the
        confidence threshold.
        """
        image_shape = np.array(np.shape(image)[0:2])

        # Resize without distortion (letterbox padding) to the network input size.
        crop_img = np.array(letterbox_image(image, (self.input_shape[1], self.input_shape[0])))

        with torch.no_grad():
            # Subtract the channel means and convert HWC -> 1xCxHxW.
            photo = torch.from_numpy(
                np.expand_dims(np.transpose(crop_img - MEANS, (2, 0, 1)), 0)
            ).type(torch.FloatTensor)
            if self.cuda:
                photo = photo.cuda()
            preds = self.net(photo)

        top_conf = []
        top_label = []
        top_bboxes = []
        # preds is indexed as [batch, class, slot, 5]; slot rows are
        # [score, x1, y1, x2, y2]. Class 0 is background, so start at 1.
        max_dets = preds.size(2)
        for i in range(1, preds.size(1)):
            j = 0
            # Bound j so a class with every slot above the threshold cannot
            # index past the end of the tensor.
            while j < max_dets and preds[0, i, j, 0] >= self.confidence:
                score = float(preds[0, i, j, 0])
                label_name = self.class_names[i - 1]
                pt = preds[0, i, j, 1:].detach().cpu().numpy()
                top_conf.append(score)
                top_label.append(label_name)
                top_bboxes.append([pt[0], pt[1], pt[2], pt[3]])
                j = j + 1

        # Nothing above the threshold: return the original image.
        if len(top_conf) <= 0:
            return image

        top_conf = np.array(top_conf)
        top_label = np.array(top_label)
        top_bboxes = np.array(top_bboxes)
        top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
        top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
        top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
        top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

        # Map boxes from the letterboxed input back onto the original image.
        boxes = ssd_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                  np.array([self.input_shape[0], self.input_shape[1]]),
                                  image_shape)

        # Pass a plain int as the font size rather than a numpy scalar.
        font_size = int(np.floor(3e-2 * np.shape(image)[1] + 0.5))
        font = ImageFont.truetype(font='model_data/simhei.ttf', size=font_size)

        thickness = max((np.shape(image)[0] + np.shape(image)[1]) // self.input_shape[0], 1)

        for i, c in enumerate(top_label):
            predicted_class = c
            score = top_conf[i]

            # Pad each box by 5 pixels, then clamp to the image bounds.
            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(np.shape(image)[0], np.floor(bottom + 0.5).astype('int32'))
            right = min(np.shape(image)[1], np.floor(right + 0.5).astype('int32'))

            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            # ImageDraw.textsize was removed in Pillow 10; prefer textbbox
            # and fall back to textsize on older versions.
            if hasattr(draw, 'textbbox'):
                x0, y0, x1, y1 = draw.textbbox((0, 0), label, font=font)
                label_size = (x1 - x0, y1 - y0)
            else:
                label_size = draw.textsize(label, font)
            print(label.encode('utf-8'), top, left, bottom, right)

            # Put the label above the box when there is room, otherwise inside it.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            color = self.colors[self.class_names.index(predicted_class)]
            # Draw nested 1-pixel rectangles to get the requested thickness.
            for t in range(thickness):
                draw.rectangle([left + t, top + t, right - t, bottom - t], outline=color)
            draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=color)
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw
        return image
预测结果: