一、人员聚集识别挑战赛
比赛链接:challenge.xfyun.cn/topic/info?…
疫情给人们带来的影响体现在衣食住行的方方面面。在疫情防控中,需要减少非必要的聚集性活动,减少参加聚集性活动的人员。对于个人而言,尽量不前往人员聚集场所尤其是密闭式场所。
在本次比赛中我们需要参赛选手开发算法模型,对图中的人数进行识别。为了简化赛题难度我们只需要选手识别出人数即可,不需要预测行人位置。
1.赛事任务
对输入的图片进行识别,给出图片中包含多少个人。选手需要根据训练集构建模型对测试集的图片进行预测,按照测试集精度进行排名。
2.数据说明
赛题数据由训练集和测试集组成,需要选手对测试集进行预测。训练集给出了近1000张图片及图片上人数标签。
3. 评估指标
本次竞赛的评价标准采用回归任务指标,最高分为0。
计算方法参考:scikit-learn.org/stable/modu…
评估代码参考:
二、数据集处理
1.解压缩数据
!unzip -O gbk ~/data/data169718/人员聚集识别挑战赛数据集.zip -d ~/data
Archive: /home/aistudio/data/data169718/人员聚集识别挑战赛数据集.zip creating: /home/aistudio/data/人员聚集识别挑战赛数据集/ inflating: /home/aistudio/data/人员聚集识别挑战赛数据集/sample_submit.csv extracting: /home/aistudio/data/人员聚集识别挑战赛数据集/test.zip inflating: /home/aistudio/data/人员聚集识别挑战赛数据集/train.zip inflating: /home/aistudio/data/人员聚集识别挑战赛数据集/train_label.csv
!unzip -qoa ~/data/人员聚集识别挑战赛数据集/test.zip -d ~/data
!unzip -qoa ~/data/人员聚集识别挑战赛数据集/train.zip -d ~/data
!head data/人员聚集识别挑战赛数据集/train_label.csv
name,count mhTTqKoHOb.jpg,90 uEEMGtcAsu.jpg,14 rbLlYPkBAJ.jpg,74 FzosxVdhns.jpg,208 xdCWXNwkOl.jpg,300 RxPAIwoPzq.jpg,57 XGsfUEDxxM.jpg,108 BBLlyvznot.jpg,90 xCXqyjffHb.jpg,297
!wc -l data/人员聚集识别挑战赛数据集/train_label.csv
899 data/人员聚集识别挑战赛数据集/train_label.csv
2.数据集分割
!head -n700 data/人员聚集识别挑战赛数据集/train_label.csv >data/train_1.txt
!tail -n199 data/人员聚集识别挑战赛数据集/train_label.csv >data/eval.txt
!sed 1d data/train_1.txt >data/train.txt
!head data/train.txt
mhTTqKoHOb.jpg,90 uEEMGtcAsu.jpg,14 rbLlYPkBAJ.jpg,74 FzosxVdhns.jpg,208 xdCWXNwkOl.jpg,300 RxPAIwoPzq.jpg,57 XGsfUEDxxM.jpg,108 BBLlyvznot.jpg,90 xCXqyjffHb.jpg,297 RGelFzLgqr.jpg,266
3.自定义数据集
import os import zipfile import random import json import cv2 import numpy as np from PIL import Image import matplotlib.pyplot as plt import paddle from paddle.io import Dataset import paddle.nn as nn import paddle import paddle.nn.functional as F from paddle.vision.transforms import ToTensor import numpy as np import matplotlib.pyplot as plt print(paddle.__version__)
2.3.2
import os

import numpy as np
import paddle
import paddle.vision.transforms as T
from paddle.io import Dataset
from PIL import Image


class CrowdDataset(paddle.io.Dataset):
    """Crowd-counting dataset read from a ``name,count`` list file.

    Each line of ``data/<mode>.txt`` holds an image file name and the number
    of people in that image, separated by a comma. Images are loaded from
    ``data/train/`` for every mode.
    """

    # Labels are divided by this factor before being returned. The original
    # author's note: without shrinking the labels the loss oscillates and
    # does not go down. Predictions must be multiplied back by the same factor.
    LABEL_SCALE = 400

    # ImageNet channel statistics used by the Normalize transform.
    MEAN = [0.485, 0.456, 0.406]
    STD = [0.229, 0.224, 0.225]

    def __init__(self, mode='train'):
        """Read the list file for ``mode`` and build the transform pipeline.

        Args:
            mode: Base name of the list file (``data/<mode>.txt``). Random
                flips are only applied when ``mode == 'train'``.
        """
        self.data = []
        with open('data/{}.txt'.format(mode)) as f:
            for line in f:
                info = line.strip().split(',')
                # str.split always returns at least one element, so a blank
                # or malformed line must be detected by requiring two fields;
                # otherwise info[1] raises IndexError.
                if len(info) < 2:
                    continue
                self.data.append([info[0].strip(), info[1].strip()])

        steps = [T.Resize((224, 224))]
        if mode == 'train':
            # Augmentation: random horizontal and vertical flips.
            steps += [T.RandomHorizontalFlip(), T.RandomVerticalFlip()]
        steps += [
            T.ToTensor(),  # format conversion, HWC => CHW
            T.Normalize(mean=self.MEAN, std=self.STD),
        ]
        self.transforms = T.Compose(steps)

    def get_origin_data(self):
        """Return the raw list of ``[file_name, count_string]`` pairs."""
        return self.data

    def __getitem__(self, index):
        """Return ``(image_tensor, scaled_label)`` for the sample at ``index``.

        The label is the count divided by ``LABEL_SCALE`` as a float32 scalar
        array.
        """
        image_file, label = self.data[index]
        image_file = os.path.join("data/train/", image_file)
        img = Image.open(image_file)
        if img.mode != 'RGB':
            img = img.convert('RGB')
        img = self.transforms(img)
        return img, np.array(float(label) / self.LABEL_SCALE, dtype='float32')

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)
# Datasets backed by data/train.txt and data/eval.txt respectively.
train_dataset = CrowdDataset(mode="train")
eval_dataset = CrowdDataset(mode='eval')

# Training batches are reshuffled every epoch; evaluation keeps file order.
train_loader = paddle.io.DataLoader(
    train_dataset,
    batch_size=256,
    shuffle=True,
)
eval_loader = paddle.io.DataLoader(
    eval_dataset,
    batch_size=256,
    shuffle=False,
)
三、模型定义
以 resnet18 为基础,替换最后的 avgpool 层和 fc 层,使网络输出 1 维的回归结果(即预测的人数)。
import paddle
from paddle.vision.models import resnet18


class Regressor(paddle.nn.Layer):
    """ResNet-18 whose pooling and fc layers are replaced for scalar regression.

    The final fully connected layer maps 512 features to a single output.
    """

    def __init__(self):
        super().__init__()
        backbone = resnet18()
        backbone.avgpool = nn.AdaptiveAvgPool2D(1)
        backbone.fc = nn.Linear(512, 1, None)
        # The attribute name is part of the saved state-dict keys.
        self.resnet = backbone

    def forward(self, inputs):
        """Run the backbone on ``inputs`` and return its output unchanged."""
        return self.resnet(inputs)
四、模型训练
定义超参数,并使用均方误差(MSE)作为损失函数进行训练。
# Training hyperparameters read by train().
epoch_num = 300        # number of passes over the training loader
learning_rate = 0.001  # step size handed to the optimizer
val_acc_history = []
val_loss_history = []


def train(model):
    """Fit ``model`` on ``train_loader`` with SGD and mean-squared-error loss.

    Reads the module-level ``epoch_num``, ``learning_rate`` and
    ``train_loader``. Updates ``model`` in place and returns nothing.
    """
    print('start training ... ')
    # Switch to training mode.
    model.train()
    opt = paddle.optimizer.SGD(
        learning_rate=learning_rate,
        parameters=model.parameters(),
    )
    for epoch in range(epoch_num):
        for batch_id, batch in enumerate(train_loader()):
            images = batch[0]
            # Add a trailing axis to the targets so they line up with the
            # model output.
            targets = paddle.unsqueeze(paddle.to_tensor(batch[1]), 1)
            predictions = model(images)
            loss = F.mse_loss(predictions, targets)

            if batch_id % 40 == 0:
                print("epoch: {}, batch_id: {}, loss is: {}".format(epoch, batch_id, loss.numpy()))

            loss.backward()
            opt.step()
            opt.clear_grad()


model = Regressor()
train(model)
paddle.save(model.state_dict(), "model.pdparams")
W0926 12:33:49.050081 98 gpu_resources.cc:61] Please NOTE: device: 0, GPU Compute Capability: 7.0, Driver API Version: 11.2, Runtime API Version: 11.2 W0926 12:33:49.053831 98 gpu_resources.cc:91] device: 0, cuDNN Version: 8.2. start training ... /opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/nn/layer/norm.py:654: UserWarning: When training, we now always track global mean and variance. "When training, we now always track global mean and variance.") epoch: 0, batch_id: 0, loss is: [1.8235127]
五、预测
1.定义图片预处理
def load_image(img_path):
    """Load an image and preprocess it the same way as the eval transforms.

    The image is converted to RGB, resized to 224x224, scaled to [0, 1],
    normalized per channel with the same mean/std that the dataset pipeline
    passes to ``T.Normalize``, and transposed to channel-first layout.

    Args:
        img_path: Path of the image file to read.

    Returns:
        A float32 ``numpy`` array in CHW layout.
    """
    img = Image.open(img_path)
    if img.mode != 'RGB':
        img = img.convert('RGB')
    img = img.resize((224, 224), Image.BILINEAR)
    img = np.array(img).astype('float32') / 255  # scale pixels to [0, 1]

    # The model was trained on mean/std-normalized inputs; feeding it inputs
    # that are only divided by 255 shifts the input distribution and skews
    # the predictions. Broadcasting applies over the last (channel) axis.
    mean = np.array([0.485, 0.456, 0.406], dtype='float32')
    std = np.array([0.229, 0.224, 0.225], dtype='float32')
    img = (img - mean) / std

    img = img.transpose((2, 0, 1))  # HWC to CHW
    return img
W0926 18:55:26.408638 145 gpu_resources.cc:61] Please NOTE: device: 0, GPU Compute Capability: 7.0, Driver API Version: 11.2, Runtime API Version: 11.2 W0926 18:55:26.412569 145 gpu_resources.cc:91] device: 0, cuDNN Version: 8.2.
2.载入模型
# Rebuild the network, restore the saved weights, then switch to eval mode.
model = Regressor()
para_state_dict = paddle.load("model.pdparams")
model.set_state_dict(para_state_dict)
model.eval()
3.预测一张图片
import glob
import math

# Display the image that is about to be predicted.
infer_path = 'data/test/LoBMoEUjce.jpg'
img = Image.open(infer_path)
plt.imshow(img)
plt.show()

# Preprocess the image and add a leading batch axis.
dy_x_data = load_image(infer_path).astype('float32')[np.newaxis, :, :, :]
img = paddle.to_tensor(dy_x_data)

# Inference does not need gradients.
with paddle.no_grad():
    out = model(img)
print(out)

# Labels were divided by 400 for training, so multiply back here. A negative
# regression output cannot be a head count: clamp it to 0 instead of taking
# abs(), which would turn e.g. -115 into a confident 115.
lab = max(0, int(float(out.numpy().flatten()[0]) * 400))
print("{}样本,被预测为:{}".format(infer_path.split('/')[-1], lab))
print("结束")
Tensor(shape=[1, 1], dtype=float32, place=Place(gpu:0), stop_gradient=False, [[-0.28863785]]) LoBMoEUjce.jpg样本,被预测为:115 结束
4.批量预测并保存结果
# Sorted so the rows of the result file come out in a stable order.
img_list = sorted(glob.glob("data/test/*.jpg"))

# The context manager closes the file even if a prediction fails part-way.
with open('result.txt', 'w') as f:
    f.write("name,count\n")
    for infer_path in img_list:
        # Preprocess the image and add a leading batch axis.
        dy_x_data = load_image(infer_path).astype('float32')[np.newaxis, :, :, :]
        img = paddle.to_tensor(dy_x_data)

        # Inference does not need gradients.
        with paddle.no_grad():
            out = model(img)

        # Labels were divided by 400 for training, so multiply back here.
        # Negative outputs are clamped to 0 rather than passed through abs(),
        # which would report a negative prediction as a large crowd.
        lab = max(0, int(float(out.numpy().flatten()[0]) * 400))

        file_name = os.path.basename(infer_path)
        print("{}样本,被预测为:{}".format(file_name, lab))
        f.write(file_name + ',' + str(lab) + '\n')

print("结束")
5.提交结果