Three approaches to skeleton-based action recognition that I have come across so far:
- classifying by joint angles
- tracking algorithms
- feeding the skeleton sequence into an action recognition network
This part covers building your own dataset for an action recognition network, so you can train toward exactly the effect you want:
- set up the project
- prepare the videos
- horizontally flip the videos to enlarge the training set (optional)
- extract the skeleton keypoints and confidence scores (one generated JSON file per video)
- generate the label file from the keypoint/confidence files
1 Project setup
Pull the latest CPU release of openpose: https://github.com/CMU-Perceptual-Computing-Lab/openpose/releases
With the GPU build, the problem I ran into was that it needed at least four GPUs to run, and I never found where in the source to change that.
Unzip the release and run the two .bat files in the models folder to fetch the model files.
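Before wiring the demo into any scripts, it is worth checking that the unpacked binary runs at all. A minimal sketch, assuming the unpack location used later in section 4 (adjust the path to yours):

```python
import subprocess

# Hypothetical smoke test: the path is the one reused in section 4 --
# point it at wherever you unzipped the release. --help only prints the
# available flags, so this should finish immediately.
openpose = 'E:/Code/BehaviorRecognition/openpose_cpu/bin/OpenPoseDemo.exe'
result = subprocess.run([openpose, '--help'])
print('exit code:', result.returncode)
```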
2 Preparing the videos
The layout is simple: all videos for one action class go into one folder for that class.
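For example, with the four classes used by the flip script in section 3 (file names here are just illustrative):

```
./left/   left_1.mp4, left_2.mp4, ...
./right/  right_1.mp4, ...
./up/     up_1.mp4, ...
./down/   down_1.mp4, ...
```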
3 Horizontal flipping
This step originally used skvideo; I swapped in cv2 (mainly because, at the time I got here, skvideo still had the unresolved problem described in section 4.1).
```python
import os

import cv2

if __name__ == '__main__':
    type_number = 4  # change me: number of action classes
    typename_list = ["left", "right", "up", "down"]  # change me: class names / video folder names

    for type_index in range(type_number):
        type_filename = typename_list[type_index]
        # folder holding the videos of this class
        originvideo_file = './{}'.format(type_filename)
        # all file names inside the folder
        videos_file_names = os.listdir(originvideo_file)

        # mirror every video horizontally
        for file_name in videos_file_names:
            video_path = '{}/{}'.format(originvideo_file, file_name)
            name_without_suffix = file_name.split('.')[0]
            # output video path
            outvideo_path = '{}/{}_mirror.mp4'.format(originvideo_file, name_without_suffix)

            cap = cv2.VideoCapture(video_path)
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = cap.get(cv2.CAP_PROP_FPS) or 30.0  # keep the source frame rate, fall back to 30

            # define the codec and create a VideoWriter object
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(outvideo_path, fourcc, fps, (width, height))

            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    print("Can't receive frame...............")
                    break
                frame = cv2.flip(frame, 1)  # 1 = flip around the vertical axis
                out.write(frame)
                cv2.imshow("frame", frame)
                if cv2.waitKey(1) == ord("q"):
                    break

            cap.release()
            out.release()
    cv2.destroyAllWindows()
```
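One caveat with this augmentation: a horizontal flip mirrors the action itself, so for direction-sensitive classes like "left" and "right" above, the flipped clip actually shows the opposite class. If your labels encode direction, the mirrored output should go into the opposite class's folder; a sketch for the four classes used above:

```python
# Horizontal flipping swaps direction-sensitive classes: a mirrored "left"
# clip shows a "right" action, while "up"/"down" are unaffected.
mirror_label = {"left": "right", "right": "left", "up": "up", "down": "down"}

for type_filename in ["left", "right", "up", "down"]:
    # the '{name}_mirror.mp4' files would then be written here instead
    outvideo_dir = './{}'.format(mirror_label[type_filename])
    print(type_filename, '->', outvideo_dir)
```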
4 Extracting keypoints and confidence scores
This script processes all videos under one action folder, e.g. video 1, video 2, … under the "left" folder.
```python
import os
import json
import shutil

import skvideo
skvideo.setFFmpegPath('D:/ffmpeg/bin')  # see section 4.1
import skvideo.io

# files copied over from the st-gcn project (see section 4.2)
import st_gcn_tools
import st_gcn_tools.utils as utils

label_name, label_no = 'running', 0


class PreProcess():
    """Extract the skeleton keypoints of a custom dataset with openpose."""

    def start(self):
        # folder holding the source videos
        originvideo_file = './Data/hs'
        # output folder for the resized videos
        resizedvideo_file = './Data/resize'
        os.makedirs(resizedvideo_file, exist_ok=True)

        videos_file_names = os.listdir(originvideo_file)

        # 1. resize every video to 340x256 (the resolution used by kinetics-skeleton)
        for file_name in videos_file_names:
            video_path = '{}/{}'.format(originvideo_file, file_name)
            outvideo_path = '{}/{}'.format(resizedvideo_file, file_name)
            writer = skvideo.io.FFmpegWriter(outvideo_path,
                                             outputdict={'-s': '340x256'})
            reader = skvideo.io.FFmpegReader(video_path)
            for frame in reader.nextFrame():
                writer.writeFrame(frame)
            writer.close()
            print('{} resize success'.format(file_name))

        # 2. extract the skeleton keypoints of every resized video with openpose
        resizedvideos_file_names = os.listdir(resizedvideo_file)
        for file_name in resizedvideos_file_names:
            if not file_name.endswith('mp4'):
                continue
            outvideo_path = '{}/{}'.format(resizedvideo_file, file_name)
            # the OpenPoseDemo.exe from the release pulled in section 1
            openpose = 'E:/Code/BehaviorRecognition/openpose_cpu/bin/OpenPoseDemo.exe'
            video_name = os.path.splitext(file_name)[0]
            # one JSON per frame: the keypoints and confidences of that frame
            output_snippets_dir = './Data/resize/snippets/{}'.format(video_name)
            # one JSON per video: the keypoints and confidences of all its frames
            output_sequence_dir = './Data/resize/data'
            os.makedirs(output_sequence_dir, exist_ok=True)
            output_sequence_path = '{}/{}.json'.format(output_sequence_dir, video_name)

            # pose estimation
            openpose_args = dict(
                video=outvideo_path,
                write_json=output_snippets_dir,
                display=0,
                render_pose=0,
                model_pose='COCO')
            command_line = openpose
            command_line += ' ' + ' '.join(
                ['--{} {}'.format(k, v) for k, v in openpose_args.items()])
            # e.g. .../OpenPoseDemo.exe --video <outvideo_path>
            #          --write_json <output_snippets_dir> ...
            shutil.rmtree(output_snippets_dir, ignore_errors=True)  # drop stale outputs
            os.makedirs(output_snippets_dir)
            os.system(command_line)

            # pack the per-frame openpose outputs into one JSON per video
            video = utils.video.get_video_frames(outvideo_path)
            height, width, _ = video[0].shape
            # label_name / label_no set at the top end up in this JSON --
            # they are what the label step in section 5 reads!
            video_info = utils.openpose.json_pack(
                output_snippets_dir, video_name, width, height,
                label_name, label_no)
            with open(output_sequence_path, 'w') as outfile:
                json.dump(video_info, outfile)
            if len(video_info['data']) == 0:
                print('{} Cannot find pose estimation results.'.format(file_name))
                return  # aborts the whole batch; use continue to skip just this video
            else:
                print('{} pose estimation complete.'.format(file_name))


if __name__ == '__main__':
    p = PreProcess()
    p.start()
```
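For reference, the per-video JSON written by utils.openpose.json_pack follows st-gcn's kinetics-skeleton layout; the top-level keys ("data", "label", "label_index") are exactly the ones the label script in section 5 reads. Roughly (values made up; "pose" holds x,y pairs for the 18 COCO joints and "score" one confidence per joint, both truncated here):

```json
{
  "data": [
    {
      "frame_index": 1,
      "skeleton": [
        {"pose": [0.52, 0.31, 0.50, 0.40], "score": [0.91, 0.88]}
      ]
    }
  ],
  "label": "running",
  "label_index": 0
}
```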
A few things worth noting, from top to bottom:
4.1 The classic skvideo error
The error:
```
assert _HAS_FFMPEG, "Cannot find installation of real FFmpeg (which comes with ffprobe)."
AssertionError: Cannot find installation of real FFmpeg (which comes with ffprobe).
```
The fix:
Download ffmpeg, then add the following before `import skvideo.io`:
```python
import skvideo
skvideo.setFFmpegPath('D:/ffmpeg/bin')  # path to the unpacked ffmpeg/bin
```
4.2 Preparing extra files
```python
import st_gcn_tools
import st_gcn_tools.utils as utils
```
Judging from these imports, we need some files from the st-gcn project to finish this step. Concretely, the script above only calls utils.video.get_video_frames and utils.openpose.json_pack, which live under tools/utils/ in the st-gcn repository (here the tools package has evidently been copied over as st_gcn_tools).
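Of the two, get_video_frames is easy to replace if you would rather not copy everything. A minimal cv2-based stand-in (a sketch, not st-gcn's implementation; cv2 yields BGR frames, but the extraction script only reads the frame shape, so that does not matter here):

```python
import cv2

def get_video_frames(video_path):
    """Stand-in for st_gcn_tools.utils.video.get_video_frames: return all
    frames of the video as a list of HxWx3 numpy arrays."""
    frames = []
    cap = cv2.VideoCapture(video_path)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frames.append(frame)
    cap.release()
    return frames
```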
5 Generating the label file
```python
## json2kinetics.py
import json
import os

if __name__ == '__main__':
    train_json_path = './Data/resize/data'
    # val_json_path = 'D:/VIDEOS/mydata/kinetics-skeleton/kinetics_val'
    output_train_json_path = './Data/output/1.json'
    # output_val_json_path = 'D:/VIDEOS/mydata/kinetics-skeleton/kinetics_val_label.json'

    train_json_names = os.listdir(train_json_path)
    # val_json_names = os.listdir(val_json_path)

    train_label_json = dict()
    # val_label_json = dict()

    for file_name in train_json_names:
        name = os.path.splitext(file_name)[0]
        json_file_path = '{}/{}'.format(train_json_path, file_name)
        json_file = json.load(open(json_file_path))

        file_label = dict()
        if len(json_file['data']) == 0:
            file_label['has_skeleton'] = False
        else:
            file_label['has_skeleton'] = True
        file_label['label'] = json_file['label']
        file_label['label_index'] = json_file['label_index']

        train_label_json['{}'.format(name)] = file_label
        print('{} success'.format(file_name))

    # make sure the output folder exists before writing
    os.makedirs(os.path.dirname(output_train_json_path), exist_ok=True)
    with open(output_train_json_path, 'w') as outfile:
        json.dump(train_label_json, outfile)

    # validation-set version, commented out; remove the comments if needed
    # for file_name in val_json_names:
    #     name = file_name.split('.')[0]
    #     json_file_path = '{}/{}'.format(val_json_path, file_name)
    #     json_file = json.load(open(json_file_path))
    #
    #     file_label = dict()
    #     if len(json_file['data']) == 0:
    #         file_label['has_skeleton'] = False
    #     else:
    #         file_label['has_skeleton'] = True
    #     file_label['label'] = json_file['label']
    #     file_label['label_index'] = json_file['label_index']
    #
    #     val_label_json['{}'.format(name)] = file_label
    #     print('{} success'.format(file_name))
    #
    # with open(output_val_json_path, 'w') as outfile:
    #     json.dump(val_label_json, outfile)
```
The commented-out part handles the validation set; remove the comments if you need it.
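With the label_name and label_no set in section 4, the generated label file maps each video name to its metadata, e.g. (video names made up):

```json
{
  "video1": {"has_skeleton": true, "label": "running", "label_index": 0},
  "video2": {"has_skeleton": false, "label": "running", "label_index": 0}
}
```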
6 Another attempt
The extraction above drives the OpenPoseDemo.exe binary; in principle, calling the openpose source directly should work too. Trying it pinned down the problems that still need solving; recording them here:
- The confidence the source code outputs takes only two values: 2 for a detected joint and 0 for an undetected one. After normalization every detected point should then end up with the same value, yet in the kinetics dataset the scores all differ; I have not figured out where the problem is.
- The skeleton coordinates we feed into st-gcn are normalized (see the sketch after this list).
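A minimal sketch of what such normalization can look like, assuming pixel coordinates are scaled by the frame size and then centered; this is an assumption for illustration, so check st-gcn's tools/utils/openpose.py and its data feeder for the authoritative version:

```python
def normalize_pose(keypoints, width, height):
    """Hypothetical normalization: map pixel (x, y) into roughly [-0.5, 0.5]
    by dividing by the frame size and centering. Not st-gcn's exact code."""
    return [(x / width - 0.5, y / height - 0.5) for x, y in keypoints]

# e.g. two joints on a 340x256 frame (the resize target from section 4)
print(normalize_pose([(170.0, 128.0), (85.0, 64.0)], 340, 256))
```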
Since this is mainly a learning exercise, the focus is still on getting the source-code route to work.