前段时间,我们给大家介绍了好基友 OpenPPL ,它是商汤开源的深度学习推理引擎,提供云原生环境下的 AI 模型多后端部署能力。
简单来说,用 OpenMMLab 开发的模型,可以通过 OpenPPL 高效可靠地运行在现有的 CPU、GPU 等计算平台上,为云端场景提供人工智能推理服务。
上次介绍后就有小伙伴问,有没有 OpenPPL 的教程?今天它来了!
本文将以目标检测工具箱 MMDetection 中的一个经典网络 Mask R-CNN (ICCV'2017) 为例,手把手教大家如何使用 OpenPPL 实现一个目标检测的推理任务!
Mask R-CNN 是一个强大的通用对象实例分割框架(object instance segmentation),它不仅可对图像中的目标进行检测,还可以对每一个目标给出一个高质量的分割结果。
Example Mask R-CNN output
本教程使用 Python 代码进行演示。OpenPPL 提供了 Python API,可以通过如下编译命令生成:
./build.sh -DHPCC_USE_X86_64=ON -DPPLNN_ENABLE_PYTHON_API=ON
本文内容
准备模型
准备数据
构造并运行 runtime
将网络的输出画到原图上
完整代码展示
1. 准备模型
OpenPPL 原生支持 onnx 格式的模型,目前获取 onnx 模型的方式大致可以分为如下三种:
从 ONNX 官方的 Model Zoo 下载;
直接从训练框架转换得来,例如 torch 可以通过 torch.onnx.export() 接口来获得从 torch model 到 onnx model 的转换;
使用第三方库提供的转换工具来获得对应的 onnx 模型,例如 OpenMMLab 为每一个仓库都提供了一个 pytorch2onnx.py 脚本。
这里我们使用 OpenMMLab 中 MMDetection 提供的 pytorch2onnx.py。
# Export the Mask R-CNN checkpoint to ONNX with MMDetection's converter.
# Positional arguments: the model config file, then the checkpoint file.
# Each backslash must be the last character on its line to continue the command.
python3 tools/deployment/pytorch2onnx.py \
    configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py \
    mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth \
    --output-file mask_rcnn.onnx \
    --dynamic-export
使用这个脚本进行模型转换的时候,需要提供两个文件:
一个是 PyTorch 的模型文件 checkpoint_file,另一个是模型的配置文件 config_file。
在命令行运行上面的命令,当屏幕输出下面这句话的时候我们的 onnx 模型已经生成好了。
Successfully exported ONNX model: mask_rcnn.onnx
在当前目录可以看到 mask_rcnn.onnx。
2. 准备数据
准备一张测试用的图 :
我们需要通过一系列的前处理,将图片转换成 OpenPPL 的输入数据:
def preprocess(img_file, input_file):
    """Turn an image file into the raw float32 input file used for inference.

    The image is read with OpenCV, resized to 1200x800 (width x height),
    converted from BGR to RGB, normalized per channel with the mean/std
    below, transposed from HWC to CHW and written to ``input_file`` with
    ``ndarray.tofile``.

    Args:
        img_file: Path of the image to read.
        input_file: Path of the binary file to write.

    Returns:
        ``(w_scale, h_scale)``: the resize factors ``1200 / original_width``
        and ``800 / original_height``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img_file``.
    """
    img = cv2.imread(img_file, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("cannot read image: " + str(img_file))

    w_scale = 1200 / img.shape[1]
    h_scale = 800 / img.shape[0]

    # 1. resize
    img = cv2.resize(img, (1200, 800), interpolation=cv2.INTER_LINEAR)

    # 2. normalize
    mean = np.array([123.675, 116.28, 103.53]).reshape(1, -1).astype(np.float64)
    std = np.array([58.395, 57.12, 57.375]).reshape(1, -1).astype(np.float64)
    stdinv = 1 / std
    img = img.astype(np.float32)  # astype already returns a new array
    # Convert BGR -> RGB exactly once (in place). Doing the conversion twice
    # swaps the channels back to BGR, so the mean/std would be applied to the
    # wrong channels.
    # NOTE(review): assumes mean/std are given in RGB order -- confirm
    # against the model config.
    cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img)
    cv2.subtract(img, mean, img)
    cv2.multiply(img, stdinv, img)

    # 3. transpose HWC -> CHW
    img = img.transpose(2, 0, 1)
    img.tofile(input_file)
    return w_scale, h_scale
3. 构造并运行 runtime
目前 OpenPPL 的推理架构支持 x86 和 CUDA。这里以 x86 为例,当然也可以使用 CUDA。
def runmodel(input_file, model_file="mask_rcnn.onnx"):
    """Run the ONNX model on the x86 engine and return its three outputs.

    Args:
        input_file: Path of a raw float32 file holding 1*3*800*1200 values;
            it is reshaped to ``(1, 3, 800, 1200)`` and copied into input 0.
        model_file: Path of the ONNX model to load.

    Returns:
        ``(dets_data, labels_data, masks_data)``: the outputs named ``dets``,
        ``labels`` and ``masks`` as numpy arrays after ``squeeze()``.

    The process exits with status -1 (after logging an error) if any step
    fails or if one of the three expected outputs is missing.
    """
    # 1. create engine
    x86_options = pplnn.X86EngineOptions()
    x86_engine = pplnn.X86EngineFactory.Create(x86_options)
    engines = [pplnn.Engine(x86_engine)]

    # 2. create runtime builder
    runtime_builder = pplnn.OnnxRuntimeBuilderFactory.CreateFromFile(model_file, engines)
    if not runtime_builder:
        logging.error("create RuntimeBuilder failed.")
        sys.exit(-1)

    # 3. create runtime
    runtime = runtime_builder.CreateRuntime()
    if not runtime:
        logging.error("create Runtime instance failed.")
        sys.exit(-1)

    # 4. set input data
    tensor = runtime.GetInputTensor(0)
    in_data = np.fromfile(input_file, dtype=np.float32).reshape((1, 3, 800, 1200))
    status = tensor.ConvertFromHost(in_data)
    if status != pplcommon.RC_SUCCESS:
        logging.error("copy data to tensor[" + tensor.GetName() + "] failed: " +
                      pplcommon.GetRetCodeStr(status))
        sys.exit(-1)

    # 5. run
    status = runtime.Run()
    if status != pplcommon.RC_SUCCESS:
        logging.error("Run() failed: " + pplcommon.GetRetCodeStr(status))
        sys.exit(-1)

    # 6. get output data
    # Pre-register the expected names so a missing output is reported clearly
    # instead of surfacing as an UnboundLocalError at the return statement.
    outputs = {'dets': None, 'labels': None, 'masks': None}
    for i in range(runtime.GetOutputCount()):
        tensor = runtime.GetOutputTensor(i)
        tensor_data = tensor.ConvertToHost()
        if not tensor_data:
            logging.error("copy data from tensor[" + tensor.GetName() + "] failed.")
            sys.exit(-1)
        name = tensor.GetName()
        if name in outputs:
            # NOTE(review): squeeze() drops every size-1 axis; if the model
            # yields exactly one detection the detection axis would
            # presumably be dropped too -- confirm the output shapes.
            outputs[name] = np.array(tensor_data, copy=False).squeeze()

    missing = [name for name, data in outputs.items() if data is None]
    if missing:
        logging.error("model output(s) not found: " + ", ".join(missing))
        sys.exit(-1)

    return outputs['dets'], outputs['labels'], outputs['masks']
4. 将网络的输出画到原图上
def postprocess(filename, dets_data, labels_data, masks_data, w_scale, h_scale,
                score_thr=0.8, out_file='res.png'):
    """Draw detections above ``score_thr`` onto the image and save the result.

    Args:
        filename: Path of the original image.
        dets_data: 2-D array; columns 0-3 are used as the box corners
            (left, top, right, bottom) and the last column as the score.
        labels_data: Per-detection class indices into ``coco_classes``.
        masks_data: Per-detection masks; each one is resized to the original
            image size and every non-zero pixel is treated as foreground.
        w_scale: Horizontal factor the box coordinates are divided by.
        h_scale: Vertical factor the box coordinates are divided by.
        score_thr: Only detections with a score strictly above this are drawn.
        out_file: Path the annotated image is written to.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``filename``.
    """
    im = cv2.imread(filename, cv2.IMREAD_COLOR)
    if im is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("cannot read image: " + str(filename))

    scores = dets_data[:, -1]
    inds = scores > score_thr
    bboxes = dets_data[inds, :]
    labels = labels_data[inds]
    segms = masks_data[inds, ...]

    # A locally seeded generator yields the same colours as seeding the
    # global one with 42, without touching global random state.
    rng = np.random.RandomState(42)
    # max() of an empty sequence raises, so handle "nothing above threshold".
    num_colors = int(labels.max()) + 1 if labels.size else 0
    mask_colors = [
        rng.randint(0, 256, (1, 3), dtype=np.uint8)
        for _ in range(num_colors)
    ]

    for bbox, label, segm in zip(bboxes, labels, segms):
        bbox_int = bbox.astype(np.int32)
        # Divide by the resize factors to map the box back onto the original image.
        left = int(bbox_int[0] / w_scale)
        top = int(bbox_int[1] / h_scale)
        right = int(bbox_int[2] / w_scale)
        bottom = int(bbox_int[3] / h_scale)
        cv2.rectangle(im, (left, top), (right, bottom), (0, 0, 0), 2)
        cv2.putText(im, coco_classes[label] + ": " + str(round(bbox[4], 2)),
                    (left, top), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.0, (0, 0, 0))

        # Blend the class colour 50/50 into the pixels covered by the mask.
        color_mask = mask_colors[label]
        mask = cv2.resize(segm, (im.shape[1], im.shape[0]))
        mask = mask.astype(bool)
        im[mask] = im[mask] * 0.5 + color_mask * 0.5

    cv2.imwrite(out_file, im)
以上,我们便使用 OpenPPL 完成了一个简单的目标检测推理任务!
大家都去试试看吧!
附: 完整代码展示
可在「ppl.nn.demo示例」里获取所有sample:
https://github.com/openppl-public/ppl.nn/tree/master/samples
"""Mask R-CNN inference demo: preprocess an image, run it with OpenPPL, draw the results."""

import logging
import sys

import cv2
import numpy as np
from pyppl import common as pplcommon
from pyppl import nn as pplnn


def preprocess(img_file, input_file):
    """Turn an image file into the raw float32 input file used for inference.

    The image is read with OpenCV, resized to 1200x800 (width x height),
    converted from BGR to RGB, normalized per channel with the mean/std
    below, transposed from HWC to CHW and written to ``input_file`` with
    ``ndarray.tofile``.

    Args:
        img_file: Path of the image to read.
        input_file: Path of the binary file to write.

    Returns:
        ``(w_scale, h_scale)``: the resize factors ``1200 / original_width``
        and ``800 / original_height``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img_file``.
    """
    img = cv2.imread(img_file, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("cannot read image: " + str(img_file))

    w_scale = 1200 / img.shape[1]
    h_scale = 800 / img.shape[0]

    img = cv2.resize(img, (1200, 800), interpolation=cv2.INTER_LINEAR)

    mean = np.array([123.675, 116.28, 103.53]).reshape(1, -1).astype(np.float64)
    std = np.array([58.395, 57.12, 57.375]).reshape(1, -1).astype(np.float64)
    stdinv = 1 / std
    img = img.astype(np.float32)  # astype already returns a new array
    # Convert BGR -> RGB exactly once (in place). Doing the conversion twice
    # swaps the channels back to BGR, so the mean/std would be applied to the
    # wrong channels.
    # NOTE(review): assumes mean/std are given in RGB order -- confirm
    # against the model config.
    cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img)
    cv2.subtract(img, mean, img)
    cv2.multiply(img, stdinv, img)

    img = img.transpose(2, 0, 1)  # HWC -> CHW
    img.tofile(input_file)
    return w_scale, h_scale


# Class names indexed by the label values returned by the model.
coco_classes = [
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
    'truck', 'boat', 'traffic_light', 'fire_hydrant', 'stop_sign',
    'parking_meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
    'sports_ball', 'kite', 'baseball_bat', 'baseball_glove', 'skateboard',
    'surfboard', 'tennis_racket', 'bottle', 'wine_glass', 'cup', 'fork',
    'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
    'broccoli', 'carrot', 'hot_dog', 'pizza', 'donut', 'cake', 'chair',
    'couch', 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv',
    'laptop', 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
    'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
    'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'
]


def postprocess(filename, dets_data, labels_data, masks_data, w_scale, h_scale,
                score_thr=0.8, out_file='res.png'):
    """Draw detections above ``score_thr`` onto the image and save the result.

    Args:
        filename: Path of the original image.
        dets_data: 2-D array; columns 0-3 are used as the box corners
            (left, top, right, bottom) and the last column as the score.
        labels_data: Per-detection class indices into ``coco_classes``.
        masks_data: Per-detection masks; each one is resized to the original
            image size and every non-zero pixel is treated as foreground.
        w_scale: Horizontal factor the box coordinates are divided by.
        h_scale: Vertical factor the box coordinates are divided by.
        score_thr: Only detections with a score strictly above this are drawn.
        out_file: Path the annotated image is written to.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``filename``.
    """
    im = cv2.imread(filename, cv2.IMREAD_COLOR)
    if im is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("cannot read image: " + str(filename))

    scores = dets_data[:, -1]
    inds = scores > score_thr
    bboxes = dets_data[inds, :]
    labels = labels_data[inds]
    segms = masks_data[inds, ...]

    # A locally seeded generator yields the same colours as seeding the
    # global one with 42, without touching global random state.
    rng = np.random.RandomState(42)
    # max() of an empty sequence raises, so handle "nothing above threshold".
    num_colors = int(labels.max()) + 1 if labels.size else 0
    mask_colors = [
        rng.randint(0, 256, (1, 3), dtype=np.uint8)
        for _ in range(num_colors)
    ]

    for bbox, label, segm in zip(bboxes, labels, segms):
        bbox_int = bbox.astype(np.int32)
        # Divide by the resize factors to map the box back onto the original image.
        left = int(bbox_int[0] / w_scale)
        top = int(bbox_int[1] / h_scale)
        right = int(bbox_int[2] / w_scale)
        bottom = int(bbox_int[3] / h_scale)
        cv2.rectangle(im, (left, top), (right, bottom), (0, 0, 0), 2)
        cv2.putText(im, coco_classes[label] + ": " + str(round(bbox[4], 2)),
                    (left, top), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.0, (0, 0, 0))

        # Blend the class colour 50/50 into the pixels covered by the mask.
        color_mask = mask_colors[label]
        mask = cv2.resize(segm, (im.shape[1], im.shape[0]))
        mask = mask.astype(bool)
        im[mask] = im[mask] * 0.5 + color_mask * 0.5

    cv2.imwrite(out_file, im)


def runmodel(input_file, model_file="mask_rcnn.onnx"):
    """Run the ONNX model on the x86 engine and return its three outputs.

    Args:
        input_file: Path of a raw float32 file holding 1*3*800*1200 values;
            it is reshaped to ``(1, 3, 800, 1200)`` and copied into input 0.
        model_file: Path of the ONNX model to load.

    Returns:
        ``(dets_data, labels_data, masks_data)``: the outputs named ``dets``,
        ``labels`` and ``masks`` as numpy arrays after ``squeeze()``.

    The process exits with status -1 (after logging an error) if any step
    fails or if one of the three expected outputs is missing.
    """
    x86_options = pplnn.X86EngineOptions()
    x86_engine = pplnn.X86EngineFactory.Create(x86_options)
    engines = [pplnn.Engine(x86_engine)]

    runtime_builder = pplnn.OnnxRuntimeBuilderFactory.CreateFromFile(model_file, engines)
    if not runtime_builder:
        logging.error("create RuntimeBuilder failed.")
        sys.exit(-1)

    runtime = runtime_builder.CreateRuntime()
    if not runtime:
        logging.error("create Runtime instance failed.")
        sys.exit(-1)

    tensor = runtime.GetInputTensor(0)
    in_data = np.fromfile(input_file, dtype=np.float32).reshape((1, 3, 800, 1200))
    status = tensor.ConvertFromHost(in_data)
    if status != pplcommon.RC_SUCCESS:
        logging.error("copy data to tensor[" + tensor.GetName() + "] failed: " +
                      pplcommon.GetRetCodeStr(status))
        sys.exit(-1)

    status = runtime.Run()
    if status != pplcommon.RC_SUCCESS:
        logging.error("Run() failed: " + pplcommon.GetRetCodeStr(status))
        sys.exit(-1)

    # Pre-register the expected names so a missing output is reported clearly
    # instead of surfacing as an UnboundLocalError at the return statement.
    outputs = {'dets': None, 'labels': None, 'masks': None}
    for i in range(runtime.GetOutputCount()):
        tensor = runtime.GetOutputTensor(i)
        tensor_data = tensor.ConvertToHost()
        if not tensor_data:
            logging.error("copy data from tensor[" + tensor.GetName() + "] failed.")
            sys.exit(-1)
        name = tensor.GetName()
        if name in outputs:
            # NOTE(review): squeeze() drops every size-1 axis; if the model
            # yields exactly one detection the detection axis would
            # presumably be dropped too -- confirm the output shapes.
            outputs[name] = np.array(tensor_data, copy=False).squeeze()

    missing = [name for name, data in outputs.items() if data is None]
    if missing:
        logging.error("model output(s) not found: " + ", ".join(missing))
        sys.exit(-1)

    return outputs['dets'], outputs['labels'], outputs['masks']


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # create input data
    input_file = 'input.bin'
    image_file = 'test.jpg'
    w_scale, h_scale = preprocess(image_file, input_file)

    # run model
    dets_data, labels_data, masks_data = runmodel(input_file)

    # postprocess
    postprocess(image_file, dets_data, labels_data, masks_data, w_scale, h_scale)

    logging.info("Run ok")
文章来源:公众号【OpenMMLab】
2021-09-29 18:48