Install torch2trt from source (the --plugins flag also builds its TensorRT plugins):
git clone https://github.com/NVIDIA-AI-IOT/torch2trt
cd torch2trt
sudo python setup.py install --plugins
具体代码1
from retinaface.models.retinaface import RetinaFace, PriorBox # 导入网络
import torch,os
from torch2trt import torch2trt
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Absolute path of the directory that contains this file.
current_dir = os.path.split(os.path.abspath(__file__))[0]

# Configuration dict passed to RetinaFace(cfg) in this file. Only
# 'image_size' is read directly here (to size the dummy input); the other
# entries are consumed by the imported retinaface package.
# NOTE(review): the training-related entries ('batch_size', 'epoch',
# 'decay1', ...) presumably have no effect on conversion -- confirm.
cfg = {
    'name': 'mobilenet0.25',
    'min_sizes': [[16, 32], [64, 128], [256, 512]],
    'steps': [8, 16, 32],
    'variance': [0.1, 0.2],
    'clip': False,
    'loc_weight': 2.0,
    'gpu_train': True,
    'batch_size': 32,
    'ngpu': 1,
    'epoch': 250,
    'decay1': 190,
    'decay2': 220,
    'image_size': 640,
    'pretrain': True,
    'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3},
    'in_channel': 32,
    'out_channel': 64,
}
def load_model(model, pretrained_path, device):
    """Load checkpoint weights from ``pretrained_path`` into ``model``.

    The checkpoint is read with ``torch.load`` using ``device`` as the
    ``map_location``. When the loaded object has a ``"state_dict"`` key, the
    value stored under it is used; otherwise the loaded object itself is
    treated as the state dict. A leading ``'module.'`` is stripped from every
    key, the key overlap is reported by ``check_keys``, and the weights are
    applied with ``strict=False``.

    Returns None; ``model`` is updated in place.
    """
    print('Loading pretrained model from {}'.format(pretrained_path))
    checkpoint = torch.load(pretrained_path, map_location=device)
    # Some checkpoints keep the weights under a "state_dict" entry.
    raw_weights = checkpoint['state_dict'] if "state_dict" in checkpoint.keys() else checkpoint
    cleaned_weights = remove_prefix(raw_weights, 'module.')
    check_keys(model, cleaned_weights)
    model.load_state_dict(cleaned_weights, strict=False)
def check_keys(model, pretrained_state_dict):
    """Print how the checkpoint keys line up with the model's keys.

    Args:
        model: object whose ``state_dict()`` returns a mapping of parameter
            names.
        pretrained_state_dict: mapping of parameter names loaded from a
            checkpoint.

    Returns:
        True when at least one checkpoint key matches a model key.

    Raises:
        AssertionError: if no checkpoint key matches any model key.
    """
    ckpt_keys = set(pretrained_state_dict.keys())
    model_keys = set(model.state_dict().keys())
    used_pretrained_keys = model_keys & ckpt_keys
    unused_pretrained_keys = ckpt_keys - model_keys
    missing_keys = model_keys - ckpt_keys
    print('Missing keys:{}'.format(len(missing_keys)))
    print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys)))
    print('Used keys:{}'.format(len(used_pretrained_keys)))
    # Raise explicitly instead of using an `assert` statement so the check
    # still runs under `python -O`; the exception type is unchanged.
    if not used_pretrained_keys:
        raise AssertionError('load NONE from pretrained checkpoint')
    return True
def remove_prefix(state_dict, prefix):
    """Return a copy of ``state_dict`` with ``prefix`` stripped from its keys.

    Old style models are stored with all parameter names sharing the common
    prefix ``'module.'``. Only one leading occurrence of ``prefix`` is
    removed from a key; keys that do not start with it are kept unchanged.
    An empty ``prefix`` leaves every key as it is.

    Args:
        state_dict: mapping of parameter names to values.
        prefix: string to strip from the start of each key.

    Returns:
        A new dict with the renamed keys and the original values.
    """
    print('remove prefix \'{}\''.format(prefix))
    # Slice instead of str.split: split() raises ValueError when the
    # separator is an empty string.
    cut = len(prefix)
    return {
        (key[cut:] if key.startswith(prefix) else key): value
        for key, value in state_dict.items()
    }
def create_engine(weights, device, eps=1e-3):
    """Convert RetinaFace to a TensorRT module with torch2trt and save it.

    Builds ``RetinaFace(cfg)`` on ``device``, loads ``weights`` into it,
    converts it with a dummy input, compares the PyTorch and TensorRT outputs
    and writes the converted module's state dict to
    ``<current_dir>/engines/retina_trt_<device>.trt``.

    Args:
        weights: path of the checkpoint passed to ``load_model``.
        device: device string used for the model, the dummy input and the
            checkpoint ``map_location``; it is also part of the file name.
        eps: largest allowed absolute difference between corresponding
            PyTorch and TensorRT outputs.

    Returns:
        The converted module returned by ``torch2trt``.

    Raises:
        RuntimeError: if any pair of outputs differs by more than ``eps``.
    """
    print("Create trt engine for retintaface...")
    model = RetinaFace(cfg).to(device)
    load_model(model, weights, device)
    model.eval()
    # Dummy input of 1 x 3 x image_size x image_size (cfg["image_size"] is
    # 640 here); adjust the size to match your own model.
    side = cfg["image_size"]
    x = torch.ones((1, 3, side, side)).to(device)
    model_trt = torch2trt(model, [x])
    print("Ok. Check outputs...")
    # The comparison only needs forward results, so skip autograd tracking.
    with torch.no_grad():
        y = model(x)
        y_trt = model_trt(x)
    for index, (out, out_trt) in enumerate(zip(y, y_trt)):
        max_err = float(torch.max(torch.abs(out - out_trt)))
        if max_err > eps:
            raise RuntimeError(
                'TensorRT output {} differs from PyTorch output: '
                'max abs error {} > eps {}'.format(index, max_err, eps)
            )
    engine_dir = os.path.join(current_dir, "engines")
    os.makedirs(engine_dir, exist_ok=True)
    torch.save(model_trt.state_dict(), os.path.join(engine_dir, f"retina_trt_{device}.trt"))
    print('trt create finish.......')
    return model_trt
运行结果
具体代码2
from arcface.resnet import resnet_face18
import torch,os
from torch2trt import torch2trt
# Absolute path of the directory that contains this file.
current_dir = os.path.split(os.path.abspath(__file__))[0]
def load_model(model, pretrained_path, device):
    """Read a checkpoint from ``pretrained_path`` and load it into ``model``.

    ``torch.load`` is called with ``device`` as the ``map_location``. The
    weights are taken from the ``"state_dict"`` entry when the loaded object
    has one, otherwise the loaded object is used directly. A leading
    ``'module.'`` is stripped from every key and ``check_keys`` reports the
    overlap before ``model.load_state_dict`` is called with its default
    arguments.

    Returns None; ``model`` is updated in place.
    """
    print('Loading pretrained model from {}'.format(pretrained_path))
    loaded = torch.load(pretrained_path, map_location=device)
    # Unwrap checkpoints that keep the weights under "state_dict".
    source = loaded['state_dict'] if "state_dict" in loaded.keys() else loaded
    stripped = remove_prefix(source, 'module.')
    check_keys(model, stripped)
    model.load_state_dict(stripped)
def check_keys(model, pretrained_state_dict):
    """Print how the checkpoint keys line up with the model's keys.

    Args:
        model: object whose ``state_dict()`` returns a mapping of parameter
            names.
        pretrained_state_dict: mapping of parameter names loaded from a
            checkpoint.

    Returns:
        True when at least one checkpoint key matches a model key.

    Raises:
        AssertionError: if no checkpoint key matches any model key.
    """
    ckpt_keys = set(pretrained_state_dict.keys())
    model_keys = set(model.state_dict().keys())
    used_pretrained_keys = model_keys & ckpt_keys
    unused_pretrained_keys = ckpt_keys - model_keys
    missing_keys = model_keys - ckpt_keys
    print('Missing keys:{}'.format(len(missing_keys)))
    print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys)))
    print('Used keys:{}'.format(len(used_pretrained_keys)))
    # Raise explicitly instead of using an `assert` statement so the check
    # still runs under `python -O`; the exception type is unchanged.
    if not used_pretrained_keys:
        raise AssertionError('load NONE from pretrained checkpoint')
    return True
def remove_prefix(state_dict, prefix):
    """Return a copy of ``state_dict`` with ``prefix`` stripped from its keys.

    Old style models are stored with all parameter names sharing the common
    prefix ``'module.'``. Only one leading occurrence of ``prefix`` is
    removed from a key; keys that do not start with it are kept unchanged.
    An empty ``prefix`` leaves every key as it is.

    Args:
        state_dict: mapping of parameter names to values.
        prefix: string to strip from the start of each key.

    Returns:
        A new dict with the renamed keys and the original values.
    """
    print('remove prefix \'{}\''.format(prefix))
    # Slice instead of str.split: split() raises ValueError when the
    # separator is an empty string.
    cut = len(prefix)
    return {
        (key[cut:] if key.startswith(prefix) else key): value
        for key, value in state_dict.items()
    }
def create_engine(weights, device, eps=1e-3):
    """Convert the ArcFace ``resnet_face18`` model with torch2trt and save it.

    The model and the dummy input are placed with ``.cuda()``; ``device`` is
    only forwarded to ``load_model``. ``eps`` is accepted but not used in
    this function. The converted module's state dict is written to
    ``<current_dir>/engines/arcface_trt_256.trt``.

    Args:
        weights: path of the checkpoint passed to ``load_model``.
        device: ``map_location`` used when the checkpoint is read.
        eps: unused.

    Returns:
        The converted module returned by ``torch2trt``.
    """
    # NOTE(review): the message below still names "retintaface" although
    # this function converts the ArcFace network.
    print("Create trt engine for retintaface...")
    net = resnet_face18(use_se=True).cuda()
    load_model(net, weights, device)
    net.eval()
    # Dummy 1 x 1 x 128 x 128 input used to trace the network; adjust the
    # size to match your own model.
    dummy_input = torch.ones((1, 1, 128, 128)).cuda()
    model_trt = torch2trt(net, [dummy_input])
    print('save')
    engine_dir = os.path.join(current_dir, "engines")
    os.makedirs(engine_dir, exist_ok=True)
    engine_path = os.path.join(engine_dir, "arcface_trt_256.trt")
    torch.save(model_trt.state_dict(), engine_path)
    print('trt create finish.......')
    return model_trt
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
# Checkpoint to convert.
weights = '/home/lqs/Documents/Engineering_CYB/pth_onnx_model/resnet18_256_90.pth'
# Build and save the engine; the returned module is not kept.
create_engine(weights, device, eps=1e-3)
运行结果
Create trt engine for retintaface...
Loading pretrained model from /home/lqs/Documents/Engineering_CYB/pth_onnx_model/resnet18_256_90.pth
remove prefix 'module.'
Missing keys:0
Unused checkpoint keys:0
Used keys:221
save
trt create finish.......
具体代码3
# -*- coding: utf-8 -*-
import torchvision
import torch
from torch2trt import torch2trt
# Half-precision random input and pretrained ResNet-18, both on the GPU.
data = torch.randn((1, 3, 224, 224)).cuda().half()
model = torchvision.models.resnet18(pretrained=True).cuda().half().eval()
output = model(data)
# pytorch -> tensorrt
model_trt = torch2trt(model, [data], fp16_mode=True)
output_trt = model_trt(data)
# compare
diff = output - output_trt
print('max error: %f' % float(torch.max(torch.abs(diff))))
# Reduce the squared error to a single value before float(): float() only
# accepts one-element tensors, and the classifier output has many elements.
print("mse :%f" % float(torch.mean(diff ** 2)))
# save tensorrt model
torch.save(model_trt.state_dict(), "resnet18_trt.pth")
# load tensorrt model
from torch2trt import TRTModule
model_trt = TRTModule()
model_trt.load_state_dict(torch.load('resnet18_trt.pth'))
# -*- coding: utf-8 -*-
import torchvision
import torch
from collections import OrderedDict
from torch2trt import torch2trt
from arcface.resnet import resnet_face18
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Random 1 x 1 x 128 x 128 input used for the smoke test and the conversion.
data = torch.randn((1, 1, 128, 128)).cuda()
model = resnet_face18(use_se=True).cuda()
model_path = '/home/lqs/Documents/Engineering_CYB/pth_onnx_model/resnet18_256_90.pth'
state_dict = torch.load(model_path, map_location=device)
print(1)
model_dict = model.state_dict()
# Strip a leading 'module.' from checkpoint keys before matching them against
# the model. Without this, a checkpoint whose keys carry that prefix matches
# nothing, and the engine is silently built from the model's initial weights.
normalized = {
    (k[len('module.'):] if k.startswith('module.') else k): v
    for k, v in state_dict.items()
}
# Keep only entries the model knows about. Keys containing 'fc' are skipped,
# so those entries keep the values the freshly built model already has.
pretrained_dict = {k: v for k, v in normalized.items() if (k in model_dict and 'fc' not in k)}
model_dict.update(pretrained_dict)
print(2)
model.load_state_dict(model_dict)
model.eval()
print(3)
# Smoke-test forward pass; no gradients are needed for it.
with torch.no_grad():
    output = model(data)
print(4)
# pytorch -> tensorrt
model_trt = torch2trt(model, [data], fp16_mode=True)
print('begin to save')
# save tensorrt model
torch.save(model_trt.state_dict(), "arcface_trt_256.trt")
运行结果
1
2
3
4
begin to save