The official TensorRT documentation says there is sample code under /usr/src/tensorrt/samples/python/network_api_pytorch_mnist. In fact, it is not there. An example is provided here for reference.
The example actually lives at /usr/local/lib/python3.5/site-packages/tensorrt/examples/pytorch_to_trt:

```python
#!/usr/bin/python
import os
from random import randint
import numpy as np

try:
    import pycuda.driver as cuda
    import pycuda.gpuarray as gpuarray
    import pycuda.autoinit
except ImportError as err:
    raise ImportError("""ERROR: Failed to import module ({})
Please make sure you have pycuda and the example dependencies installed.
sudo apt-get install python(3)-pycuda
pip install tensorrt[examples]""".format(err))

try:
    from PIL import Image
except ImportError as err:
    raise ImportError("""ERROR: Failed to import module ({})
Please make sure you have Pillow installed.
For installation instructions, see:
http://pillow.readthedocs.io/en/stable/installation.html""".format(err))

import mnist

try:
    import torch
except ImportError as err:
    raise ImportError("""ERROR: Failed to import module ({})
Please make sure you have PyTorch installed.
For installation instructions, see:
http://pytorch.org/""".format(err))

# TensorRT must be imported after any frameworks in the case where
# the framework has incorrect dependencies setup and is not updated
# to use the versions of libraries that TensorRT imports.
try:
    import tensorrt as trt
except ImportError as err:
    raise ImportError("""ERROR: Failed to import module ({})
Please make sure you have the TensorRT Library installed
and accessible in your LD_LIBRARY_PATH""".format(err))

G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.INFO)

ITERATIONS = 10
INPUT_LAYERS = ["data"]
OUTPUT_LAYERS = ['prob']
INPUT_H = 28
INPUT_W = 28
OUTPUT_SIZE = 10


def create_pytorch_engine(max_batch_size, builder, dt, model):
    network = builder.create_network()

    data = network.add_input(INPUT_LAYERS[0], dt, (1, INPUT_H, INPUT_W))
    assert(data)

    #-------------
    conv1_w = model['conv1.weight'].cpu().numpy().reshape(-1)
    conv1_b = model['conv1.bias'].cpu().numpy().reshape(-1)
    conv1 = network.add_convolution(data, 20, (5, 5), conv1_w, conv1_b)
    assert(conv1)
    conv1.set_stride((1, 1))

    #-------------
    pool1 = network.add_pooling(conv1.get_output(0), trt.infer.PoolingType.MAX, (2, 2))
    assert(pool1)
    pool1.set_stride((2, 2))

    #-------------
    conv2_w = model['conv2.weight'].cpu().numpy().reshape(-1)
    conv2_b = model['conv2.bias'].cpu().numpy().reshape(-1)
    conv2 = network.add_convolution(pool1.get_output(0), 50, (5, 5), conv2_w, conv2_b)
    assert(conv2)
    conv2.set_stride((1, 1))

    #-------------
    pool2 = network.add_pooling(conv2.get_output(0), trt.infer.PoolingType.MAX, (2, 2))
    assert(pool2)
    pool2.set_stride((2, 2))

    #-------------
    fc1_w = model['fc1.weight'].cpu().numpy().reshape(-1)
    fc1_b = model['fc1.bias'].cpu().numpy().reshape(-1)
    fc1 = network.add_fully_connected(pool2.get_output(0), 500, fc1_w, fc1_b)
    assert(fc1)

    #-------------
    relu1 = network.add_activation(fc1.get_output(0), trt.infer.ActivationType.RELU)
    assert(relu1)

    #-------------
    fc2_w = model['fc2.weight'].cpu().numpy().reshape(-1)
    fc2_b = model['fc2.bias'].cpu().numpy().reshape(-1)
    fc2 = network.add_fully_connected(relu1.get_output(0), OUTPUT_SIZE, fc2_w, fc2_b)
    assert(fc2)

    #-------------
    # Using log_softmax in training, cutting out log softmax here since no log softmax in TRT
    fc2.get_output(0).set_name(OUTPUT_LAYERS[0])
    network.mark_output(fc2.get_output(0))

    builder.set_max_batch_size(max_batch_size)
    builder.set_max_workspace_size(1 << 20)
    #builder.set_fp16_mode(True)

    engine = builder.build_cuda_engine(network)
    network.destroy()

    return engine


def model_to_engine(model, max_batch_size):
    builder = trt.infer.create_infer_builder(G_LOGGER)
    engine = create_pytorch_engine(max_batch_size, builder, trt.infer.DataType.FLOAT, model)
    assert(engine)

    modelstream = engine.serialize()
    engine.destroy()
    builder.destroy()
    return modelstream


# Run inference on device
def infer(context, input_img, output_size, batch_size):
    # Load engine
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)
    # Convert input data to Float32
    input_img = input_img.astype(np.float32)
    # Create output array to receive data
    output = np.empty(output_size, dtype=np.float32)

    # Allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.nbytes)
    d_output = cuda.mem_alloc(batch_size * output.nbytes)

    bindings = [int(d_input), int(d_output)]

    stream = cuda.Stream()

    # Transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # Execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    # Transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)
    # Wait for the async copies to finish before reading the output
    stream.synchronize()

    # Return predictions
    return output


def main():
    path = os.path.dirname(os.path.realpath(__file__))

    # The mnist package is a simple PyTorch mnist example. mnist.learn() trains a network for
    # PyTorch's provided mnist dataset. mnist.get_trained_model() returns the state dictionary
    # of the trained model. We use this to demonstrate the full training to inference pipeline.
    mnist.learn()
    model = mnist.get_trained_model()

    # Typically training and inference are separated, so you would use torch.save() to store
    # the model's state dictionary and torch.load() to load it back, e.g.:
    # model = torch.load(path + "/trained_mnist.pyt")

    modelstream = model_to_engine(model, 1)

    runtime = trt.infer.create_infer_runtime(G_LOGGER)
    engine = runtime.deserialize_cuda_engine(modelstream.data(), modelstream.size(), None)

    if modelstream:
        modelstream.destroy()

    img, target = mnist.get_testcase()
    img = img.numpy()
    target = target.numpy()

    # Create the execution context once and reuse it across iterations;
    # creating a new one inside the loop would leak contexts.
    context = engine.create_execution_context()

    print("\n| TEST CASE | PREDICTION |")
    for i in range(ITERATIONS):
        img_in = img[i].ravel()
        target_in = target[i]
        out = infer(context, img_in, OUTPUT_SIZE, 1)
        print("|-----------|------------|")
        print("| " + str(target_in) + " | " + str(np.argmax(out)) + " |")

    print('')
    context.destroy()
    engine.destroy()
    runtime.destroy()


if __name__ == "__main__":
    main()
```
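A note on the `mnist` import: it is a local helper module that ships next to this example, not a PyPI package. If you only copied the script above, the following is a minimal sketch of what that module might provide, assuming a plain LeNet-style trainer. The layer names (conv1/conv2/fc1/fc2) must match the state-dictionary keys that create_pytorch_engine() reads; the hyperparameters and data path are placeholders of my own, not the original sample's values.

```python
# mnist.py -- a minimal sketch of the helper module the script imports.
# Only the three functions the script calls are defined; epochs, optimizer
# settings and the dataset path are assumptions, not the original values.
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Names must match the keys create_pytorch_engine() looks up:
        # conv1.*, conv2.*, fc1.*, fc2.*
        self.conv1 = nn.Conv2d(1, 20, 5)       # 28x28 -> 24x24
        self.conv2 = nn.Conv2d(20, 50, 5)      # 12x12 -> 8x8
        self.fc1 = nn.Linear(50 * 4 * 4, 500)  # 8x8 pooled to 4x4
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        x = F.max_pool2d(self.conv1(x), 2)
        x = F.max_pool2d(self.conv2(x), 2)
        x = x.view(-1, 50 * 4 * 4)
        x = F.relu(self.fc1(x))
        # log_softmax during training; the TensorRT graph drops it,
        # which does not change the argmax used for the prediction.
        return F.log_softmax(self.fc2(x), dim=1)


_model = Net()


def learn(epochs=1):
    # One assumed epoch of SGD -- enough for a demo, not for accuracy.
    loader = torch.utils.data.DataLoader(
        datasets.MNIST('/tmp/mnist', train=True, download=True,
                       transform=transforms.ToTensor()),
        batch_size=64, shuffle=True)
    optimizer = torch.optim.SGD(_model.parameters(), lr=0.01, momentum=0.9)
    _model.train()
    for _ in range(epochs):
        for data, target in loader:
            optimizer.zero_grad()
            F.nll_loss(_model(data), target).backward()
            optimizer.step()


def get_trained_model():
    # The script indexes the result like a dict of tensors, i.e. a state_dict.
    return _model.state_dict()


def get_testcase():
    # One batch of 10 test images/labels, matching ITERATIONS in the script.
    loader = torch.utils.data.DataLoader(
        datasets.MNIST('/tmp/mnist', train=False,
                       transform=transforms.ToTensor()),
        batch_size=10, shuffle=False)
    return next(iter(loader))
```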
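The comment in main() notes that training and inference would normally be separate processes. A minimal sketch of that split, reusing model_to_engine() from the script above and the hypothetical trained_mnist.pyt filename mentioned in its comment:

```python
# Training side: train once and persist the state dictionary to disk.
import torch
import mnist

mnist.learn()
torch.save(mnist.get_trained_model(), "trained_mnist.pyt")

# Inference side (a separate run): reload the state dictionary and hand it
# straight to model_to_engine(), skipping training entirely.
model = torch.load("trained_mnist.pyt")
modelstream = model_to_engine(model, 1)
```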