GRCNN inference with onnxruntime and TensorRT
Download the GRCNN project: https://github.com/skumra/robotic-grasping.git
Export the ONNX model:
import torch

# Load the pretrained GR-ConvNet checkpoint (the repo saves the full pickled model, so torch.load returns the module)
net = torch.load("trained-models/jacquard-rgbd-grconvnet3-drop0-ch32/epoch_42_iou_0.93")

# Dummy 1 x 4 x 300 x 300 RGB-D input (depth + RGB channels) used to trace the graph
x = torch.rand(1, 4, 300, 300).cuda()
torch.onnx.export(net, x, "./grcnn.onnx", opset_version=13)
The exported ONNX model takes a single 1x4x300x300 RGB-D input and produces four 1x1x300x300 output maps: grasp quality, cos(2θ), sin(2θ), and grasp width.
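Before wiring the model into an inference pipeline, a quick sanity check of the exported file can help. A minimal sketch using the onnx checker and onnxruntime to print the input/output names and shapes (grcnn.onnx is the file exported above):

import onnx
import onnxruntime

# Structural validity check of the exported graph
model = onnx.load("grcnn.onnx")
onnx.checker.check_model(model)

# Input/output names and shapes as seen by onnxruntime
session = onnxruntime.InferenceSession("grcnn.onnx", providers=['CPUExecutionProvider'])
print([(i.name, i.shape) for i in session.get_inputs()])
print([(o.name, o.shape) for o in session.get_outputs()])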
onnxruntime inference
import cv2
import onnxruntime
import numpy as np
from skimage.feature import peak_local_max


def process_data(rgb, depth, width, height, output_size):
    # Center-crop both images to output_size x output_size
    left = (width - output_size) // 2
    top = (height - output_size) // 2
    right = (width + output_size) // 2
    bottom = (height + output_size) // 2

    # Depth: zero-center, clip to [-1, 1] and move channels first
    depth_img = depth[top:bottom, left:right]
    depth_img = np.clip((depth_img - depth_img.mean()), -1, 1)
    depth_img = depth_img.transpose(2, 0, 1)

    # RGB: scale to [0, 1], zero-center and move channels first
    rgb_img = rgb[top:bottom, left:right]
    rgb_img = rgb_img.astype(np.float32) / 255.0
    rgb_img -= rgb_img.mean()
    rgb_img = rgb_img.transpose(2, 0, 1)

    # Stack depth + RGB into a 1 x 4 x 300 x 300 tensor
    ret = np.concatenate((np.expand_dims(depth_img, 0), np.expand_dims(rgb_img, 0)), axis=1)
    return ret


if __name__ == '__main__':
    rgb = cv2.imread('data/Jacquard/e35c7e8c9f85cac42a2f0bc2931a19e/0_e35c7e8c9f85cac42a2f0bc2931a19e_RGB.png', -1)
    depth = cv2.imread('data/Jacquard/e35c7e8c9f85cac42a2f0bc2931a19e/0_e35c7e8c9f85cac42a2f0bc2931a19e_perfect_depth.tiff', -1)
    depth = np.expand_dims(np.array(depth), axis=2)
    input = process_data(rgb=rgb, depth=depth, width=1024, height=1024, output_size=300)

    onnx_session = onnxruntime.InferenceSession("grcnn.onnx", providers=['CPUExecutionProvider'])

    input_name = []
    for node in onnx_session.get_inputs():
        input_name.append(node.name)
    output_name = []
    for node in onnx_session.get_outputs():
        output_name.append(node.name)

    # The model has a single input, so every input name maps to the same tensor
    inputs = {}
    for name in input_name:
        inputs[name] = input
    outputs = onnx_session.run(None, inputs)

    # Outputs: quality map, cos(2*theta), sin(2*theta), width map
    q_img = outputs[0].squeeze()
    ang_img = (np.arctan2(outputs[2], outputs[1]) / 2.0).squeeze()
    width_img = outputs[3].squeeze() * 150.0

    # Smooth the maps before peak picking
    q_img = cv2.GaussianBlur(q_img, (0, 0), 2)
    ang_img = cv2.GaussianBlur(ang_img, (0, 0), 2)
    width_img = cv2.GaussianBlur(width_img, (0, 0), 1)

    # Pick the single highest-quality grasp
    local_max = peak_local_max(q_img, min_distance=20, threshold_abs=0.2, num_peaks=1)
    for grasp_point_array in local_max:
        grasp_point = tuple(grasp_point_array)
        grasp_angle = ang_img[grasp_point]
        width = width_img[grasp_point] / 2
        print(grasp_point, grasp_angle, width)
Output:
(184, 213) -0.23662478 30.98381233215332
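The grasp point printed above is given in the coordinates of the 300x300 center crop, not the full image. To use it on the original 1024x1024 Jacquard image, the crop offsets applied in process_data have to be added back; a minimal sketch (to_image_coords is a hypothetical helper, not part of the repo):

def to_image_coords(grasp_point, width=1024, height=1024, output_size=300):
    # Invert the center crop from process_data by adding the crop offsets back
    top = (height - output_size) // 2
    left = (width - output_size) // 2
    row, col = grasp_point
    return (row + top, col + left)

print(to_image_coords((184, 213)))  # -> (546, 575)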
TensorRT inference
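The script below deserializes a prebuilt grcnn.engine, so the ONNX model first has to be converted into a serialized TensorRT engine. One option is the trtexec command-line tool; the following is a minimal sketch of the equivalent TensorRT Python builder API (written against TensorRT 8.4+, where set_memory_pool_limit and build_serialized_network are available; older releases differ slightly):

import tensorrt as trt

logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
parser = trt.OnnxParser(network, logger)

# Parse the exported ONNX graph
with open("grcnn.onnx", "rb") as f:
    if not parser.parse(f.read()):
        for i in range(parser.num_errors):
            print(parser.get_error(i))
        raise RuntimeError("failed to parse grcnn.onnx")

config = builder.create_builder_config()
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 30)  # 1 GiB build workspace

# Build and serialize the engine to disk
engine_bytes = builder.build_serialized_network(network, config)
with open("grcnn.engine", "wb") as f:
    f.write(engine_bytes)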
import cv2
import numpy as np
import tensorrt as trt
import pycuda.autoinit
import pycuda.driver as cuda
from skimage.feature import peak_local_max


def process_data(rgb, depth, width, height, output_size):
    # Same preprocessing as in the onnxruntime script: center crop, normalize, stack depth + RGB
    left = (width - output_size) // 2
    top = (height - output_size) // 2
    right = (width + output_size) // 2
    bottom = (height + output_size) // 2

    depth_img = depth[top:bottom, left:right]
    depth_img = np.clip((depth_img - depth_img.mean()), -1, 1)
    depth_img = depth_img.transpose(2, 0, 1)

    rgb_img = rgb[top:bottom, left:right]
    rgb_img = rgb_img.astype(np.float32) / 255.0
    rgb_img -= rgb_img.mean()
    rgb_img = rgb_img.transpose(2, 0, 1)

    ret = np.concatenate((np.expand_dims(depth_img, 0), np.expand_dims(rgb_img, 0)), axis=1)
    return ret


if __name__ == '__main__':
    logger = trt.Logger(trt.Logger.WARNING)

    # Deserialize the prebuilt engine
    with open("grcnn.engine", "rb") as f, trt.Runtime(logger) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()

    # Page-locked host buffers for the input and the four output maps
    inputs_host = cuda.pagelocked_empty(trt.volume(context.get_binding_shape(0)), dtype=np.float32)
    output0_host = cuda.pagelocked_empty(trt.volume(context.get_binding_shape(1)), dtype=np.float32)
    output1_host = cuda.pagelocked_empty(trt.volume(context.get_binding_shape(2)), dtype=np.float32)
    output2_host = cuda.pagelocked_empty(trt.volume(context.get_binding_shape(3)), dtype=np.float32)
    output3_host = cuda.pagelocked_empty(trt.volume(context.get_binding_shape(4)), dtype=np.float32)

    # Matching device buffers
    inputs_device = cuda.mem_alloc(inputs_host.nbytes)
    output0_device = cuda.mem_alloc(output0_host.nbytes)
    output1_device = cuda.mem_alloc(output1_host.nbytes)
    output2_device = cuda.mem_alloc(output2_host.nbytes)
    output3_device = cuda.mem_alloc(output3_host.nbytes)
    stream = cuda.Stream()

    rgb = cv2.imread('0_e35c7e8c9f85cac42a2f0bc2931a19e_RGB.png', -1)
    depth = cv2.imread('0_e35c7e8c9f85cac42a2f0bc2931a19e_perfect_depth.tiff', -1)
    depth = np.expand_dims(np.array(depth), axis=2)
    input = process_data(rgb=rgb, depth=depth, width=1024, height=1024, output_size=300)
    np.copyto(inputs_host, input.ravel())

    # Copy the input to the GPU, run the engine asynchronously, then copy the outputs back
    cuda.memcpy_htod_async(inputs_device, inputs_host, stream)
    context.execute_async_v2(bindings=[int(inputs_device), int(output0_device), int(output1_device),
                                       int(output2_device), int(output3_device)],
                             stream_handle=stream.handle)
    cuda.memcpy_dtoh_async(output0_host, output0_device, stream)
    cuda.memcpy_dtoh_async(output1_host, output1_device, stream)
    cuda.memcpy_dtoh_async(output2_host, output2_device, stream)
    cuda.memcpy_dtoh_async(output3_host, output3_device, stream)
    stream.synchronize()

    # Outputs: quality map, cos(2*theta), sin(2*theta), width map
    q_img = output0_host.reshape(context.get_binding_shape(1)).squeeze()
    ang_img = (np.arctan2(output2_host.reshape(context.get_binding_shape(3)),
                          output1_host.reshape(context.get_binding_shape(2))) / 2.0).squeeze()
    width_img = output3_host.reshape(context.get_binding_shape(4)).squeeze() * 150.0

    # Smooth the maps before peak picking
    q_img = cv2.GaussianBlur(q_img, (0, 0), 2)
    ang_img = cv2.GaussianBlur(ang_img, (0, 0), 2)
    width_img = cv2.GaussianBlur(width_img, (0, 0), 1)

    # Pick the single highest-quality grasp
    local_max = peak_local_max(q_img, min_distance=20, threshold_abs=0.2, num_peaks=1)
    for grasp_point_array in local_max:
        grasp_point = tuple(grasp_point_array)
        grasp_angle = ang_img[grasp_point]
        width = width_img[grasp_point] / 2
        print(grasp_point, grasp_angle, width)
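For a quick visual sanity check of either pipeline, the predicted quality map can also be written out as a heat map. A small optional helper (save_quality_heatmap and the file name q_img.png are not part of the repo, just an illustration); calling save_quality_heatmap(q_img) at the end of either script dumps the map to disk:

import cv2
import numpy as np

def save_quality_heatmap(q_img, path="q_img.png"):
    # Normalize the quality map to [0, 255] and save it as a JET heat map
    q_norm = (q_img - q_img.min()) / (q_img.max() - q_img.min() + 1e-6)
    q_u8 = (255 * q_norm).astype(np.uint8)
    cv2.imwrite(path, cv2.applyColorMap(q_u8, cv2.COLORMAP_JET))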