You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
1. I have searched related issues but cannot get the expected help.
2. I have read the FAQ documentation but cannot get the expected help.
3. The bug has not been fixed in the latest version.
Describe the bug
I converted the model to a TensorRT engine on both Linux and Windows and ran inference with it, and found that inference was much slower than with PyTorch. The PyTorch inference time is 0.09 s and the TensorRT inference time is 0.3 s.
Reproduction
pytorch_infer:
# PyTorch baseline: time ten end-to-end calls of `inference_model`, including
# the transfer of the predicted mask back to the host as a uint8 array.
# `config` and `checkpoint` must be defined before this snippet runs.
model_mmseg = init_model(config, checkpoint, device=torch.device('cuda:0'))
img = cv2.imread("F:/AL_model/p4_20240806152513412_r1_c2.png")
if img is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError("cv2.imread could not read the test image")
for _ in range(10):
    start = time.time()
    outputtensor = inference_model(model_mmseg, img)
    # Drop the leading axis of the prediction and copy it to host memory.
    pred_mask = outputtensor.pred_sem_seg.data.squeeze(
        0).detach().cpu().numpy().astype(np.uint8)
    end = time.time()
    print(f"The time is {end -start}")
TRT infer:
import numpy as np
import cv2
import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt
import time
import ctypes
Checklist
Describe the bug
I converted the model to a TensorRT engine on both Linux and Windows and ran inference with it, and found that inference was much slower than with PyTorch. The PyTorch inference time is 0.09 s and the TensorRT inference time is 0.3 s.
Reproduction
pytorch_infer:
# PyTorch baseline: time ten end-to-end calls of `inference_model`, including
# the transfer of the predicted mask back to the host as a uint8 array.
# `config` and `checkpoint` must be defined before this snippet runs.
model_mmseg = init_model(config, checkpoint, device=torch.device('cuda:0'))
img = cv2.imread("F:/AL_model/p4_20240806152513412_r1_c2.png")
if img is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError("cv2.imread could not read the test image")
for _ in range(10):
    start = time.time()
    outputtensor = inference_model(model_mmseg, img)
    # Drop the leading axis of the prediction and copy it to host memory.
    pred_mask = outputtensor.pred_sem_seg.data.squeeze(
        0).detach().cpu().numpy().astype(np.uint8)
    end = time.time()
    print(f"The time is {end -start}")
TRT infer:
import numpy as np
import cv2
import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt
import time
import ctypes
# TensorRT reproduction: load a serialized engine, preprocess one image, and
# time ten executions of the engine on that image.

# Define the input and output dimensions.
imgsz = 1024
input_shape = (1, 3, imgsz, imgsz)
# Assumes the engine emits a (1, 1, H, W) int32 mask — TODO confirm against
# the engine's actual output binding.
output_shape = (1, 1, imgsz, imgsz)

# Raw string so the backslashes in the Windows path are never read as escape
# sequences.  Replace with your own path.
mmdeployop_dll_path = r"E:\workshop\mmdeploy-1.3.1\mmdeploy\lib/mmdeploy_tensorrt_ops.dll"
# Load the mmdeploy ops library before deserializing the engine (presumably
# it provides the custom plugins the engine references — verify).
ctypes.CDLL(mmdeployop_dll_path)

engine_file = r'./end2end.engine'
logger = trt.Logger(trt.Logger.VERBOSE)
with open(engine_file, 'rb') as f, trt.Runtime(logger) as runtime:
    trt.init_libnvinfer_plugins(logger, "mmdeploy")
    engine = runtime.deserialize_cuda_engine(f.read())
if engine is None:
    raise RuntimeError(f"Failed to deserialize TensorRT engine: {engine_file}")
context = engine.create_execution_context()
print("执行成功")

# The original snippet never defined `image_file`; this is the image used by
# the PyTorch run above.  Replace with your own path.
image_file = "F:/AL_model/p4_20240806152513412_r1_c2.png"
image = cv2.imread(image_file)
if image is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError(f"Cannot read image: {image_file}")
image = cv2.resize(image, (imgsz, imgsz)).astype(np.float32)

# Per-channel normalization, broadcast over the last (channel) axis of the
# HWC image: channel i is normalized with mean[i] / std[i].
# NOTE(review): cv2.imread yields BGR while these constants look like the
# usual RGB-ordered ImageNet statistics — confirm whether a BGR->RGB
# conversion is needed to match the PyTorch pipeline.
mean = np.array([123.675, 116.28, 103.53], dtype=np.float32)
std = np.array([58.395, 57.12, 57.375], dtype=np.float32)
image = (image - mean) / std

# HWC -> CHW, add the batch axis, and make the memory contiguous.
input_tensor = np.ascontiguousarray(np.transpose(image, (2, 0, 1))[np.newaxis])
print('img_tensor.shape', input_tensor.shape)

# PyCUDA documents that asynchronous copies require page-locked host memory,
# so stage both host buffers in pinned allocations.
h_input = cuda.pagelocked_empty(input_shape, dtype=np.float32)
np.copyto(h_input, input_tensor)
output_data = cuda.pagelocked_zeros(output_shape, dtype=np.int32)

# One device buffer per binding and a single stream, allocated once.
d_input = cuda.mem_alloc(h_input.nbytes)
d_output = cuda.mem_alloc(output_data.nbytes)
stream = cuda.Stream()

# The original passed the undefined name `origin_inputshape`; the tensor that
# is actually uploaded has shape `input_shape`.
context.set_input_shape("input", input_shape)

# Binding order assumes the input is binding 0 and the output is binding 1 —
# TODO confirm against the engine.
bindings = [int(d_input), int(d_output)]
stream_handle = stream.handle

# NOTE: the first iteration typically includes one-time warm-up cost, so judge
# the steady-state time from the later iterations.
for _ in range(10):
    cuda.memcpy_htod_async(d_input, h_input, stream)
    stream.synchronize()
    # Time only the engine execution, excluding the host<->device copies.
    start = time.time()
    context.execute_async_v2(bindings=bindings, stream_handle=stream_handle)
    stream.synchronize()
    end = time.time()
    cuda.memcpy_dtoh_async(output_data, d_output, stream)
    stream.synchronize()
    print(f"The time is {end - start}")
Environment
Error traceback
No response
The text was updated successfully, but these errors were encountered: