diff --git a/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py b/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py index 421d32bfbbc..dda4bcc3283 100644 --- a/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py +++ b/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py @@ -126,7 +126,9 @@ def fused_moe( (2, 0, ((0, ), lambda x: x)), )) - min_latency_tensor = torch.empty(1) if min_latency_mode else torch.empty(0) + # TODO: min_latency_mode is temporarily ignored (always treated as False) because of an error in the moe_kernels; restore the conditional allocation once that is fixed + min_latency_tensor = torch.empty(0) + # allocate workspace for profiling moe_runner = MoERunner( x_dtype=input.dtype,