From cc10a080f3592a303ea1b69b68c2ddd4d71346b3 Mon Sep 17 00:00:00 2001 From: mgoin Date: Mon, 2 Feb 2026 19:35:01 -0500 Subject: [PATCH] Disable RoutingMethodType.[Renormalize,RenormalizeNaive] for TRTLLM per-tensor FP8 MoE Signed-off-by: mgoin --- .../layers/fused_moe/flashinfer_trtllm_moe.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py b/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py index a066535c51eb..43e02d510430 100644 --- a/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py +++ b/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py @@ -72,8 +72,10 @@ def _supports_routing_method( # NOTE(dbari): as above, potentially allow others here. return routing_method in [ RoutingMethodType.Llama4, - RoutingMethodType.Renormalize, - RoutingMethodType.RenormalizeNaive, + # NOTE(mgoin): Disabled to investigate accuracy issues. + # See https://github.com/vllm-project/vllm/issues/33532 + # RoutingMethodType.Renormalize, + # RoutingMethodType.RenormalizeNaive, ] else: raise ValueError("Unsupported quantization scheme.")