From 152dff0e7a2d0f5a246f7ccb6ac408874c13f2ca Mon Sep 17 00:00:00 2001
From: Brayden Zhong
Date: Mon, 19 Jan 2026 17:26:25 -0500
Subject: [PATCH] more

---
 python/sglang/srt/layers/quantization/modelopt_quant.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/python/sglang/srt/layers/quantization/modelopt_quant.py b/python/sglang/srt/layers/quantization/modelopt_quant.py
index 828a4343e770..3fc5d71e6460 100755
--- a/python/sglang/srt/layers/quantization/modelopt_quant.py
+++ b/python/sglang/srt/layers/quantization/modelopt_quant.py
@@ -130,7 +130,11 @@ def fp4_gemm(
     fp4_backend = get_fp4_gemm_runner_backend()
     if enable_flashinfer_fp4_gemm:
         # Use the remapping logic to convert SGLang backend names to FlashInfer API names
-        backend = fp4_backend.get_flashinfer_backend()
+        backend = (
+            fp4_backend.get_flashinfer_backend()
+            if not fp4_backend.is_auto()
+            else "cutlass"
+        )
         return flashinfer_fp4_gemm(
             input, weight, input_sf, weight_sf, alpha, out_dtype, backend=backend
         )