From d34b93d33764ade7325f03b9cf36eea70649a316 Mon Sep 17 00:00:00 2001 From: Enrique Shockwave Date: Sun, 7 Dec 2025 23:29:43 +0000 Subject: [PATCH] fix sgl kernel marlin fp8 bias --- python/sglang/srt/layers/quantization/marlin_utils_fp8.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/sglang/srt/layers/quantization/marlin_utils_fp8.py b/python/sglang/srt/layers/quantization/marlin_utils_fp8.py index 94326d71e54d..cb112b3df1ad 100644 --- a/python/sglang/srt/layers/quantization/marlin_utils_fp8.py +++ b/python/sglang/srt/layers/quantization/marlin_utils_fp8.py @@ -62,7 +62,6 @@ def apply_fp8_marlin_linear( a=reshaped_x, c=None, b_q_weight=weight, - b_bias=bias, b_scales=weight_scale, global_scale=None, b_zeros=None, @@ -77,6 +76,9 @@ def apply_fp8_marlin_linear( use_fp32_reduce=use_fp32_reduce, ) + if bias is not None: + output.add_(bias) # In-place add + return output.reshape(out_shape)