diff --git a/python/sglang/srt/layers/quantization/marlin_utils_fp8.py b/python/sglang/srt/layers/quantization/marlin_utils_fp8.py
index 94326d71e54d..1e8e85be0131 100644
--- a/python/sglang/srt/layers/quantization/marlin_utils_fp8.py
+++ b/python/sglang/srt/layers/quantization/marlin_utils_fp8.py
@@ -62,7 +62,6 @@ def apply_fp8_marlin_linear(
         a=reshaped_x,
         c=None,
         b_q_weight=weight,
-        b_bias=bias,
         b_scales=weight_scale,
         global_scale=None,
         b_zeros=None,
@@ -77,6 +76,9 @@ def apply_fp8_marlin_linear(
         use_fp32_reduce=use_fp32_reduce,
     )
 
+    if bias is not None:
+        output.add_(bias)
+
     return output.reshape(out_shape)