From 40955b28751e261dad7171b3c345c6d1fea9f0a9 Mon Sep 17 00:00:00 2001 From: Yuhao Yang <47235274+yhyang201@users.noreply.github.com> Date: Tue, 19 May 2026 14:48:34 +0800 Subject: [PATCH] [Bug] Fix V4-Pro NaN on Blackwell by converting fp8_einsum input scale to ue8m0 (#25733) --- python/sglang/srt/models/deepseek_v4.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/sglang/srt/models/deepseek_v4.py b/python/sglang/srt/models/deepseek_v4.py index b8df0c3135e2..16d42526b6b3 100644 --- a/python/sglang/srt/models/deepseek_v4.py +++ b/python/sglang/srt/models/deepseek_v4.py @@ -594,6 +594,7 @@ def forward( o.reshape(T * G, D).contiguous(), group_size=128, ) + o_s = deep_gemm.ceil_to_ue8m0(o_s) output = torch.empty(T, G, R, device=o.device, dtype=torch.bfloat16) deep_gemm.fp8_einsum( "bhr,hdr->bhd",