diff --git a/python/sglang/srt/models/deepseek_nextn.py b/python/sglang/srt/models/deepseek_nextn.py index 8d7bc7b1819..7ca945d3a98 100644 --- a/python/sglang/srt/models/deepseek_nextn.py +++ b/python/sglang/srt/models/deepseek_nextn.py @@ -107,7 +107,11 @@ def forward( ) if not forward_batch.forward_mode.is_idle(): - hidden_states, _ = self.shared_head.norm(hidden_states, residual) + if residual is not None: + hidden_states, _ = self.shared_head.norm(hidden_states, residual) + else: + hidden_states = self.shared_head.norm(hidden_states) + return hidden_states