diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py index 5ce693efafa..85695b114bc 100644 --- a/python/sglang/srt/models/deepseek_v2.py +++ b/python/sglang/srt/models/deepseek_v2.py @@ -752,7 +752,7 @@ def forward_absorb( q_nope_out = q_nope_out.transpose(0, 1) k_nope = latent_cache[..., : self.kv_lora_rank] - k_nope = self.kv_a_layernorm(k_nope.contiguous()).unsqueeze(1) + k_nope = self.kv_a_layernorm(k_nope).unsqueeze(1) k_pe = latent_cache[..., self.kv_lora_rank :].unsqueeze(1) q_pe, k_pe = self.rotary_emb(positions, q_pe, k_pe)