diff --git a/python/sglang/srt/layers/attention/aiter_backend.py b/python/sglang/srt/layers/attention/aiter_backend.py index 44427ec81a2e..7833c2494cb1 100755 --- a/python/sglang/srt/layers/attention/aiter_backend.py +++ b/python/sglang/srt/layers/attention/aiter_backend.py @@ -2503,7 +2503,7 @@ def forward_decode( o = torch.empty_like(q, dtype=self.input_dtype) - max_kv_len = page_table.shape[1] + max_kv_len = page_table.shape[1] * self.page_size unified_attention( q=q.view(-1, layer.tp_q_head_num, layer.qk_head_dim),