Skip to content
33 changes: 21 additions & 12 deletions python/sglang/srt/managers/schedule_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -793,16 +793,23 @@ def alloc_paged_token_slots_extend(
raise RuntimeError(error_msg)
return out_cache_loc

@property
def new_page_count_next_decode(self):
    """Count the requests whose sequence length is a multiple of the page size.

    With a page size of 1 every request is counted, so the result is simply
    ``len(self.reqs)``. Otherwise a request is counted only when
    ``req.seqlen % page_size == 0``.

    NOTE(review): presumably such a request has filled its last page and
    needs a fresh one for the next decoded token -- confirm against the
    allocator.

    Returns:
        The number of matching requests in ``self.reqs``.
    """
    tokens_per_page = self.token_to_kv_pool_allocator.page_size
    if tokens_per_page == 1:
        # Every sequence length is a multiple of 1; skip the per-request scan.
        return len(self.reqs)

    page_aligned = 0
    for request in self.reqs:
        if request.seqlen % tokens_per_page == 0:
            page_aligned += 1
    return page_aligned

def alloc_paged_token_slots_decode(
self,
seq_lens: torch.Tensor,
last_loc: torch.Tensor,
):
if (
self.token_to_kv_pool_allocator.available_size()
< len(seq_lens) * self.token_to_kv_pool_allocator.page_size
):
if self.tree_cache is not None:
if self.tree_cache is not None:
if (
self.token_to_kv_pool_allocator.available_size()
< len(seq_lens) * self.token_to_kv_pool_allocator.page_size
):
self.tree_cache.evict(
len(seq_lens) * self.token_to_kv_pool_allocator.page_size,
)
Expand Down Expand Up @@ -1096,16 +1103,18 @@ def mix_with_running(self, running_batch: "ScheduleBatch"):
self.extend_logprob_start_lens.extend([0] * running_bs)

def check_decode_mem(self, buf_multiplier=1):
    """Return whether the KV pool has room for the next decode step.

    Demand is measured in token slots: ``self.new_page_count_next_decode``
    pages, times ``buf_multiplier``, times the allocator's ``page_size``.
    If the pool is short, the tree cache (when present) is asked to evict
    and the pool is checked once more.

    Args:
        buf_multiplier: Multiplier applied to the page demand (default 1).

    Returns:
        True if ``available_size()`` covers the demand, either immediately
        or after eviction; False otherwise.
    """
    allocator = self.token_to_kv_pool_allocator
    tokens_required = (
        self.new_page_count_next_decode * buf_multiplier * allocator.page_size
    )

    if allocator.available_size() >= tokens_required:
        return True

    # alloc_paged_token_slots_decode guards against a missing tree cache;
    # do the same here instead of raising AttributeError.
    if self.tree_cache is None:
        return False

    # NOTE(review): a reviewer asked whether this should evict only the
    # shortfall, i.e. tokens_required - allocator.available_size().
    # Requesting the full amount is kept here; confirm against the
    # semantics of tree_cache.evict before narrowing it.
    self.tree_cache.evict(tokens_required)

    return allocator.available_size() >= tokens_required

def retract_decode(self, server_args: ServerArgs):
"""Retract the decoding requests when there is not enough memory."""
Expand Down