Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions python/sglang/srt/managers/schedule_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -1558,6 +1558,38 @@ def new_page_count_next_decode(self, selected_indices: Optional[List[int]] = Non
else sum(1 for req in requests if (req.seqlen - 1) % page_size == 0)
)

def _calculate_eagle_extra_tokens(
    self, selected_indices: Optional[List[int]] = None
) -> int:
    """Return the additional token budget EAGLE speculative decoding adds.

    The budget is the sum of two per-sequence terms, each scaled by the
    number of sequences under consideration:

    * draft stage:  ``speculative_num_steps * speculative_eagle_topk``
    * verify stage: ``speculative_num_draft_tokens``

    All three settings are read from the global server arguments.

    Args:
        selected_indices: When given, only ``len(selected_indices)``
            sequences are counted; when ``None``, ``self.batch_size()``
            is used instead.

    Returns:
        The combined draft + verify token count, or ``0`` when
        ``self.spec_algorithm.is_eagle()`` is false.
    """
    if self.spec_algorithm.is_eagle():
        args = get_global_server_args()
        draft_steps = args.speculative_num_steps
        branch_width = args.speculative_eagle_topk
        verify_width = args.speculative_num_draft_tokens

        # An explicit selection overrides the full batch size.
        if selected_indices is None:
            seq_count = self.batch_size()
        else:
            seq_count = len(selected_indices)

        draft_budget = seq_count * draft_steps * branch_width
        verify_budget = seq_count * verify_width
        return draft_budget + verify_budget

    # Non-EAGLE algorithms add nothing here.
    return 0

def check_decode_mem(
self, buf_multiplier=1, selected_indices: Optional[List[int]] = None
):
Expand All @@ -1567,6 +1599,9 @@ def check_decode_mem(
* self.token_to_kv_pool_allocator.page_size
)

if self.spec_algorithm.is_eagle():
num_tokens += self._calculate_eagle_extra_tokens(selected_indices)

evict_from_tree_cache(self.tree_cache, num_tokens)
return self._is_available_size_sufficient(num_tokens)

Expand Down
Loading