Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions python/sglang/srt/managers/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2073,15 +2073,18 @@ def prepare_mlp_sync_batch_raw(
num_tokens_for_logprob = num_tokens
else:
num_tokens = local_batch.extend_num_tokens
num_tokens_for_logprob = sum(
[
if local_batch.return_logprob:
num_tokens_for_logprob = sum(
# We should have at least 1 token for sample in every case.
max(extend_len - logprob_start_len, 1)
for logprob_start_len, extend_len in zip(
local_batch.extend_logprob_start_lens, local_batch.extend_lens
local_batch.extend_logprob_start_lens,
local_batch.extend_lens,
)
]
)
)
else:
# When return_logprob = False, only need last token per request
num_tokens_for_logprob = local_batch.batch_size()

if local_batch is None or local_batch.forward_mode.is_decode_or_idle():
can_cuda_graph = 1
Expand Down
Loading