Commit: review comments
Varun Sundar Rabindranath committed Sep 27, 2024
1 parent e37b9db commit 89e790c
Showing 3 changed files with 9 additions and 10 deletions.
7 changes: 3 additions & 4 deletions vllm/core/scheduler.py
@@ -913,10 +913,9 @@ def _schedule_prefills(
                 break
             elif can_allocate == AllocStatus.NEVER:
                 logger.warning(
-                    "Input prompt (%d tokens) + lookahead slots "
-                    "({num_lookahead_slots}) is too long"
-                    " and exceeds the capacity of block_manager",
-                    num_new_tokens)
+                    "Input prompt (%d tokens) + lookahead slots (%d) is "
+                    "too long and exceeds the capacity of block_manager",
+                    num_new_tokens, num_lookahead_slots)
                 for seq in waiting_seqs:
                     seq.status = SequenceStatus.FINISHED_IGNORED
                 ignored_seq_groups.append(seq_group)
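
The scheduler change above fixes a mixed-formatting bug: the old call embedded a literal "{num_lookahead_slots}" placeholder inside a %-style logging format string and passed only one argument for two intended values. A minimal standalone sketch of the before/after behavior (the logger name and token counts here are made up for illustration):

    import logging

    logging.basicConfig(level=logging.WARNING)
    logger = logging.getLogger("demo")

    num_new_tokens, num_lookahead_slots = 4096, 8

    # Before: "{num_lookahead_slots}" is not a %-style specifier, so it is
    # printed verbatim and the lookahead-slot count never reaches the log.
    logger.warning(
        "Input prompt (%d tokens) + lookahead slots "
        "({num_lookahead_slots}) is too long"
        " and exceeds the capacity of block_manager",
        num_new_tokens)

    # After: both values go through lazy %-formatting, deferred until the
    # record is actually emitted.
    logger.warning(
        "Input prompt (%d tokens) + lookahead slots (%d) is "
        "too long and exceeds the capacity of block_manager",
        num_new_tokens, num_lookahead_slots)
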
10 changes: 5 additions & 5 deletions vllm/engine/llm_engine.py
@@ -974,6 +974,11 @@ def update_prefill_num_computed_tokens(
         seq_group_meta: SequenceGroupMetadata, num_outputs: int,
         is_first_step_output: Optional[bool]) -> None:
     """
+    When multi-step and chunked-prefill are enabled together, the
+    prefill sequences scheduled for multi-step execution turn into
+    decodes in the first step itself. This function accounts
+    for that conversion.
+
     seq_group: SequenceGroup - A prefill seq_group
     seq_group_meta: SequenceGroupMetadata - Metadata of the given
         prefill seq_group
@@ -987,11 +992,6 @@ def update_prefill_num_computed_tokens(
         must be None, as num_outputs > 1 indicates that outputs from
         all the steps in multi-step are submitted in a single burst.
         When multi-step is disabled, this value is always True.
-    When multi-step and chunked-prefill are enabled together, the
-    prefill sequence scheduled for multi-step execution turn into
-    decodes in the first step itself. This function accounts
-    for that conversion.
-
     """

     assert seq_group_meta.is_prompt
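
The relocated docstring paragraph describes the interaction this function handles: with multi-step scheduling and chunked prefill enabled together, a prefill whose final chunk is processed in the first step runs as a decode for the remaining steps, and its computed-token count must be advanced accordingly. A self-contained sketch of that accounting; the names PrefillState and advance_first_step are illustrative, not vLLM's actual API:

    from dataclasses import dataclass

    @dataclass
    class PrefillState:
        prompt_len: int           # total prompt tokens in the sequence
        num_computed_tokens: int  # prompt tokens processed so far

    def advance_first_step(state: PrefillState, chunk_size: int) -> bool:
        # Account for the first step of a multi-step run: process the
        # scheduled chunk, then report whether the prefill completed and
        # the sequence therefore decodes for the remaining steps.
        remaining = state.prompt_len - state.num_computed_tokens
        state.num_computed_tokens += min(chunk_size, remaining)
        return state.num_computed_tokens == state.prompt_len

    # A prefill with 8 prompt tokens whose last chunk (3 tokens) is
    # scheduled: after the first step it is fully computed, so steps
    # 2..N of the multi-step run treat it as a decode.
    s = PrefillState(prompt_len=8, num_computed_tokens=5)
    print(advance_first_step(s, chunk_size=3))  # True
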
2 changes: 1 addition & 1 deletion vllm/model_executor/sampling_metadata.py
@@ -160,7 +160,7 @@ def prepare_multistep_tensors(self, num_queries: int, device: str,

         Example:
             Let 2 prompts and 2 decodes be scheduled together. Let the
-            num-tokens to process for the 2 prompts be 5 and 8 resply.
+            num-tokens to process for the 2 prompts be 5 and 8 respectively.

             In that case, self.sampled_token_indices will be,
             [4, 12, 13, 14] as it is constructed for the first-step in
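
The corrected example can be verified in a few lines: with prompt chunks of 5 and 8 tokens followed by 2 decodes, the scheduled tokens occupy flat indices 0..14, and sampling happens at each prompt's last token plus every decode token. The helper below (sampled_token_indices is a made-up name, not vLLM's implementation) reproduces the documented result:

    from typing import List

    def sampled_token_indices(prompt_lens: List[int],
                              num_decodes: int) -> List[int]:
        indices, offset = [], 0
        for n in prompt_lens:
            offset += n
            indices.append(offset - 1)  # only the prompt's last token samples
        # every decode token is sampled
        indices.extend(range(offset, offset + num_decodes))
        return indices

    print(sampled_token_indices([5, 8], 2))  # [4, 12, 13, 14]
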
