Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1301,6 +1301,20 @@ def test_scheduler_config_init():
# InitVar does not become an attribute
print(SchedulerConfig.default_factory().max_model_len)

# Regression: max_num_scheduled_tokens must reject non-positive values at
# construction time (matches behavior of sibling max_num_batched_tokens
# and the existing <= 0 check on the speculative-decoding path; see #44123).
sched_kwargs = dict(max_model_len=2048, is_encoder_decoder=False)
# None is the sentinel meaning "default to max_num_batched_tokens".
SchedulerConfig(max_num_scheduled_tokens=None, **sched_kwargs)
# Positive integers are accepted.
SchedulerConfig(max_num_scheduled_tokens=4096, **sched_kwargs)
# Zero and negative integers are rejected by the field constraint.
with pytest.raises(ValidationError):
SchedulerConfig(max_num_scheduled_tokens=0, **sched_kwargs)
with pytest.raises(ValidationError):
SchedulerConfig(max_num_scheduled_tokens=-1, **sched_kwargs)


@pytest.mark.parametrize(
(
Expand Down
2 changes: 1 addition & 1 deletion vllm/config/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class SchedulerConfig:
In real usage, this should be set in `EngineArgs.create_engine_config`.
"""

max_num_scheduled_tokens: int | None = None
max_num_scheduled_tokens: int | None = Field(default=None, ge=1)
"""Maximum number of tokens that the scheduler may issue in a single iteration.

This is usually equal to max_num_batched_tokens, but can be smaller in cases
Expand Down
Loading