diff --git a/tests/test_config.py b/tests/test_config.py index 5c01d652a17a..f3ca866ae1e3 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1301,6 +1301,20 @@ def test_scheduler_config_init(): # InitVar does not become an attribute print(SchedulerConfig.default_factory().max_model_len) + # Regression: max_num_scheduled_tokens must reject non-positive values at + # construction time (matches behavior of sibling max_num_batched_tokens + # and the existing <= 0 check on the speculative-decoding path; see #44123). + sched_kwargs = dict(max_model_len=2048, is_encoder_decoder=False) + # None is the sentinel meaning "default to max_num_batched_tokens". + SchedulerConfig(max_num_scheduled_tokens=None, **sched_kwargs) + # Positive integers are accepted. + SchedulerConfig(max_num_scheduled_tokens=4096, **sched_kwargs) + # Zero and negative integers are rejected by the field constraint. + with pytest.raises(ValidationError): + SchedulerConfig(max_num_scheduled_tokens=0, **sched_kwargs) + with pytest.raises(ValidationError): + SchedulerConfig(max_num_scheduled_tokens=-1, **sched_kwargs) + @pytest.mark.parametrize( ( diff --git a/vllm/config/scheduler.py b/vllm/config/scheduler.py index fb6951ea7dd1..7257c23bf3ad 100644 --- a/vllm/config/scheduler.py +++ b/vllm/config/scheduler.py @@ -53,7 +53,7 @@ class SchedulerConfig: In real usage, this should be set in `EngineArgs.create_engine_config`. """ - max_num_scheduled_tokens: int | None = None + max_num_scheduled_tokens: int | None = Field(default=None, ge=1) """Maximum number of tokens that the scheduler may issue in a single iteration. This is usually equal to max_num_batched_tokens, but can be smaller in cases