Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
71adc02
[Feature] Pydantic validation for scheduler.py and structured_outputs.py
vrdn-23 Oct 9, 2025
df6b089
Merge remote-tracking branch 'origin/main' into vrdn-23/pydantic-conf…
vrdn-23 Oct 9, 2025
7de7d59
Fix `is_encoder_decoder`
hmellor Oct 10, 2025
b6f2a06
Fix deferred defaults
hmellor Oct 10, 2025
5984a27
Use `InitVar` to fix docs build
hmellor Oct 10, 2025
3b10a25
Merge branch 'main' into vrdn-23/pydantic-config-structured-speculative
vrdn-23 Oct 10, 2025
4e26198
Fixing tests
vrdn-23 Oct 10, 2025
cf2ce2e
Trying a before model validator
vrdn-23 Oct 11, 2025
5f19b02
Fix before model validator
vrdn-23 Oct 11, 2025
35c0611
hail mary
vrdn-23 Oct 11, 2025
e310ec4
Adding optionals
vrdn-23 Oct 11, 2025
900f6e0
Revert commits after 5984a27e7aeb7780942275ab8dc4e140697ff81b
hmellor Oct 12, 2025
38c69c8
Merge branch 'main' into pr/vrdn-23/26519
hmellor Oct 12, 2025
a803c79
Skip validation for max_num_batched_tokens because the fallback is dy…
hmellor Oct 12, 2025
313d352
Fix merge conflicts
vrdn-23 Oct 30, 2025
5f0f952
Fix merge conflicts
vrdn-23 Oct 30, 2025
4214e4d
Merge remote-tracking branch 'origin/main' into vrdn-23/pydantic-conf…
vrdn-23 Oct 30, 2025
2f7716a
Set model_max_len in model_validator
vrdn-23 Oct 31, 2025
775e21a
Merge branch 'main' into vrdn-23/pydantic-config-structured-speculative
vrdn-23 Oct 31, 2025
c4415aa
Do the same for max_num_seqs
vrdn-23 Oct 31, 2025
e274f05
Merge branch 'main' into vrdn-23/pydantic-config-structured-speculative
vrdn-23 Oct 31, 2025
6fb2ee0
Merge branch 'main' into pr/vrdn-23/26519
hmellor Oct 31, 2025
d68a8b7
Fix
hmellor Oct 31, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 31 additions & 31 deletions vllm/config/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import hashlib
from dataclasses import InitVar, field
from collections.abc import Callable
from dataclasses import InitVar
from typing import Any, Literal

from pydantic import SkipValidation, model_validator
from pydantic import Field, field_validator, model_validator
from pydantic.dataclasses import dataclass
from typing_extensions import Self

Expand All @@ -31,28 +32,28 @@ class SchedulerConfig:
runner_type: RunnerType = "generate"
"""The runner type to launch for the model."""

max_num_batched_tokens: SkipValidation[int] = None # type: ignore
max_num_batched_tokens: int = Field(default=None, ge=1)
"""Maximum number of tokens to be processed in a single iteration.

This config has no static default. If left unspecified by the user, it will
be set in `EngineArgs.create_engine_config` based on the usage context."""

max_num_seqs: SkipValidation[int] = None # type: ignore
max_num_seqs: int = Field(default=None, ge=1)
"""Maximum number of sequences to be processed in a single iteration.

This config has no static default. If left unspecified by the user, it will
be set in `EngineArgs.create_engine_config` based on the usage context."""

max_model_len: SkipValidation[int] = None # type: ignore
max_model_len: int = Field(default=None, ge=1)
"""Maximum length of a sequence (including prompt and generated text). This
is primarily set in `ModelConfig` and that value should be manually
duplicated here."""

max_num_partial_prefills: int = 1
max_num_partial_prefills: int = Field(default=1, ge=1)
"""For chunked prefill, the maximum number of sequences that can be
partially prefilled concurrently."""

max_long_partial_prefills: int = 1
max_long_partial_prefills: int = Field(default=1, ge=1)
"""For chunked prefill, the maximum number of prompts longer than
long_prefill_token_threshold that will be prefilled concurrently. Setting
this less than max_num_partial_prefills will allow shorter prompts to jump
Expand All @@ -62,7 +63,7 @@ class SchedulerConfig:
"""For chunked prefill, a request is considered long if the prompt is
longer than this number of tokens."""

num_lookahead_slots: int = 0
num_lookahead_slots: int = Field(default=0, ge=0)
"""The number of slots to allocate per sequence per
step, beyond the known token ids. This is used in speculative
decoding to store KV activations of tokens which may or may not be
Expand All @@ -71,7 +72,7 @@ class SchedulerConfig:
NOTE: This will be replaced by speculative config in the future; it is
present to enable correctness tests until then."""

enable_chunked_prefill: SkipValidation[bool] = None # type: ignore
enable_chunked_prefill: bool = Field(default=None)
"""If True, prefill requests can be chunked based
on the remaining max_num_batched_tokens."""

Expand All @@ -86,14 +87,14 @@ class SchedulerConfig:
"""

# TODO (ywang96): Make this configurable.
max_num_encoder_input_tokens: int = field(init=False)
max_num_encoder_input_tokens: int = Field(init=False)
"""Multimodal encoder compute budget, only used in V1.

NOTE: This is not currently configurable. It will be overridden by
max_num_batched_tokens in case max multimodal embedding size is larger."""

# TODO (ywang96): Make this configurable.
encoder_cache_size: int = field(init=False)
encoder_cache_size: int = Field(init=False)
"""Multimodal encoder cache size, only used in V1.

NOTE: This is not currently configurable. It will be overridden by
Expand All @@ -106,7 +107,7 @@ class SchedulerConfig:
- "priority" means requests are handled based on given priority (lower
value means earlier handling), with time of arrival deciding any ties."""

chunked_prefill_enabled: bool = field(init=False)
chunked_prefill_enabled: bool = Field(init=False)
"""True if chunked prefill is enabled."""

disable_chunked_mm_input: bool = False
Expand Down Expand Up @@ -155,6 +156,20 @@ def compute_hash(self) -> str:
hash_str = hashlib.md5(str(factors).encode(), usedforsecurity=False).hexdigest()
return hash_str

@field_validator(
    "max_num_batched_tokens",
    "max_num_seqs",
    "max_model_len",
    "enable_chunked_prefill",
    mode="wrap",
)
@classmethod
def _skip_none_validation(cls, value: Any, handler: Callable) -> Any:
    """Let `None` pass through unvalidated.

    These fields have no static default: they stay `None` until
    `EngineArgs.create_engine_config` fills them in, so constraint
    checks (e.g. `ge=1`) run only on real, non-None values.
    """
    return value if value is None else handler(value)

def __post_init__(self, is_encoder_decoder: bool) -> None:
if self.max_model_len is None:
self.max_model_len = 8192
Expand Down Expand Up @@ -260,19 +275,7 @@ def _verify_args(self) -> Self:
self.max_num_seqs * self.max_model_len,
)

if self.num_lookahead_slots < 0:
raise ValueError(
"num_lookahead_slots "
f"({self.num_lookahead_slots}) must be greater than or "
"equal to 0."
)

if self.max_num_partial_prefills < 1:
raise ValueError(
f"max_num_partial_prefills ({self.max_num_partial_prefills}) "
"must be greater than or equal to 1."
)
elif self.max_num_partial_prefills > 1:
if self.max_num_partial_prefills > 1:
if not self.chunked_prefill_enabled:
raise ValueError(
"Chunked prefill must be enabled to set "
Expand All @@ -286,13 +289,10 @@ def _verify_args(self) -> Self:
f"than the max_model_len ({self.max_model_len})."
)

if (self.max_long_partial_prefills < 1) or (
self.max_long_partial_prefills > self.max_num_partial_prefills
):
if self.max_long_partial_prefills > self.max_num_partial_prefills:
raise ValueError(
f"max_long_partial_prefills ({self.max_long_partial_prefills}) "
"must be greater than or equal to 1 and less than or equal to "
f"max_num_partial_prefills ({self.max_num_partial_prefills})."
f"{self.max_long_partial_prefills=} must be less than or equal to "
f"{self.max_num_partial_prefills=}."
)

return self
7 changes: 5 additions & 2 deletions vllm/config/structured_outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import hashlib
from typing import Any, Literal
from typing import Any, Literal, Self

from pydantic import model_validator
from pydantic.dataclasses import dataclass

from vllm.config.utils import config
Expand Down Expand Up @@ -56,7 +57,8 @@ def compute_hash(self) -> str:
hash_str = hashlib.md5(str(factors).encode(), usedforsecurity=False).hexdigest()
return hash_str

def __post_init__(self):
@model_validator(mode="after")
def _validate_structured_output_config(self) -> Self:
if self.disable_any_whitespace and self.backend not in ("xgrammar", "guidance"):
raise ValueError(
"disable_any_whitespace is only supported for "
Expand All @@ -67,3 +69,4 @@ def __post_init__(self):
"disable_additional_properties is only supported "
"for the guidance backend."
)
return self
2 changes: 1 addition & 1 deletion vllm/engine/arg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1813,7 +1813,7 @@ def _set_default_args(
incremental_prefill_supported = (
pooling_type is not None
and pooling_type.lower() == "last"
and is_causal
and bool(is_causal)
)

action = "Enabling" if incremental_prefill_supported else "Disabling"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import json
import re
import uuid
from collections.abc import Sequence
from typing import Any

import regex as re

from vllm.entrypoints.openai.protocol import (
ChatCompletionRequest,
DeltaFunctionCall,
Expand Down