From 5e167cf4548735a8efcf383ff1c5666b58243f19 Mon Sep 17 00:00:00 2001 From: vnadathur Date: Wed, 19 Nov 2025 19:24:47 -0800 Subject: [PATCH 01/57] return factors; fix comments Updated all config classes to support an optional 'return_factors' argument in their compute_hash methods, allowing retrieval of hash factors instead of just the hash string. Signed-off-by: vnadathur Co-Authored-By: Srreyansh Sethi <107075589+WorldExplored@users.noreply.github.com> --- vllm/config/cache.py | 11 +++++++---- vllm/config/compilation.py | 22 ++++++++++++---------- vllm/config/device.py | 10 +++++----- vllm/config/ec_transfer.py | 10 +++++----- vllm/config/kv_transfer.py | 10 +++++----- vllm/config/load.py | 10 +++++----- vllm/config/lora.py | 10 +++++----- vllm/config/model.py | 17 +++++++++++++---- vllm/config/multimodal.py | 10 +++++----- vllm/config/observability.py | 10 +++++----- vllm/config/parallel.py | 11 +++++++---- vllm/config/pooler.py | 10 +++++----- vllm/config/scheduler.py | 10 +++++----- vllm/config/speculative.py | 10 +++++----- vllm/config/structured_outputs.py | 10 +++++----- vllm/config/utils.py | 3 ++- vllm/config/vllm.py | 7 +++++-- 17 files changed, 101 insertions(+), 80 deletions(-) diff --git a/vllm/config/cache.py b/vllm/config/cache.py index 2652c7c06ad0..ae12f6f1cc0c 100644 --- a/vllm/config/cache.py +++ b/vllm/config/cache.py @@ -7,7 +7,7 @@ from pydantic import Field, SkipValidation, field_validator from pydantic.dataclasses import dataclass -from vllm.config.utils import config +from vllm.config.utils import HashResult, config, get_hash_factors, hash_factors from vllm.logger import init_logger from vllm.utils.mem_constants import GiB_bytes from vllm.utils.mem_utils import get_cpu_memory @@ -147,7 +147,7 @@ class CacheConfig: 'native' (vLLM native CPU offloading), 'lmcache' This option must be used together with kv_offloading_size.""" - def compute_hash(self) -> str: + def compute_hash(self, *, return_factors: bool = False) -> HashResult: """ 
WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -158,6 +158,9 @@ def compute_hash(self) -> str: graph from input ids/embeddings to the final hidden states, excluding anything before input ids/embeddings and after the final hidden states. + + This config uses an opt-out hash: start from every dataclass field and + then drop the `ignored_factors` below. """ ignored_factors = { # Runtime/derived knobs that don't affect compiled graph shape @@ -178,9 +181,9 @@ def compute_hash(self) -> str: "kv_sharing_fast_prefill", } - from vllm.config.utils import get_hash_factors, hash_factors - factors = get_hash_factors(self, ignored_factors) + if return_factors: + return factors if factors else [] return hash_factors(factors) def metrics_info(self): diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index 1c3ef502f0f4..8f1efa741ea3 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -13,7 +13,7 @@ import vllm.envs as envs from vllm.compilation.inductor_pass import CallableInductorPass, InductorPass -from vllm.config.utils import config +from vllm.config.utils import HashResult, config, get_hash_factors, hash_factors from vllm.logger import init_logger from vllm.platforms import current_platform from vllm.utils.import_utils import resolve_obj_by_qualname @@ -159,13 +159,16 @@ def default_fi_allreduce_fusion_max_size_mb() -> dict[int, float]: current_platform.get_device_capability().to_int(), {} ) - def compute_hash(self) -> str: + def compute_hash(self, *, return_factors: bool = False) -> HashResult: """ Produces a hash unique to the pass configuration. Any new fields that affect compilation should be added to the hash. Any future fields that don't affect compilation should be excluded. 
""" - return InductorPass.hash_dict(asdict(self)) + factors = asdict(self) + if return_factors: + return factors if factors else [] + return InductorPass.hash_dict(factors) def __post_init__(self) -> None: if not self.enable_noop: @@ -503,18 +506,17 @@ class CompilationConfig: "vllm::sparse_attn_indexer", ] - def compute_hash(self) -> str: + def compute_hash(self, *, return_factors: bool = False) -> HashResult: """ Provide a hash that uniquely identifies all the configs that affect the structure of the computation graph from input ids/embeddings to the final hidden states, excluding anything before input ids/embeddings and after the final hidden states. - """ - # Opt-out: default-include declared fields; keep a tiny exclude set; - # normalize types; keep SHA-256. For nested opaque configs, include a - # stable identifier (e.g., pass_config.compute_hash()) instead of object id. + This config follows the opt-out hashing pattern: start from every + dataclass field and remove the `ignored_factors` list below. 
+ """ ignored_factors = { # Paths/dirs and runtime/metrics that don’t affect compiled graph "debug_dump_path", @@ -527,10 +529,10 @@ def compute_hash(self) -> str: "pass_config", # handled separately below } - from vllm.config.utils import get_hash_factors, hash_factors - factors = get_hash_factors(self, ignored_factors) factors["pass_config"] = self.pass_config.compute_hash() + if return_factors: + return factors if factors else [] return hash_factors(factors) def __repr__(self) -> str: diff --git a/vllm/config/device.py b/vllm/config/device.py index e85cd15de8cf..033ed126387d 100644 --- a/vllm/config/device.py +++ b/vllm/config/device.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import hashlib from dataclasses import field from typing import Any, Literal @@ -9,7 +8,7 @@ from pydantic import ConfigDict, SkipValidation from pydantic.dataclasses import dataclass -from vllm.config.utils import config +from vllm.config.utils import HashResult, config, hash_factors, normalize_value Device = Literal["auto", "cuda", "cpu", "tpu", "xpu"] @@ -29,7 +28,7 @@ class DeviceConfig: """Device type from the current platform. This is set in `__post_init__`.""" - def compute_hash(self) -> str: + def compute_hash(self, *, return_factors: bool = False) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -45,8 +44,9 @@ def compute_hash(self) -> str: # the device/platform information will be summarized # by torch/vllm automatically. 
factors: list[Any] = [] - hash_str = hashlib.md5(str(factors).encode(), usedforsecurity=False).hexdigest() - return hash_str + if return_factors: + return factors if factors else [] + return hash_factors({"factors": normalize_value(factors)}) def __post_init__(self): if self.device == "auto": diff --git a/vllm/config/ec_transfer.py b/vllm/config/ec_transfer.py index d95236f818ab..36defba581ec 100644 --- a/vllm/config/ec_transfer.py +++ b/vllm/config/ec_transfer.py @@ -1,13 +1,12 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import hashlib import uuid from dataclasses import field from typing import Any, Literal, get_args from pydantic.dataclasses import dataclass -from vllm.config.utils import config +from vllm.config.utils import HashResult, config, hash_factors, normalize_value ECProducer = Literal["ec_producer"] ECConsumer = Literal["ec_consumer"] @@ -60,7 +59,7 @@ class ECTransferConfig: """The Python module path to dynamically load the EC connector from. Only supported in V1.""" - def compute_hash(self) -> str: + def compute_hash(self, *, return_factors: bool = False) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -75,8 +74,9 @@ def compute_hash(self) -> str: # no factors to consider. # this config will not affect the computation graph. 
factors: list[Any] = [] - hash_str = hashlib.md5(str(factors).encode(), usedforsecurity=False).hexdigest() - return hash_str + if return_factors: + return factors if factors else [] + return hash_factors({"factors": normalize_value(factors)}) def __post_init__(self) -> None: if self.engine_id is None: diff --git a/vllm/config/kv_transfer.py b/vllm/config/kv_transfer.py index dfd7ef63712a..7584aa951ca0 100644 --- a/vllm/config/kv_transfer.py +++ b/vllm/config/kv_transfer.py @@ -1,14 +1,13 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import hashlib import uuid from dataclasses import field from typing import Any, Literal, get_args from pydantic.dataclasses import dataclass -from vllm.config.utils import config +from vllm.config.utils import HashResult, config, hash_factors, normalize_value KVProducer = Literal["kv_producer", "kv_both"] KVConsumer = Literal["kv_consumer", "kv_both"] @@ -64,7 +63,7 @@ class KVTransferConfig: enable_permute_local_kv: bool = False """Experiment feature flag to enable HND to NHD KV Transfer""" - def compute_hash(self) -> str: + def compute_hash(self, *, return_factors: bool = False) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -79,8 +78,9 @@ def compute_hash(self) -> str: # no factors to consider. # this config will not affect the computation graph. 
factors: list[Any] = [] - hash_str = hashlib.md5(str(factors).encode(), usedforsecurity=False).hexdigest() - return hash_str + if return_factors: + return factors if factors else [] + return hash_factors({"factors": normalize_value(factors)}) def __post_init__(self) -> None: if self.engine_id is None: diff --git a/vllm/config/load.py b/vllm/config/load.py index e424f8c5edb6..cd1261a99e6a 100644 --- a/vllm/config/load.py +++ b/vllm/config/load.py @@ -1,13 +1,12 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import hashlib from typing import TYPE_CHECKING, Any from pydantic import Field, field_validator from pydantic.dataclasses import dataclass -from vllm.config.utils import config +from vllm.config.utils import HashResult, config, hash_factors, normalize_value from vllm.logger import init_logger if TYPE_CHECKING: @@ -89,7 +88,7 @@ class LoadConfig: see original doc for `map_location` in https://pytorch.org/docs/stable/generated/torch.load.html """ - def compute_hash(self) -> str: + def compute_hash(self, *, return_factors: bool = False) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -104,8 +103,9 @@ def compute_hash(self) -> str: # no factors to consider. # this config will not affect the computation graph. 
factors: list[Any] = [] - hash_str = hashlib.md5(str(factors).encode(), usedforsecurity=False).hexdigest() - return hash_str + if return_factors: + return factors if factors else [] + return hash_factors({"factors": normalize_value(factors)}) @field_validator("load_format", mode="after") def _lowercase_load_format(cls, load_format: str) -> str: diff --git a/vllm/config/lora.py b/vllm/config/lora.py index 84e92eef4007..8599ff689327 100644 --- a/vllm/config/lora.py +++ b/vllm/config/lora.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import hashlib from typing import TYPE_CHECKING, Any, ClassVar, Literal import torch @@ -9,7 +8,7 @@ from pydantic.dataclasses import dataclass from typing_extensions import Self -from vllm.config.utils import config +from vllm.config.utils import HashResult, config, hash_factors, normalize_value from vllm.logger import init_logger from vllm.platforms import current_platform @@ -70,7 +69,7 @@ class LoRAConfig: will be automatically assigned to 1-n with the names of the modalities in alphabetic order.""" - def compute_hash(self) -> str: + def compute_hash(self, *, return_factors: bool = False) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -90,8 +89,9 @@ def compute_hash(self) -> str: factors.append(self.lora_extra_vocab_size) factors.append(self.lora_vocab_padding_size) - hash_str = hashlib.md5(str(factors).encode(), usedforsecurity=False).hexdigest() - return hash_str + if return_factors: + return factors if factors else [] + return hash_factors({"factors": normalize_value(factors)}) @model_validator(mode="after") def _validate_lora_config(self) -> Self: diff --git a/vllm/config/model.py b/vllm/config/model.py index d1e56a72a318..8c551dee3992 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -17,7 +17,13 @@ from vllm.config.multimodal import MMCacheType, 
MMEncoderTPMode, MultiModalConfig from vllm.config.pooler import PoolerConfig from vllm.config.scheduler import RunnerType -from vllm.config.utils import config, getattr_iter +from vllm.config.utils import ( + HashResult, + config, + getattr_iter, + get_hash_factors, + hash_factors, +) from vllm.logger import init_logger from vllm.platforms import current_platform from vllm.transformers_utils.config import ( @@ -311,7 +317,7 @@ class ModelConfig: skip_mm_profiling: InitVar[bool | None] = None video_pruning_rate: InitVar[float | None] = None - def compute_hash(self) -> str: + def compute_hash(self, *, return_factors: bool = False) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -322,6 +328,9 @@ def compute_hash(self) -> str: graph from input ids/embeddings to the final hidden states, excluding anything before input ids/embeddings and after the final hidden states. + + This config is opt-out hashed: include every dataclass field except for + those explicitly listed in `ignored_factors`. 
""" ignored_factors = { "runner", @@ -363,9 +372,9 @@ def compute_hash(self) -> str: "skip_mm_profiling", } - from vllm.config.utils import get_hash_factors, hash_factors - factors = get_hash_factors(self, ignored_factors) + if return_factors: + return factors if factors else [] return hash_factors(factors) def _update_nested( diff --git a/vllm/config/multimodal.py b/vllm/config/multimodal.py index 9f62b35ed515..449ee16e2976 100644 --- a/vllm/config/multimodal.py +++ b/vllm/config/multimodal.py @@ -1,14 +1,13 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import hashlib from collections.abc import Mapping from typing import TYPE_CHECKING, Any, Literal, TypeAlias from pydantic import ConfigDict, Field, field_validator, model_validator from pydantic.dataclasses import dataclass -from vllm.config.utils import config +from vllm.config.utils import HashResult, config, hash_factors, normalize_value if TYPE_CHECKING: from vllm.attention.backends.registry import AttentionBackendEnum @@ -193,7 +192,7 @@ def _validate_multimodal_config(self): ) return self - def compute_hash(self) -> str: + def compute_hash(self, *, return_factors: bool = False) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -210,8 +209,9 @@ def compute_hash(self) -> str: if self.mm_encoder_attn_backend is not None else None ] - hash_str = hashlib.md5(str(factors).encode(), usedforsecurity=False).hexdigest() - return hash_str + if return_factors: + return factors if factors else [] + return hash_factors({"factors": normalize_value(factors)}) def get_limit_per_prompt(self, modality: str) -> int: """ diff --git a/vllm/config/observability.py b/vllm/config/observability.py index 564c4f7aed41..6fe4c1be6cb5 100644 --- a/vllm/config/observability.py +++ b/vllm/config/observability.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright 
contributors to the vLLM project -import hashlib from functools import cached_property from typing import Any, Literal, cast @@ -10,7 +9,7 @@ from pydantic.dataclasses import dataclass from vllm import version -from vllm.config.utils import config +from vllm.config.utils import HashResult, config, hash_factors, normalize_value DetailedTraceModules = Literal["model", "worker", "all"] @@ -63,7 +62,7 @@ def collect_model_execute_time(self) -> bool: or "all" in self.collect_detailed_traces ) - def compute_hash(self) -> str: + def compute_hash(self, *, return_factors: bool = False) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -78,8 +77,9 @@ def compute_hash(self) -> str: # no factors to consider. # this config will not affect the computation graph. factors: list[Any] = [] - hash_str = hashlib.md5(str(factors).encode(), usedforsecurity=False).hexdigest() - return hash_str + if return_factors: + return factors if factors else [] + return hash_factors({"factors": normalize_value(factors)}) @field_validator("show_hidden_metrics_for_version") @classmethod diff --git a/vllm/config/parallel.py b/vllm/config/parallel.py index 4b0236d8de3f..72a2e82e2d48 100644 --- a/vllm/config/parallel.py +++ b/vllm/config/parallel.py @@ -11,7 +11,7 @@ from typing_extensions import Self import vllm.envs as envs -from vllm.config.utils import config +from vllm.config.utils import HashResult, config, get_hash_factors, hash_factors from vllm.logger import init_logger from vllm.model_executor.layers.batch_invariant import ( vllm_is_batch_invariant, @@ -454,7 +454,7 @@ def sync_kv_cache_memory_size(dp_group: ProcessGroup, kv_cache_memory: int) -> i torch.distributed.all_reduce(tensor, op=ReduceOp.MIN, group=dp_group) return tensor.item() - def compute_hash(self): + def compute_hash(self, *, return_factors: bool = False) -> HashResult: """ Provide a hash that uniquely identifies all the configs that affect the structure 
of the computation @@ -464,6 +464,9 @@ def compute_hash(self): This hash is also used for DP worker configuration validation to prevent hangs from mismatched collective communication patterns. + + This is an opt-out hash: begin with every dataclass field on the config + and drop the `ignored_factors` listed below. """ ignored_factors = { # Derived/runtime topology, networking, or launch details @@ -494,11 +497,11 @@ def compute_hash(self): "_api_process_rank", } - from vllm.config.utils import get_hash_factors, hash_factors - factors = get_hash_factors(self, ignored_factors) # Explicitly include backend affecting env factor as before factors["VLLM_ALL2ALL_BACKEND"] = str(envs.VLLM_ALL2ALL_BACKEND) + if return_factors: + return factors if factors else [] return hash_factors(factors) def __post_init__(self) -> None: diff --git a/vllm/config/pooler.py b/vllm/config/pooler.py index 6bece8d0785b..a4323cf65886 100644 --- a/vllm/config/pooler.py +++ b/vllm/config/pooler.py @@ -1,12 +1,11 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import hashlib from typing import Any from pydantic.dataclasses import dataclass -from vllm.config.utils import config +from vllm.config.utils import HashResult, config, hash_factors, normalize_value from vllm.logger import init_logger logger = init_logger(__name__) @@ -87,7 +86,7 @@ def __post_init__(self): # raise deprecated warning for softmax and activation self.use_activation = get_use_activation(self) - def compute_hash(self) -> str: + def compute_hash(self, *, return_factors: bool = False) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -102,8 +101,9 @@ def compute_hash(self) -> str: # no factors to consider. # this config will not affect the computation graph. 
factors: list[Any] = [] - hash_str = hashlib.md5(str(factors).encode(), usedforsecurity=False).hexdigest() - return hash_str + if return_factors: + return factors if factors else [] + return hash_factors({"factors": normalize_value(factors)}) def get_use_activation(o: object): diff --git a/vllm/config/scheduler.py b/vllm/config/scheduler.py index 8194295ffedb..d703321ed1da 100644 --- a/vllm/config/scheduler.py +++ b/vllm/config/scheduler.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import hashlib from collections.abc import Callable from dataclasses import InitVar from typing import TYPE_CHECKING, Any, ClassVar, Literal, cast @@ -10,7 +9,7 @@ from pydantic.dataclasses import dataclass from typing_extensions import Self, deprecated -from vllm.config.utils import config +from vllm.config.utils import HashResult, config, hash_factors, normalize_value from vllm.logger import init_logger from vllm.utils.import_utils import resolve_obj_by_qualname @@ -172,7 +171,7 @@ def get_scheduler_cls(self) -> type["SchedulerInterface"]: return cast(type["SchedulerInterface"], self.scheduler_cls) return resolve_obj_by_qualname(self.scheduler_cls) - def compute_hash(self) -> str: + def compute_hash(self, *, return_factors: bool = False) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -187,8 +186,9 @@ def compute_hash(self) -> str: # no factors to consider. # this config will not affect the computation graph. 
factors: list[Any] = [] - hash_str = hashlib.md5(str(factors).encode(), usedforsecurity=False).hexdigest() - return hash_str + if return_factors: + return factors if factors else [] + return hash_factors({"factors": normalize_value(factors)}) @field_validator("scheduler_cls", "async_scheduling", mode="wrap") @classmethod diff --git a/vllm/config/speculative.py b/vllm/config/speculative.py index 13a8632413d9..9919c8d0607f 100644 --- a/vllm/config/speculative.py +++ b/vllm/config/speculative.py @@ -2,7 +2,6 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import ast -import hashlib from typing import TYPE_CHECKING, Any, Literal, get_args from pydantic import Field, SkipValidation, model_validator @@ -10,7 +9,7 @@ from typing_extensions import Self from vllm.config.parallel import ParallelConfig -from vllm.config.utils import config +from vllm.config.utils import HashResult, config, hash_factors, normalize_value from vllm.logger import init_logger from vllm.utils.import_utils import LazyLoader, has_arctic_inference @@ -147,7 +146,7 @@ class SpeculativeConfig: tokens with estimated probability (based on frequency counts) greater than or equal to this value.""" - def compute_hash(self) -> str: + def compute_hash(self, *, return_factors: bool = False) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -163,8 +162,9 @@ def compute_hash(self) -> str: # Eagle3 affects the computation graph because it returns intermediate # hidden states in addition to the final hidden state. 
factors.append(self.method == "eagle3") - hash_str = hashlib.md5(str(factors).encode(), usedforsecurity=False).hexdigest() - return hash_str + if return_factors: + return factors if factors else [] + return hash_factors({"factors": normalize_value(factors)}) @staticmethod def hf_config_override(hf_config: PretrainedConfig) -> PretrainedConfig: diff --git a/vllm/config/structured_outputs.py b/vllm/config/structured_outputs.py index 9530d3d81e15..6ada688ce700 100644 --- a/vllm/config/structured_outputs.py +++ b/vllm/config/structured_outputs.py @@ -1,14 +1,13 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import hashlib from typing import Any, Literal from pydantic import model_validator from pydantic.dataclasses import dataclass from typing_extensions import Self -from vllm.config.utils import config +from vllm.config.utils import HashResult, config, hash_factors, normalize_value StructuredOutputsBackend = Literal[ "auto", "xgrammar", "guidance", "outlines", "lm-format-enforcer" @@ -43,7 +42,7 @@ class StructuredOutputsConfig: enable_in_reasoning: bool = False """Whether to use structured input for reasoning.""" - def compute_hash(self) -> str: + def compute_hash(self, *, return_factors: bool = False) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -58,8 +57,9 @@ def compute_hash(self) -> str: # no factors to consider. # this config will not affect the computation graph. 
factors: list[Any] = [] - hash_str = hashlib.md5(str(factors).encode(), usedforsecurity=False).hexdigest() - return hash_str + if return_factors: + return factors if factors else [] + return hash_factors({"factors": normalize_value(factors)}) @model_validator(mode="after") def _validate_structured_output_config(self) -> Self: diff --git a/vllm/config/utils.py b/vllm/config/utils.py index 02f2b75f608f..7b2b3c156d77 100644 --- a/vllm/config/utils.py +++ b/vllm/config/utils.py @@ -26,6 +26,7 @@ ConfigType = type[DataclassInstance] ConfigT = TypeVar("ConfigT", bound=ConfigType) +HashResult = str | dict[str, object] | list[Any] def config(cls: ConfigT) -> ConfigT: @@ -156,7 +157,7 @@ def is_init_field(cls: ConfigType, name: str) -> bool: @runtime_checkable class SupportsHash(Protocol): - def compute_hash(self) -> str: ... + def compute_hash(self, *, return_factors: bool = False) -> HashResult: ... class SupportsMetricsInfo(Protocol): diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index d64e315b4fe3..95cb3cc4fff4 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -40,7 +40,7 @@ from .scheduler import SchedulerConfig from .speculative import SpeculativeConfig from .structured_outputs import StructuredOutputsConfig -from .utils import SupportsHash, config +from .utils import HashResult, SupportsHash, config if TYPE_CHECKING: from transformers import PretrainedConfig @@ -117,7 +117,7 @@ class VllmConfig: instance_id: str = "" """The ID of the vLLM instance.""" - def compute_hash(self) -> str: + def compute_hash(self, *, return_factors: bool = False) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -204,6 +204,9 @@ def compute_hash(self) -> str: vllm_factors.append("None") factors.append(vllm_factors) + if return_factors: + return vllm_factors if vllm_factors else [] + hash_str = hashlib.md5( str(factors).encode(), usedforsecurity=False ).hexdigest()[:10] From 
dc5980a97387082a973eb087590075ebdf396d00 Mon Sep 17 00:00:00 2001 From: WorldExplored Date: Thu, 20 Nov 2025 02:05:40 -0500 Subject: [PATCH 02/57] refactored code Signed-off-by: WorldExplored Co-Authored-By: vnadathur <236933696+vnadathur@users.noreply.github.com> --- vllm/config/cache.py | 8 +-- vllm/config/compilation.py | 4 +- vllm/config/device.py | 2 +- vllm/config/ec_transfer.py | 2 +- vllm/config/kv_transfer.py | 2 +- vllm/config/load.py | 2 +- vllm/config/lora.py | 2 +- vllm/config/model.py | 10 +-- vllm/config/multimodal.py | 2 +- vllm/config/observability.py | 2 +- vllm/config/parallel.py | 7 +- vllm/config/pooler.py | 2 +- vllm/config/scheduler.py | 2 +- vllm/config/speculative.py | 2 +- vllm/config/structured_outputs.py | 2 +- vllm/config/utils.py | 2 +- vllm/config/vllm.py | 116 +++++++++++------------------- vllm/envs.py | 12 +++- 18 files changed, 77 insertions(+), 104 deletions(-) diff --git a/vllm/config/cache.py b/vllm/config/cache.py index ae12f6f1cc0c..e0d8992a57c2 100644 --- a/vllm/config/cache.py +++ b/vllm/config/cache.py @@ -149,9 +149,9 @@ class CacheConfig: def compute_hash(self, *, return_factors: bool = False) -> HashResult: """ - WARNING: Whenever a new field is added to this config, - ensure that it is included in the factors list if - it affects the computation graph. + WARNING: Whenever a new field is added to this config, review + `ignored_factors` to decide whether the field should be excluded. + All other dataclass fields participate in the hash automatically. 
Provide a hash that uniquely identifies all the configs that affect the structure of the computation @@ -183,7 +183,7 @@ def compute_hash(self, *, return_factors: bool = False) -> HashResult: factors = get_hash_factors(self, ignored_factors) if return_factors: - return factors if factors else [] + return factors or None return hash_factors(factors) def metrics_info(self): diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index 8f1efa741ea3..d92f610669e5 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -167,7 +167,7 @@ def compute_hash(self, *, return_factors: bool = False) -> HashResult: """ factors = asdict(self) if return_factors: - return factors if factors else [] + return factors or None return InductorPass.hash_dict(factors) def __post_init__(self) -> None: @@ -532,7 +532,7 @@ def compute_hash(self, *, return_factors: bool = False) -> HashResult: factors = get_hash_factors(self, ignored_factors) factors["pass_config"] = self.pass_config.compute_hash() if return_factors: - return factors if factors else [] + return factors or None return hash_factors(factors) def __repr__(self) -> str: diff --git a/vllm/config/device.py b/vllm/config/device.py index 033ed126387d..7634b0c6549a 100644 --- a/vllm/config/device.py +++ b/vllm/config/device.py @@ -45,7 +45,7 @@ def compute_hash(self, *, return_factors: bool = False) -> HashResult: # by torch/vllm automatically. factors: list[Any] = [] if return_factors: - return factors if factors else [] + return factors or None return hash_factors({"factors": normalize_value(factors)}) def __post_init__(self): diff --git a/vllm/config/ec_transfer.py b/vllm/config/ec_transfer.py index 36defba581ec..34c3730dbea3 100644 --- a/vllm/config/ec_transfer.py +++ b/vllm/config/ec_transfer.py @@ -75,7 +75,7 @@ def compute_hash(self, *, return_factors: bool = False) -> HashResult: # this config will not affect the computation graph. 
factors: list[Any] = [] if return_factors: - return factors if factors else [] + return factors or None return hash_factors({"factors": normalize_value(factors)}) def __post_init__(self) -> None: diff --git a/vllm/config/kv_transfer.py b/vllm/config/kv_transfer.py index 7584aa951ca0..6d625da718aa 100644 --- a/vllm/config/kv_transfer.py +++ b/vllm/config/kv_transfer.py @@ -79,7 +79,7 @@ def compute_hash(self, *, return_factors: bool = False) -> HashResult: # this config will not affect the computation graph. factors: list[Any] = [] if return_factors: - return factors if factors else [] + return factors or None return hash_factors({"factors": normalize_value(factors)}) def __post_init__(self) -> None: diff --git a/vllm/config/load.py b/vllm/config/load.py index cd1261a99e6a..14ae0b27f51d 100644 --- a/vllm/config/load.py +++ b/vllm/config/load.py @@ -104,7 +104,7 @@ def compute_hash(self, *, return_factors: bool = False) -> HashResult: # this config will not affect the computation graph. factors: list[Any] = [] if return_factors: - return factors if factors else [] + return factors or None return hash_factors({"factors": normalize_value(factors)}) @field_validator("load_format", mode="after") diff --git a/vllm/config/lora.py b/vllm/config/lora.py index 8599ff689327..b6d8309af76d 100644 --- a/vllm/config/lora.py +++ b/vllm/config/lora.py @@ -90,7 +90,7 @@ def compute_hash(self, *, return_factors: bool = False) -> HashResult: factors.append(self.lora_vocab_padding_size) if return_factors: - return factors if factors else [] + return factors or None return hash_factors({"factors": normalize_value(factors)}) @model_validator(mode="after") diff --git a/vllm/config/model.py b/vllm/config/model.py index 8c551dee3992..8d2d1e3e6783 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -20,8 +20,8 @@ from vllm.config.utils import ( HashResult, config, - getattr_iter, get_hash_factors, + getattr_iter, hash_factors, ) from vllm.logger import init_logger @@ -319,9 +319,9 
@@ class ModelConfig: def compute_hash(self, *, return_factors: bool = False) -> HashResult: """ - WARNING: Whenever a new field is added to this config, - ensure that it is included in the factors list if - it affects the computation graph. + WARNING: Whenever a new field is added to this config, review + `ignored_factors` to decide whether that field must be excluded. + Every other dataclass field automatically participates in the hash. Provide a hash that uniquely identifies all the configs that affect the structure of the computation @@ -374,7 +374,7 @@ def compute_hash(self, *, return_factors: bool = False) -> HashResult: factors = get_hash_factors(self, ignored_factors) if return_factors: - return factors if factors else [] + return factors or None return hash_factors(factors) def _update_nested( diff --git a/vllm/config/multimodal.py b/vllm/config/multimodal.py index 449ee16e2976..27a56ef5db53 100644 --- a/vllm/config/multimodal.py +++ b/vllm/config/multimodal.py @@ -210,7 +210,7 @@ def compute_hash(self, *, return_factors: bool = False) -> HashResult: else None ] if return_factors: - return factors if factors else [] + return factors or None return hash_factors({"factors": normalize_value(factors)}) def get_limit_per_prompt(self, modality: str) -> int: diff --git a/vllm/config/observability.py b/vllm/config/observability.py index 6fe4c1be6cb5..8a6866811f5b 100644 --- a/vllm/config/observability.py +++ b/vllm/config/observability.py @@ -78,7 +78,7 @@ def compute_hash(self, *, return_factors: bool = False) -> HashResult: # this config will not affect the computation graph. 
factors: list[Any] = [] if return_factors: - return factors if factors else [] + return factors or None return hash_factors({"factors": normalize_value(factors)}) @field_validator("show_hidden_metrics_for_version") diff --git a/vllm/config/parallel.py b/vllm/config/parallel.py index 72a2e82e2d48..2875988ca0e3 100644 --- a/vllm/config/parallel.py +++ b/vllm/config/parallel.py @@ -465,8 +465,9 @@ def compute_hash(self, *, return_factors: bool = False) -> HashResult: This hash is also used for DP worker configuration validation to prevent hangs from mismatched collective communication patterns. - This is an opt-out hash: begin with every dataclass field on the config - and drop the `ignored_factors` listed below. + When adding new fields to this config, review `ignored_factors` to + decide whether they should be excluded. All other dataclass fields are + included automatically by the opt-out hashing scheme. """ ignored_factors = { # Derived/runtime topology, networking, or launch details @@ -501,7 +502,7 @@ def compute_hash(self, *, return_factors: bool = False) -> HashResult: # Explicitly include backend affecting env factor as before factors["VLLM_ALL2ALL_BACKEND"] = str(envs.VLLM_ALL2ALL_BACKEND) if return_factors: - return factors if factors else [] + return factors or None return hash_factors(factors) def __post_init__(self) -> None: diff --git a/vllm/config/pooler.py b/vllm/config/pooler.py index a4323cf65886..2508d0431205 100644 --- a/vllm/config/pooler.py +++ b/vllm/config/pooler.py @@ -102,7 +102,7 @@ def compute_hash(self, *, return_factors: bool = False) -> HashResult: # this config will not affect the computation graph. 
factors: list[Any] = [] if return_factors: - return factors if factors else [] + return factors or None return hash_factors({"factors": normalize_value(factors)}) diff --git a/vllm/config/scheduler.py b/vllm/config/scheduler.py index d703321ed1da..b37cb75e584e 100644 --- a/vllm/config/scheduler.py +++ b/vllm/config/scheduler.py @@ -187,7 +187,7 @@ def compute_hash(self, *, return_factors: bool = False) -> HashResult: # this config will not affect the computation graph. factors: list[Any] = [] if return_factors: - return factors if factors else [] + return factors or None return hash_factors({"factors": normalize_value(factors)}) @field_validator("scheduler_cls", "async_scheduling", mode="wrap") diff --git a/vllm/config/speculative.py b/vllm/config/speculative.py index 9919c8d0607f..142e380579a6 100644 --- a/vllm/config/speculative.py +++ b/vllm/config/speculative.py @@ -163,7 +163,7 @@ def compute_hash(self, *, return_factors: bool = False) -> HashResult: # hidden states in addition to the final hidden state. factors.append(self.method == "eagle3") if return_factors: - return factors if factors else [] + return factors or None return hash_factors({"factors": normalize_value(factors)}) @staticmethod diff --git a/vllm/config/structured_outputs.py b/vllm/config/structured_outputs.py index 6ada688ce700..85ad529a1d68 100644 --- a/vllm/config/structured_outputs.py +++ b/vllm/config/structured_outputs.py @@ -58,7 +58,7 @@ def compute_hash(self, *, return_factors: bool = False) -> HashResult: # this config will not affect the computation graph. 
factors: list[Any] = [] if return_factors: - return factors if factors else [] + return factors or None return hash_factors({"factors": normalize_value(factors)}) @model_validator(mode="after") diff --git a/vllm/config/utils.py b/vllm/config/utils.py index 7b2b3c156d77..e8f66b6f102e 100644 --- a/vllm/config/utils.py +++ b/vllm/config/utils.py @@ -26,7 +26,7 @@ ConfigType = type[DataclassInstance] ConfigT = TypeVar("ConfigT", bound=ConfigType) -HashResult = str | dict[str, object] | list[Any] +HashResult = str | dict[str, object] | list[Any] | None def config(cls: ConfigT) -> ConfigT: diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index 95cb3cc4fff4..ca327b9b1068 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -3,8 +3,6 @@ import copy import getpass -import hashlib -import json import os import tempfile import threading @@ -40,7 +38,7 @@ from .scheduler import SchedulerConfig from .speculative import SpeculativeConfig from .structured_outputs import StructuredOutputsConfig -from .utils import HashResult, SupportsHash, config +from .utils import HashResult, SupportsHash, config, hash_factors, normalize_value if TYPE_CHECKING: from transformers import PretrainedConfig @@ -119,9 +117,9 @@ class VllmConfig: def compute_hash(self, *, return_factors: bool = False) -> HashResult: """ - WARNING: Whenever a new field is added to this config, - ensure that it is included in the factors list if - it affects the computation graph. + WARNING: Whenever a new field is added to this config, ensure the field + contributes to the `factors` dictionary below if it affects the + computation graph. Provide a hash that uniquely identifies all the configs that affect the structure of the computation @@ -129,88 +127,54 @@ def compute_hash(self, *, return_factors: bool = False) -> HashResult: excluding anything before input ids/embeddings and after the final hidden states. 
""" - factors: list[Any] = [] - - # summarize vllm config - vllm_factors: list[Any] = [] from vllm import __version__ - vllm_factors.append(__version__) - if self.model_config: - vllm_factors.append(self.model_config.compute_hash()) - else: - vllm_factors.append("None") - if self.cache_config: - vllm_factors.append(self.cache_config.compute_hash()) - else: - vllm_factors.append("None") - if self.parallel_config: - vllm_factors.append(self.parallel_config.compute_hash()) - else: - vllm_factors.append("None") - if self.scheduler_config: - vllm_factors.append(self.scheduler_config.compute_hash()) - else: - vllm_factors.append("None") - if self.device_config: - vllm_factors.append(self.device_config.compute_hash()) - else: - vllm_factors.append("None") - if self.load_config: - vllm_factors.append(self.load_config.compute_hash()) - else: - vllm_factors.append("None") - if self.lora_config: - vllm_factors.append(self.lora_config.compute_hash()) + def _hash_optional(config_obj: SupportsHash | None) -> HashResult: + return config_obj.compute_hash() if config_obj is not None else None + + factors: dict[str, Any] = {} + factors["version"] = __version__ + factors["model_config"] = _hash_optional(self.model_config) + factors["cache_config"] = _hash_optional(self.cache_config) + factors["parallel_config"] = _hash_optional(self.parallel_config) + factors["scheduler_config"] = _hash_optional(self.scheduler_config) + factors["device_config"] = _hash_optional(self.device_config) + factors["load_config"] = _hash_optional(self.load_config) + lora_hash = _hash_optional(self.lora_config) + factors["lora_config"] = lora_hash + if lora_hash is not None: # LoRA creates static buffers based on max_num_batched_tokens. # The tensor sizes and strides get captured in the torch.compile # graph explicitly. 
- vllm_factors.append(str(self.scheduler_config.max_num_batched_tokens)) - else: - vllm_factors.append("None") - if self.speculative_config: - vllm_factors.append(self.speculative_config.compute_hash()) - else: - vllm_factors.append("None") - if self.structured_outputs_config: - vllm_factors.append(self.structured_outputs_config.compute_hash()) - else: - vllm_factors.append("None") - vllm_factors.append(self.observability_config.compute_hash()) + factors["lora_scheduler_max_num_batched_tokens"] = ( + self.scheduler_config.max_num_batched_tokens + if self.scheduler_config is not None + else None + ) + factors["speculative_config"] = _hash_optional(self.speculative_config) + factors["structured_outputs_config"] = _hash_optional( + self.structured_outputs_config + ) + factors["observability_config"] = self.observability_config.compute_hash() if self.quant_config: pass # should be captured by model_config.quantization - if self.compilation_config: - vllm_factors.append(self.compilation_config.compute_hash()) - else: - vllm_factors.append("None") - if self.kv_transfer_config: - vllm_factors.append(self.kv_transfer_config.compute_hash()) - else: - vllm_factors.append("None") - if self.ec_transfer_config: - vllm_factors.append(self.ec_transfer_config.compute_hash()) - else: - vllm_factors.append("None") + factors["compilation_config"] = _hash_optional(self.compilation_config) + factors["kv_transfer_config"] = _hash_optional(self.kv_transfer_config) + factors["ec_transfer_config"] = _hash_optional(self.ec_transfer_config) + + additional_config_value: HashResult = None if self.additional_config: - if isinstance(additional_config := self.additional_config, dict): - additional_config_hash = hashlib.md5( - json.dumps(additional_config, sort_keys=True).encode(), - usedforsecurity=False, - ).hexdigest() + additional_config = self.additional_config + if isinstance(additional_config, dict): + additional_config_value = normalize_value(additional_config) else: - 
additional_config_hash = additional_config.compute_hash() - vllm_factors.append(additional_config_hash) - else: - vllm_factors.append("None") - factors.append(vllm_factors) + additional_config_value = additional_config.compute_hash() + factors["additional_config"] = additional_config_value if return_factors: - return vllm_factors if vllm_factors else [] + return factors or None - hash_str = hashlib.md5( - str(factors).encode(), usedforsecurity=False - ).hexdigest()[:10] - return hash_str + return hash_factors(factors) def pad_for_cudagraph(self, batch_size: int) -> int: # if batch_size > self.compilation_config.max_cudagraph_capture_size, diff --git a/vllm/envs.py b/vllm/envs.py index 1ff620af5722..a89ad989b617 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -10,6 +10,8 @@ from collections.abc import Callable from typing import TYPE_CHECKING, Any, Literal +from vllm.config.utils import hash_factors, normalize_value + if TYPE_CHECKING: VLLM_HOST_IP: str = "" VLLM_PORT: int | None = None @@ -1610,8 +1612,6 @@ def compile_factors() -> dict[str, object]: "CUDA_VISIBLE_DEVICES", } - from vllm.config.utils import normalize_value - factors: dict[str, object] = {} for factor, getter in environment_variables.items(): if factor in ignored_factors: @@ -1655,3 +1655,11 @@ def compile_factors() -> dict[str, object]: factors[var] = normalize_value(os.getenv(var)) return factors + + +def compute_hash(*, return_factors: bool = False): + """Return a canonical hash for the environment compile factors.""" + factors = compile_factors() + if return_factors: + return factors if factors else None + return hash_factors(factors) From 7b7fe2e607eddf6dcbc8c5199d96b0453278c3e5 Mon Sep 17 00:00:00 2001 From: WorldExplored Date: Thu, 20 Nov 2025 15:03:28 -0500 Subject: [PATCH 03/57] fixed vllm.py Signed-off-by: WorldExplored Co-Authored-By: vnadathur <236933696+vnadathur@users.noreply.github.com> --- vllm/config/vllm.py | 116 +++++++++++++++++++++++++++++--------------- 1 file changed, 76 
insertions(+), 40 deletions(-) diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index ca327b9b1068..95cb3cc4fff4 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -3,6 +3,8 @@ import copy import getpass +import hashlib +import json import os import tempfile import threading @@ -38,7 +40,7 @@ from .scheduler import SchedulerConfig from .speculative import SpeculativeConfig from .structured_outputs import StructuredOutputsConfig -from .utils import HashResult, SupportsHash, config, hash_factors, normalize_value +from .utils import HashResult, SupportsHash, config if TYPE_CHECKING: from transformers import PretrainedConfig @@ -117,9 +119,9 @@ class VllmConfig: def compute_hash(self, *, return_factors: bool = False) -> HashResult: """ - WARNING: Whenever a new field is added to this config, ensure the field - contributes to the `factors` dictionary below if it affects the - computation graph. + WARNING: Whenever a new field is added to this config, + ensure that it is included in the factors list if + it affects the computation graph. Provide a hash that uniquely identifies all the configs that affect the structure of the computation @@ -127,54 +129,88 @@ def compute_hash(self, *, return_factors: bool = False) -> HashResult: excluding anything before input ids/embeddings and after the final hidden states. 
""" + factors: list[Any] = [] + + # summarize vllm config + vllm_factors: list[Any] = [] from vllm import __version__ - def _hash_optional(config_obj: SupportsHash | None) -> HashResult: - return config_obj.compute_hash() if config_obj is not None else None - - factors: dict[str, Any] = {} - factors["version"] = __version__ - factors["model_config"] = _hash_optional(self.model_config) - factors["cache_config"] = _hash_optional(self.cache_config) - factors["parallel_config"] = _hash_optional(self.parallel_config) - factors["scheduler_config"] = _hash_optional(self.scheduler_config) - factors["device_config"] = _hash_optional(self.device_config) - factors["load_config"] = _hash_optional(self.load_config) - lora_hash = _hash_optional(self.lora_config) - factors["lora_config"] = lora_hash - if lora_hash is not None: + vllm_factors.append(__version__) + if self.model_config: + vllm_factors.append(self.model_config.compute_hash()) + else: + vllm_factors.append("None") + if self.cache_config: + vllm_factors.append(self.cache_config.compute_hash()) + else: + vllm_factors.append("None") + if self.parallel_config: + vllm_factors.append(self.parallel_config.compute_hash()) + else: + vllm_factors.append("None") + if self.scheduler_config: + vllm_factors.append(self.scheduler_config.compute_hash()) + else: + vllm_factors.append("None") + if self.device_config: + vllm_factors.append(self.device_config.compute_hash()) + else: + vllm_factors.append("None") + if self.load_config: + vllm_factors.append(self.load_config.compute_hash()) + else: + vllm_factors.append("None") + if self.lora_config: + vllm_factors.append(self.lora_config.compute_hash()) # LoRA creates static buffers based on max_num_batched_tokens. # The tensor sizes and strides get captured in the torch.compile # graph explicitly. 
- factors["lora_scheduler_max_num_batched_tokens"] = ( - self.scheduler_config.max_num_batched_tokens - if self.scheduler_config is not None - else None - ) - factors["speculative_config"] = _hash_optional(self.speculative_config) - factors["structured_outputs_config"] = _hash_optional( - self.structured_outputs_config - ) - factors["observability_config"] = self.observability_config.compute_hash() + vllm_factors.append(str(self.scheduler_config.max_num_batched_tokens)) + else: + vllm_factors.append("None") + if self.speculative_config: + vllm_factors.append(self.speculative_config.compute_hash()) + else: + vllm_factors.append("None") + if self.structured_outputs_config: + vllm_factors.append(self.structured_outputs_config.compute_hash()) + else: + vllm_factors.append("None") + vllm_factors.append(self.observability_config.compute_hash()) if self.quant_config: pass # should be captured by model_config.quantization - factors["compilation_config"] = _hash_optional(self.compilation_config) - factors["kv_transfer_config"] = _hash_optional(self.kv_transfer_config) - factors["ec_transfer_config"] = _hash_optional(self.ec_transfer_config) - - additional_config_value: HashResult = None + if self.compilation_config: + vllm_factors.append(self.compilation_config.compute_hash()) + else: + vllm_factors.append("None") + if self.kv_transfer_config: + vllm_factors.append(self.kv_transfer_config.compute_hash()) + else: + vllm_factors.append("None") + if self.ec_transfer_config: + vllm_factors.append(self.ec_transfer_config.compute_hash()) + else: + vllm_factors.append("None") if self.additional_config: - additional_config = self.additional_config - if isinstance(additional_config, dict): - additional_config_value = normalize_value(additional_config) + if isinstance(additional_config := self.additional_config, dict): + additional_config_hash = hashlib.md5( + json.dumps(additional_config, sort_keys=True).encode(), + usedforsecurity=False, + ).hexdigest() else: - 
additional_config_value = additional_config.compute_hash() - factors["additional_config"] = additional_config_value + additional_config_hash = additional_config.compute_hash() + vllm_factors.append(additional_config_hash) + else: + vllm_factors.append("None") + factors.append(vllm_factors) if return_factors: - return factors or None + return vllm_factors if vllm_factors else [] - return hash_factors(factors) + hash_str = hashlib.md5( + str(factors).encode(), usedforsecurity=False + ).hexdigest()[:10] + return hash_str def pad_for_cudagraph(self, batch_size: int) -> int: # if batch_size > self.compilation_config.max_cudagraph_capture_size, From f602d96935e516d891adb3b51ccfefc816585dec Mon Sep 17 00:00:00 2001 From: vnadathur Date: Thu, 20 Nov 2025 13:07:44 -0800 Subject: [PATCH 04/57] refactor compute_hash() to compile_factors() Signed-off-by: vnadathur Co-Authored-By: Srreyansh Sethi <107075589+WorldExplored@users.noreply.github.com> --- tests/compile/fullgraph/test_toy_llama.py | 2 +- tests/config/test_config_utils.py | 8 +++--- tests/config/test_multimodal_config.py | 4 +-- vllm/compilation/backends.py | 10 +++---- vllm/compilation/caching.py | 4 +-- vllm/compilation/compiler_interface.py | 8 +++--- vllm/compilation/pass_manager.py | 2 +- vllm/config/cache.py | 6 ++-- vllm/config/compilation.py | 10 +++---- vllm/config/device.py | 2 +- vllm/config/ec_transfer.py | 2 +- vllm/config/kv_transfer.py | 2 +- vllm/config/load.py | 2 +- vllm/config/lora.py | 2 +- vllm/config/model.py | 6 ++-- vllm/config/multimodal.py | 2 +- vllm/config/observability.py | 2 +- vllm/config/parallel.py | 6 ++-- vllm/config/pooler.py | 2 +- vllm/config/scheduler.py | 2 +- vllm/config/speculative.py | 2 +- vllm/config/structured_outputs.py | 2 +- vllm/config/utils.py | 8 +++--- vllm/config/vllm.py | 34 +++++++++++------------ vllm/distributed/eplb/eplb_state.py | 2 +- vllm/envs.py | 2 +- vllm/v1/engine/core.py | 2 +- vllm/v1/engine/utils.py | 4 +-- 28 files changed, 70 insertions(+), 70 
deletions(-) diff --git a/tests/compile/fullgraph/test_toy_llama.py b/tests/compile/fullgraph/test_toy_llama.py index 915fbc6ce7f3..7b6e97981a13 100644 --- a/tests/compile/fullgraph/test_toy_llama.py +++ b/tests/compile/fullgraph/test_toy_llama.py @@ -45,7 +45,7 @@ class LlamaConfig: tractable_init: bool = False random_seed: int = 0 - def compute_hash(self) -> str: + def compile_factors(self) -> str: factors: list[Any] = [] for k, v in self.__dict__.items(): if k == "random_seed": diff --git a/tests/config/test_config_utils.py b/tests/config/test_config_utils.py index 1277c7e64eb2..12ba1a5059e3 100644 --- a/tests/config/test_config_utils.py +++ b/tests/config/test_config_utils.py @@ -6,7 +6,7 @@ import pytest -from vllm.config.utils import get_hash_factors, hash_factors, normalize_value +from vllm.config.utils import get_compile_factors, hash_factors, normalize_value # Helpers @@ -25,7 +25,7 @@ def expected_path(p_str: str = ".") -> str: return p.expanduser().resolve().as_posix() -# Minimal dataclass to test get_hash_factors. +# Minimal dataclass to test get_compile_factors. # Avoid importing heavy vLLM configs. 
@dataclass class SimpleConfig: @@ -136,8 +136,8 @@ def test_enum_vs_int_disambiguation(): assert enum_val == "raw_logits" # Build factor dicts from configs with int vs enum - f_int = get_hash_factors(SimpleConfig(1), set()) - f_enum = get_hash_factors(SimpleConfig(DummyLogprobsMode.RAW_LOGITS), set()) + f_int = get_compile_factors(SimpleConfig(1), set()) + f_enum = get_compile_factors(SimpleConfig(DummyLogprobsMode.RAW_LOGITS), set()) # The int case remains a primitive value assert f_int["a"] == 1 # The enum case becomes a tagged tuple ("module.QualName", "raw_logits") diff --git a/tests/config/test_multimodal_config.py b/tests/config/test_multimodal_config.py index 3d02893e52f1..90ae2a49d280 100644 --- a/tests/config/test_multimodal_config.py +++ b/tests/config/test_multimodal_config.py @@ -18,8 +18,8 @@ def test_mm_encoder_attn_backend_invalid(): def test_mm_encoder_attn_backend_hash_updates(): - base_hash = MultiModalConfig().compute_hash() + base_hash = MultiModalConfig().compile_factors() overridden_hash = MultiModalConfig( mm_encoder_attn_backend=AttentionBackendEnum.FLASH_ATTN - ).compute_hash() + ).compile_factors() assert base_hash != overridden_hash diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py index 1e66f21ff638..4818ab29c3c0 100644 --- a/vllm/compilation/backends.py +++ b/vllm/compilation/backends.py @@ -93,8 +93,8 @@ def __init__(self, compilation_config: CompilationConfig): self.compilation_config = compilation_config self.compiler = make_compiler(compilation_config) - def compute_hash(self, vllm_config: VllmConfig) -> str: - return self.compiler.compute_hash(vllm_config) + def compile_factors(self, vllm_config: VllmConfig) -> str: + return self.compiler.compile_factors(vllm_config) @contextmanager def compile_context(self, runtime_shape: int | None = None): @@ -590,8 +590,8 @@ def __call__( env_factors = envs.compile_factors() env_hash = hash_factors(env_factors) # Compute config/compiler/code hashes once and reuse - 
config_hash = vllm_config.compute_hash() - compiler_hash = self.compiler_manager.compute_hash(vllm_config) + config_hash = vllm_config.compile_factors() + compiler_hash = self.compiler_manager.compile_factors(vllm_config) forward_code_files = list(sorted(self.compilation_config.traced_files)) logger.debug( @@ -621,7 +621,7 @@ def __call__( # graph. factors = [env_hash, config_hash, code_hash, compiler_hash] # Use SHA-256 for cache key hashing to be consistent across - # compute_hash functions. Truncate for a short cache dir name. + # compile_factors functions. Truncate for a short cache dir name. hash_key = hashlib.sha256(str(factors).encode()).hexdigest()[:10] cache_dir = os.path.join( envs.VLLM_CACHE_ROOT, "torch_compile_cache", hash_key diff --git a/vllm/compilation/caching.py b/vllm/compilation/caching.py index 16e34c2711e9..29aef8fd2aac 100644 --- a/vllm/compilation/caching.py +++ b/vllm/compilation/caching.py @@ -138,12 +138,12 @@ def compilation_config_hash_factors(vllm_config: VllmConfig) -> list[str]: factors = [] # 0. factors come from the env, for example, The values of # VLLM_PP_LAYER_PARTITION will affect the computation graph. - env_hash = envs.compute_hash() + env_hash = envs.compile_factors() factors.append(env_hash) # 1. factors come from the vllm_config (it mainly summarizes how the # model is created) - config_hash = vllm_config.compute_hash() + config_hash = vllm_config.compile_factors() factors.append(config_hash) return factors diff --git a/vllm/compilation/compiler_interface.py b/vllm/compilation/compiler_interface.py index 11cf0f85c178..a340b9636b2a 100644 --- a/vllm/compilation/compiler_interface.py +++ b/vllm/compilation/compiler_interface.py @@ -46,12 +46,12 @@ def initialize_cache( """ pass - def compute_hash(self, vllm_config: VllmConfig) -> str: + def compile_factors(self, vllm_config: VllmConfig) -> str: """ Gather all the relevant information from the vLLM config, to compute a hash so that we can cache the compiled model. 
- See [`VllmConfig.compute_hash`][vllm.config.VllmConfig.compute_hash] + See [`VllmConfig.compile_factors`][vllm.config.VllmConfig.compile_factors] to check what information is already considered by default. This function should only consider the information that is specific to the compiler. @@ -195,7 +195,7 @@ class InductorStandaloneAdaptor(CompilerInterface): def __init__(self, save_format: Literal["binary", "unpacked"]): self.save_format = save_format - def compute_hash(self, vllm_config: VllmConfig) -> str: + def compile_factors(self, vllm_config: VllmConfig) -> str: factors = get_inductor_factors() hash_str = hashlib.md5( str(factors).encode(), usedforsecurity=False @@ -284,7 +284,7 @@ class InductorAdaptor(CompilerInterface): name = "inductor" - def compute_hash(self, vllm_config: VllmConfig) -> str: + def compile_factors(self, vllm_config: VllmConfig) -> str: factors = get_inductor_factors() hash_str = hashlib.md5( str(factors).encode(), usedforsecurity=False diff --git a/vllm/compilation/pass_manager.py b/vllm/compilation/pass_manager.py index fe2547d7feca..11d7ea1e4a63 100644 --- a/vllm/compilation/pass_manager.py +++ b/vllm/compilation/pass_manager.py @@ -127,7 +127,7 @@ def uuid(self): affects compilation caching. Its uuid depends on the UUIDs of all dependent passes and the pass config. See InductorPass for more info. 
""" - state = {"pass_config": self.pass_config.compute_hash(), "passes": []} + state = {"pass_config": self.pass_config.compile_factors(), "passes": []} for pass_ in self.passes: state["passes"].append(pass_.uuid()) state["passes"].append(self.fix_functionalization.uuid()) diff --git a/vllm/config/cache.py b/vllm/config/cache.py index e0d8992a57c2..8e2952750e92 100644 --- a/vllm/config/cache.py +++ b/vllm/config/cache.py @@ -7,7 +7,7 @@ from pydantic import Field, SkipValidation, field_validator from pydantic.dataclasses import dataclass -from vllm.config.utils import HashResult, config, get_hash_factors, hash_factors +from vllm.config.utils import HashResult, config, get_compile_factors, hash_factors from vllm.logger import init_logger from vllm.utils.mem_constants import GiB_bytes from vllm.utils.mem_utils import get_cpu_memory @@ -147,7 +147,7 @@ class CacheConfig: 'native' (vLLM native CPU offloading), 'lmcache' This option must be used together with kv_offloading_size.""" - def compute_hash(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self, *, return_factors: bool = False) -> HashResult: """ WARNING: Whenever a new field is added to this config, review `ignored_factors` to decide whether the field should be excluded. 
@@ -181,7 +181,7 @@ def compute_hash(self, *, return_factors: bool = False) -> HashResult: "kv_sharing_fast_prefill", } - factors = get_hash_factors(self, ignored_factors) + factors = get_compile_factors(self, ignored_factors) if return_factors: return factors or None return hash_factors(factors) diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index d92f610669e5..929eb84004a9 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -13,7 +13,7 @@ import vllm.envs as envs from vllm.compilation.inductor_pass import CallableInductorPass, InductorPass -from vllm.config.utils import HashResult, config, get_hash_factors, hash_factors +from vllm.config.utils import HashResult, config, get_compile_factors, hash_factors from vllm.logger import init_logger from vllm.platforms import current_platform from vllm.utils.import_utils import resolve_obj_by_qualname @@ -159,7 +159,7 @@ def default_fi_allreduce_fusion_max_size_mb() -> dict[int, float]: current_platform.get_device_capability().to_int(), {} ) - def compute_hash(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self, *, return_factors: bool = False) -> HashResult: """ Produces a hash unique to the pass configuration. Any new fields that affect compilation should be added to the hash. 
@@ -506,7 +506,7 @@ class CompilationConfig: "vllm::sparse_attn_indexer", ] - def compute_hash(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self, *, return_factors: bool = False) -> HashResult: """ Provide a hash that uniquely identifies all the configs that affect the structure of the computation @@ -529,8 +529,8 @@ def compute_hash(self, *, return_factors: bool = False) -> HashResult: "pass_config", # handled separately below } - factors = get_hash_factors(self, ignored_factors) - factors["pass_config"] = self.pass_config.compute_hash() + factors = get_compile_factors(self, ignored_factors) + factors["pass_config"] = self.pass_config.compile_factors() if return_factors: return factors or None return hash_factors(factors) diff --git a/vllm/config/device.py b/vllm/config/device.py index 7634b0c6549a..350fa1f37a58 100644 --- a/vllm/config/device.py +++ b/vllm/config/device.py @@ -28,7 +28,7 @@ class DeviceConfig: """Device type from the current platform. This is set in `__post_init__`.""" - def compute_hash(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self, *, return_factors: bool = False) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if diff --git a/vllm/config/ec_transfer.py b/vllm/config/ec_transfer.py index 34c3730dbea3..01981d22bfc5 100644 --- a/vllm/config/ec_transfer.py +++ b/vllm/config/ec_transfer.py @@ -59,7 +59,7 @@ class ECTransferConfig: """The Python module path to dynamically load the EC connector from. 
Only supported in V1.""" - def compute_hash(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self, *, return_factors: bool = False) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if diff --git a/vllm/config/kv_transfer.py b/vllm/config/kv_transfer.py index 6d625da718aa..0dc0a5d27059 100644 --- a/vllm/config/kv_transfer.py +++ b/vllm/config/kv_transfer.py @@ -63,7 +63,7 @@ class KVTransferConfig: enable_permute_local_kv: bool = False """Experiment feature flag to enable HND to NHD KV Transfer""" - def compute_hash(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self, *, return_factors: bool = False) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if diff --git a/vllm/config/load.py b/vllm/config/load.py index 14ae0b27f51d..2c276690e6e5 100644 --- a/vllm/config/load.py +++ b/vllm/config/load.py @@ -88,7 +88,7 @@ class LoadConfig: see original doc for `map_location` in https://pytorch.org/docs/stable/generated/torch.load.html """ - def compute_hash(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self, *, return_factors: bool = False) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if diff --git a/vllm/config/lora.py b/vllm/config/lora.py index b6d8309af76d..1dac16929678 100644 --- a/vllm/config/lora.py +++ b/vllm/config/lora.py @@ -69,7 +69,7 @@ class LoRAConfig: will be automatically assigned to 1-n with the names of the modalities in alphabetic order.""" - def compute_hash(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self, *, return_factors: bool = False) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if diff --git a/vllm/config/model.py b/vllm/config/model.py index 
8d2d1e3e6783..8c625ba182ab 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -20,7 +20,7 @@ from vllm.config.utils import ( HashResult, config, - get_hash_factors, + get_compile_factors, getattr_iter, hash_factors, ) @@ -317,7 +317,7 @@ class ModelConfig: skip_mm_profiling: InitVar[bool | None] = None video_pruning_rate: InitVar[float | None] = None - def compute_hash(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self, *, return_factors: bool = False) -> HashResult: """ WARNING: Whenever a new field is added to this config, review `ignored_factors` to decide whether that field must be excluded. @@ -372,7 +372,7 @@ def compute_hash(self, *, return_factors: bool = False) -> HashResult: "skip_mm_profiling", } - factors = get_hash_factors(self, ignored_factors) + factors = get_compile_factors(self, ignored_factors) if return_factors: return factors or None return hash_factors(factors) diff --git a/vllm/config/multimodal.py b/vllm/config/multimodal.py index 27a56ef5db53..35ce12af9f56 100644 --- a/vllm/config/multimodal.py +++ b/vllm/config/multimodal.py @@ -192,7 +192,7 @@ def _validate_multimodal_config(self): ) return self - def compute_hash(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self, *, return_factors: bool = False) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if diff --git a/vllm/config/observability.py b/vllm/config/observability.py index 8a6866811f5b..50d904fb41f6 100644 --- a/vllm/config/observability.py +++ b/vllm/config/observability.py @@ -62,7 +62,7 @@ def collect_model_execute_time(self) -> bool: or "all" in self.collect_detailed_traces ) - def compute_hash(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self, *, return_factors: bool = False) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if diff --git 
a/vllm/config/parallel.py b/vllm/config/parallel.py index 2875988ca0e3..a967e166ef17 100644 --- a/vllm/config/parallel.py +++ b/vllm/config/parallel.py @@ -11,7 +11,7 @@ from typing_extensions import Self import vllm.envs as envs -from vllm.config.utils import HashResult, config, get_hash_factors, hash_factors +from vllm.config.utils import HashResult, config, get_compile_factors, hash_factors from vllm.logger import init_logger from vllm.model_executor.layers.batch_invariant import ( vllm_is_batch_invariant, @@ -454,7 +454,7 @@ def sync_kv_cache_memory_size(dp_group: ProcessGroup, kv_cache_memory: int) -> i torch.distributed.all_reduce(tensor, op=ReduceOp.MIN, group=dp_group) return tensor.item() - def compute_hash(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self, *, return_factors: bool = False) -> HashResult: """ Provide a hash that uniquely identifies all the configs that affect the structure of the computation @@ -498,7 +498,7 @@ def compute_hash(self, *, return_factors: bool = False) -> HashResult: "_api_process_rank", } - factors = get_hash_factors(self, ignored_factors) + factors = get_compile_factors(self, ignored_factors) # Explicitly include backend affecting env factor as before factors["VLLM_ALL2ALL_BACKEND"] = str(envs.VLLM_ALL2ALL_BACKEND) if return_factors: diff --git a/vllm/config/pooler.py b/vllm/config/pooler.py index 2508d0431205..f37f46b210d5 100644 --- a/vllm/config/pooler.py +++ b/vllm/config/pooler.py @@ -86,7 +86,7 @@ def __post_init__(self): # raise deprecated warning for softmax and activation self.use_activation = get_use_activation(self) - def compute_hash(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self, *, return_factors: bool = False) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if diff --git a/vllm/config/scheduler.py b/vllm/config/scheduler.py index b37cb75e584e..1ac0dca7d353 100644 --- 
a/vllm/config/scheduler.py +++ b/vllm/config/scheduler.py @@ -171,7 +171,7 @@ def get_scheduler_cls(self) -> type["SchedulerInterface"]: return cast(type["SchedulerInterface"], self.scheduler_cls) return resolve_obj_by_qualname(self.scheduler_cls) - def compute_hash(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self, *, return_factors: bool = False) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if diff --git a/vllm/config/speculative.py b/vllm/config/speculative.py index 142e380579a6..2391ee61ab64 100644 --- a/vllm/config/speculative.py +++ b/vllm/config/speculative.py @@ -146,7 +146,7 @@ class SpeculativeConfig: tokens with estimated probability (based on frequency counts) greater than or equal to this value.""" - def compute_hash(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self, *, return_factors: bool = False) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if diff --git a/vllm/config/structured_outputs.py b/vllm/config/structured_outputs.py index 85ad529a1d68..a7c6e6500bbf 100644 --- a/vllm/config/structured_outputs.py +++ b/vllm/config/structured_outputs.py @@ -42,7 +42,7 @@ class StructuredOutputsConfig: enable_in_reasoning: bool = False """Whether to use structured input for reasoning.""" - def compute_hash(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self, *, return_factors: bool = False) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if diff --git a/vllm/config/utils.py b/vllm/config/utils.py index e8f66b6f102e..1a2cdad1bf92 100644 --- a/vllm/config/utils.py +++ b/vllm/config/utils.py @@ -156,8 +156,8 @@ def is_init_field(cls: ConfigType, name: str) -> bool: @runtime_checkable -class SupportsHash(Protocol): - def compute_hash(self, *, 
return_factors: bool = False) -> HashResult: ... +class SupportsCompileFactors(Protocol): + def compile_factors(self, *, return_factors: bool = False) -> HashResult: ... class SupportsMetricsInfo(Protocol): @@ -270,7 +270,7 @@ def normalize_value(x): ) -def get_hash_factors(config: ConfigT, ignored_factors: set[str]) -> dict[str, object]: +def get_compile_factors(config: ConfigT, ignored_factors: set[str]) -> dict[str, object]: """Gets the factors used for hashing a config class. - Includes all dataclass fields not in `ignored_factors`. - Errors on non-normalizable values. @@ -285,7 +285,7 @@ def get_hash_factors(config: ConfigT, ignored_factors: set[str]) -> dict[str, ob factors[factor] = normalize_value(value) except TypeError as e: raise TypeError( - f"get_hash_factors: unsupported type for key '{factor}' " + f"get_compile_factors: unsupported type for key '{factor}' " f"({type(value).__name__})" ) from e return factors diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index 95cb3cc4fff4..a764edf7a987 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -40,7 +40,7 @@ from .scheduler import SchedulerConfig from .speculative import SpeculativeConfig from .structured_outputs import StructuredOutputsConfig -from .utils import HashResult, SupportsHash, config +from .utils import HashResult, SupportsCompileFactors, config if TYPE_CHECKING: from transformers import PretrainedConfig @@ -110,14 +110,14 @@ class VllmConfig: # some opaque config, only used to provide additional information # for the hash computation, mainly used for testing, debugging or out of # tree config registration. - additional_config: dict | SupportsHash = Field(default_factory=dict) + additional_config: dict | SupportsCompileFactors = Field(default_factory=dict) """Additional config for specified platform. Different platforms may support different configs. Make sure the configs are valid for the platform you are using. 
Contents must be hashable.""" instance_id: str = "" """The ID of the vLLM instance.""" - def compute_hash(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self, *, return_factors: bool = False) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -137,31 +137,31 @@ def compute_hash(self, *, return_factors: bool = False) -> HashResult: vllm_factors.append(__version__) if self.model_config: - vllm_factors.append(self.model_config.compute_hash()) + vllm_factors.append(self.model_config.compile_factors()) else: vllm_factors.append("None") if self.cache_config: - vllm_factors.append(self.cache_config.compute_hash()) + vllm_factors.append(self.cache_config.compile_factors()) else: vllm_factors.append("None") if self.parallel_config: - vllm_factors.append(self.parallel_config.compute_hash()) + vllm_factors.append(self.parallel_config.compile_factors()) else: vllm_factors.append("None") if self.scheduler_config: - vllm_factors.append(self.scheduler_config.compute_hash()) + vllm_factors.append(self.scheduler_config.compile_factors()) else: vllm_factors.append("None") if self.device_config: - vllm_factors.append(self.device_config.compute_hash()) + vllm_factors.append(self.device_config.compile_factors()) else: vllm_factors.append("None") if self.load_config: - vllm_factors.append(self.load_config.compute_hash()) + vllm_factors.append(self.load_config.compile_factors()) else: vllm_factors.append("None") if self.lora_config: - vllm_factors.append(self.lora_config.compute_hash()) + vllm_factors.append(self.lora_config.compile_factors()) # LoRA creates static buffers based on max_num_batched_tokens. # The tensor sizes and strides get captured in the torch.compile # graph explicitly. 
@@ -169,26 +169,26 @@ def compute_hash(self, *, return_factors: bool = False) -> HashResult: else: vllm_factors.append("None") if self.speculative_config: - vllm_factors.append(self.speculative_config.compute_hash()) + vllm_factors.append(self.speculative_config.compile_factors()) else: vllm_factors.append("None") if self.structured_outputs_config: - vllm_factors.append(self.structured_outputs_config.compute_hash()) + vllm_factors.append(self.structured_outputs_config.compile_factors()) else: vllm_factors.append("None") - vllm_factors.append(self.observability_config.compute_hash()) + vllm_factors.append(self.observability_config.compile_factors()) if self.quant_config: pass # should be captured by model_config.quantization if self.compilation_config: - vllm_factors.append(self.compilation_config.compute_hash()) + vllm_factors.append(self.compilation_config.compile_factors()) else: vllm_factors.append("None") if self.kv_transfer_config: - vllm_factors.append(self.kv_transfer_config.compute_hash()) + vllm_factors.append(self.kv_transfer_config.compile_factors()) else: vllm_factors.append("None") if self.ec_transfer_config: - vllm_factors.append(self.ec_transfer_config.compute_hash()) + vllm_factors.append(self.ec_transfer_config.compile_factors()) else: vllm_factors.append("None") if self.additional_config: @@ -198,7 +198,7 @@ def compute_hash(self, *, return_factors: bool = False) -> HashResult: usedforsecurity=False, ).hexdigest() else: - additional_config_hash = additional_config.compute_hash() + additional_config_hash = additional_config.compile_factors() vllm_factors.append(additional_config_hash) else: vllm_factors.append("None") diff --git a/vllm/distributed/eplb/eplb_state.py b/vllm/distributed/eplb/eplb_state.py index 526d3ceac7b8..ddc1f8821590 100644 --- a/vllm/distributed/eplb/eplb_state.py +++ b/vllm/distributed/eplb/eplb_state.py @@ -385,7 +385,7 @@ def add_model( ) self.expert_rearrangement_step = 0 - self.model_states[model_config.compute_hash()] = 
EplbModelState( + self.model_states[model_config.compile_factors()] = EplbModelState( physical_to_logical_map, logical_to_physical_map, logical_replica_count, diff --git a/vllm/envs.py b/vllm/envs.py index a89ad989b617..177b0289ecbf 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -1657,7 +1657,7 @@ def compile_factors() -> dict[str, object]: return factors -def compute_hash(*, return_factors: bool = False): +def compile_factors(*, return_factors: bool = False): """Return a canonical hash for the environment compile factors.""" factors = compile_factors() if return_factors: diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py index 6be19894d332..0af6c81007b7 100644 --- a/vllm/v1/engine/core.py +++ b/vllm/v1/engine/core.py @@ -753,7 +753,7 @@ def _perform_handshake( } if vllm_config.parallel_config.data_parallel_size > 1: ready_msg["parallel_config_hash"] = ( - vllm_config.parallel_config.compute_hash() + vllm_config.parallel_config.compile_factors() ) handshake_socket.send(msgspec.msgpack.encode(ready_msg)) diff --git a/vllm/v1/engine/utils.py b/vllm/v1/engine/utils.py index d65cad7af03d..55f8a29f72ba 100644 --- a/vllm/v1/engine/utils.py +++ b/vllm/v1/engine/utils.py @@ -1015,7 +1015,7 @@ def wait_for_engine_startup( "data_parallel_size", ) }, - parallel_config_hash=parallel_config.compute_hash() + parallel_config_hash=parallel_config.compile_factors() if parallel_config.data_parallel_size > 1 else None, ) @@ -1041,7 +1041,7 @@ def wait_for_engine_startup( # Validate config hash consistency across DP workers if parallel_config.data_parallel_size > 1: worker_config_hash = msg.get("parallel_config_hash") - expected_hash = parallel_config.compute_hash() + expected_hash = parallel_config.compile_factors() if worker_config_hash != expected_hash: raise RuntimeError( f"Configuration mismatch detected for engine " From ce5fc7ea25d7f797a7f303927c0fc3e35f4716bb Mon Sep 17 00:00:00 2001 From: vnadathur Date: Thu, 20 Nov 2025 13:33:02 -0800 Subject: [PATCH 05/57] check 
if .compile_factors exists on the subobject allows us to stop handling passconfig specially. Signed-off-by: vnadathur Co-Authored-By: Srreyansh Sethi <107075589+WorldExplored@users.noreply.github.com> --- vllm/config/compilation.py | 2 -- vllm/config/utils.py | 4 ++++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index 929eb84004a9..2951e0315649 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -526,11 +526,9 @@ def compile_factors(self, *, return_factors: bool = False) -> HashResult: "traced_files", "compilation_time", "static_forward_context", - "pass_config", # handled separately below } factors = get_compile_factors(self, ignored_factors) - factors["pass_config"] = self.pass_config.compile_factors() if return_factors: return factors or None return hash_factors(factors) diff --git a/vllm/config/utils.py b/vllm/config/utils.py index 1a2cdad1bf92..20437f30eff2 100644 --- a/vllm/config/utils.py +++ b/vllm/config/utils.py @@ -281,6 +281,10 @@ def get_compile_factors(config: ConfigT, ignored_factors: set[str]) -> dict[str, if factor in ignored_factors: continue value = getattr(config, factor, None) + # Nested configs expose factors via compile_factors; unwrap first. 
+ if hasattr(value, "compile_factors") and callable(value.compile_factors): + nested = value.compile_factors(return_factors=True) + value = [] if nested is None else nested try: factors[factor] = normalize_value(value) except TypeError as e: From 5eed2d77828011899b6cc08bc28e50bfd2dc8024 Mon Sep 17 00:00:00 2001 From: vnadathur Date: Thu, 20 Nov 2025 13:44:26 -0800 Subject: [PATCH 06/57] fix recursion Signed-off-by: vnadathur --- vllm/envs.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/vllm/envs.py b/vllm/envs.py index afaa8b034e02..fbb5641322ab 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -1571,12 +1571,8 @@ def is_set(name: str): raise AttributeError(f"module {__name__!r} has no attribute {name!r}") -def compile_factors() -> dict[str, object]: - """Return env vars used for torch.compile cache keys. - - Start with every known vLLM env var; drop entries in `ignored_factors`; - hash everything else. This keeps the cache key aligned across workers.""" - +def _collect_compile_factors() -> dict[str, object]: + """Collect env vars used for torch.compile cache keys.""" ignored_factors: set[str] = { "MAX_JOBS", "VLLM_RPC_BASE_PATH", @@ -1682,9 +1678,9 @@ def compile_factors() -> dict[str, object]: return factors -def compile_factors(*, return_factors: bool = False): - """Return a canonical hash for the environment compile factors.""" - factors = compile_factors() +def compile_factors(*, return_factors: bool = True): + """Return env compile factors (dict) or hashed string when requested.""" + factors = _collect_compile_factors() if return_factors: - return factors if factors else None + return factors return hash_factors(factors) From fd33225985af6c7c995e0a17ff1ff01327694475 Mon Sep 17 00:00:00 2001 From: vnadathur Date: Thu, 20 Nov 2025 13:46:26 -0800 Subject: [PATCH 07/57] envs.py update Signed-off-by: vnadathur --- vllm/envs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/envs.py b/vllm/envs.py index
fbb5641322ab..cd8db7584b4e 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -1571,7 +1571,7 @@ def is_set(name: str): raise AttributeError(f"module {__name__!r} has no attribute {name!r}") -def _collect_compile_factors() -> dict[str, object]: +def collect_compile_factors() -> dict[str, object]: """Collect env vars used for torch.compile cache keys.""" ignored_factors: set[str] = { "MAX_JOBS", @@ -1680,7 +1680,7 @@ def _collect_compile_factors() -> dict[str, object]: def compile_factors(*, return_factors: bool = True): """Return env compile factors (dict) or hashed string when requested.""" - factors = _collect_compile_factors() + factors = collect_compile_factors() if return_factors: return factors return hash_factors(factors) From 77eed455e1300b64e93b86da9df6913080904a61 Mon Sep 17 00:00:00 2001 From: vnadathur Date: Thu, 20 Nov 2025 13:51:51 -0800 Subject: [PATCH 08/57] fix precommit Signed-off-by: vnadathur --- vllm/config/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vllm/config/utils.py b/vllm/config/utils.py index 20437f30eff2..33f29f5eb047 100644 --- a/vllm/config/utils.py +++ b/vllm/config/utils.py @@ -270,7 +270,9 @@ def normalize_value(x): ) -def get_compile_factors(config: ConfigT, ignored_factors: set[str]) -> dict[str, object]: +def get_compile_factors( + config: ConfigT, ignored_factors: set[str] +) -> dict[str, object]: """Gets the factors used for hashing a config class. - Includes all dataclass fields not in `ignored_factors`. - Errors on non-normalizable values. 
From 76c276123286e7561b7af2d6913ac0962f504f4f Mon Sep 17 00:00:00 2001 From: vnadathur Date: Thu, 20 Nov 2025 14:02:42 -0800 Subject: [PATCH 09/57] precommit Signed-off-by: vnadathur --- vllm/distributed/eplb/eplb_state.py | 4 +++- vllm/v1/engine/utils.py | 7 ++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/vllm/distributed/eplb/eplb_state.py b/vllm/distributed/eplb/eplb_state.py index ddc1f8821590..a352e43353b0 100644 --- a/vllm/distributed/eplb/eplb_state.py +++ b/vllm/distributed/eplb/eplb_state.py @@ -29,6 +29,7 @@ import time from collections.abc import Sequence from dataclasses import dataclass +from typing import cast import torch from torch.distributed import ProcessGroup, all_reduce @@ -385,7 +386,8 @@ def add_model( ) self.expert_rearrangement_step = 0 - self.model_states[model_config.compile_factors()] = EplbModelState( + model_hash = cast(str, model_config.compile_factors()) + self.model_states[model_hash] = EplbModelState( physical_to_logical_map, logical_to_physical_map, logical_replica_count, diff --git a/vllm/v1/engine/utils.py b/vllm/v1/engine/utils.py index 55f8a29f72ba..0157806655c6 100644 --- a/vllm/v1/engine/utils.py +++ b/vllm/v1/engine/utils.py @@ -9,7 +9,7 @@ from enum import Enum, auto from multiprocessing import Process, connection from multiprocessing.process import BaseProcess -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, cast from unittest.mock import patch import msgspec @@ -1000,6 +1000,7 @@ def wait_for_engine_startup( f"dp lb mode" ) + parallel_hash = cast(str, parallel_config.compile_factors()) if status == "HELLO" and engine.state == CoreEngineState.NEW: # Send init message with DP config info and config hash. # The config hash ensures all DP workers have compatible configs. 
@@ -1015,7 +1016,7 @@ def wait_for_engine_startup( "data_parallel_size", ) }, - parallel_config_hash=parallel_config.compile_factors() + parallel_config_hash=parallel_hash if parallel_config.data_parallel_size > 1 else None, ) @@ -1041,7 +1042,7 @@ def wait_for_engine_startup( # Validate config hash consistency across DP workers if parallel_config.data_parallel_size > 1: worker_config_hash = msg.get("parallel_config_hash") - expected_hash = parallel_config.compile_factors() + expected_hash = parallel_hash if worker_config_hash != expected_hash: raise RuntimeError( f"Configuration mismatch detected for engine " From 9cdc0d73079f01397cfe514870bb615251224697 Mon Sep 17 00:00:00 2001 From: WorldExplored Date: Thu, 20 Nov 2025 18:46:23 -0500 Subject: [PATCH 10/57] addressed reviewer concerns Signed-off-by: WorldExplored Co-Authored-By: vnadathur <236933696+vnadathur@users.noreply.github.com> --- tests/compile/fullgraph/test_toy_llama.py | 14 ++++++-------- tests/config/test_multimodal_config.py | 6 +++--- vllm/compilation/backends.py | 21 +++++++++++++-------- vllm/config/cache.py | 7 ++----- vllm/config/utils.py | 2 +- 5 files changed, 25 insertions(+), 25 deletions(-) diff --git a/tests/compile/fullgraph/test_toy_llama.py b/tests/compile/fullgraph/test_toy_llama.py index 7b6e97981a13..52052399325a 100644 --- a/tests/compile/fullgraph/test_toy_llama.py +++ b/tests/compile/fullgraph/test_toy_llama.py @@ -45,16 +45,14 @@ class LlamaConfig: tractable_init: bool = False random_seed: int = 0 - def compile_factors(self) -> str: - factors: list[Any] = [] - for k, v in self.__dict__.items(): - if k == "random_seed": + def compile_factors(self) -> list[tuple[str, Any]]: + factors: list[tuple[str, Any]] = [] + for key, value in self.__dict__.items(): + if key == "random_seed": continue - factors.append((k, v)) + factors.append((key, value)) factors.sort() - import hashlib - - return hashlib.md5(str(factors).encode(), usedforsecurity=False).hexdigest() + return factors def 
__post_init__(self): assert self.mlp_size >= self.hidden_size diff --git a/tests/config/test_multimodal_config.py b/tests/config/test_multimodal_config.py index 90ae2a49d280..e6885f06ae71 100644 --- a/tests/config/test_multimodal_config.py +++ b/tests/config/test_multimodal_config.py @@ -18,8 +18,8 @@ def test_mm_encoder_attn_backend_invalid(): def test_mm_encoder_attn_backend_hash_updates(): - base_hash = MultiModalConfig().compile_factors() - overridden_hash = MultiModalConfig( + base_compile_signature = MultiModalConfig().compile_factors() + overridden_compile_signature = MultiModalConfig( mm_encoder_attn_backend=AttentionBackendEnum.FLASH_ATTN ).compile_factors() - assert base_hash != overridden_hash + assert base_compile_signature != overridden_compile_signature diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py index 4818ab29c3c0..31995e737325 100644 --- a/vllm/compilation/backends.py +++ b/vllm/compilation/backends.py @@ -590,8 +590,8 @@ def __call__( env_factors = envs.compile_factors() env_hash = hash_factors(env_factors) # Compute config/compiler/code hashes once and reuse - config_hash = vllm_config.compile_factors() - compiler_hash = self.compiler_manager.compile_factors(vllm_config) + config_compile_signature = vllm_config.compile_factors() + compiler_compile_signature = self.compiler_manager.compile_factors(vllm_config) forward_code_files = list(sorted(self.compilation_config.traced_files)) logger.debug( @@ -619,7 +619,12 @@ def __call__( # that affects the compilation. if none of the factors change, # the cache dir will be the same so that we can reuse the compiled # graph. - factors = [env_hash, config_hash, code_hash, compiler_hash] + factors = [ + env_hash, + config_compile_signature, + code_hash, + compiler_compile_signature, + ] # Use SHA-256 for cache key hashing to be consistent across # compile_factors functions. Truncate for a short cache dir name. 
hash_key = hashlib.sha256(str(factors).encode()).hexdigest()[:10] @@ -660,8 +665,8 @@ def __call__( logger.debug( "torch.compile cache factors: env=%s cfg=%s comp=%s code=%s dir=%s", env_hash, - config_hash, - compiler_hash, + config_compile_signature, + compiler_compile_signature, code_hash, local_cache_dir, ) @@ -671,7 +676,7 @@ def __call__( logger.debug( "Compile env factors (raw):\n%s\nVllm config hash: %s", lazy(partial(pprint.pformat, env_factors, width=120)), - config_hash, + config_compile_signature, ) meta_path = os.path.join(local_cache_dir, "cache_key_factors.json") if not os.path.exists(meta_path): @@ -679,9 +684,9 @@ def __call__( json.dump( { "env": env_factors, # raw factors used for env_hash - "config_hash": config_hash, + "config_hash": config_compile_signature, "code_hash": code_hash, - "compiler_hash": compiler_hash, + "compiler_hash": compiler_compile_signature, }, f, indent=2, diff --git a/vllm/config/cache.py b/vllm/config/cache.py index 8e2952750e92..31b760d66640 100644 --- a/vllm/config/cache.py +++ b/vllm/config/cache.py @@ -7,7 +7,7 @@ from pydantic import Field, SkipValidation, field_validator from pydantic.dataclasses import dataclass -from vllm.config.utils import HashResult, config, get_compile_factors, hash_factors +from vllm.config.utils import HashResult, config, get_compile_factors from vllm.logger import init_logger from vllm.utils.mem_constants import GiB_bytes from vllm.utils.mem_utils import get_cpu_memory @@ -181,10 +181,7 @@ def compile_factors(self, *, return_factors: bool = False) -> HashResult: "kv_sharing_fast_prefill", } - factors = get_compile_factors(self, ignored_factors) - if return_factors: - return factors or None - return hash_factors(factors) + return get_compile_factors(self, ignored_factors) def metrics_info(self): # convert cache_config to dict(key: str, value: str) for prometheus diff --git a/vllm/config/utils.py b/vllm/config/utils.py index 33f29f5eb047..b9a4d56822bc 100644 --- a/vllm/config/utils.py +++ 
b/vllm/config/utils.py @@ -284,7 +284,7 @@ def get_compile_factors( continue value = getattr(config, factor, None) # Nested configs expose factors via compile_factors; unwrap first. - if hasattr(value, "compile_factors") and callable(value.compile_factors): + if isinstance(value, SupportsCompileFactors): nested = value.compile_factors(return_factors=True) value = [] if nested is None else nested try: From b3bca0753414693f4687ec355146dffef3ff89a4 Mon Sep 17 00:00:00 2001 From: vnadathur Date: Thu, 20 Nov 2025 16:54:15 -0800 Subject: [PATCH 11/57] factors can be a list of recursive dicts, and we can call hash_factors on it to produce the final hash. Include the raw factors for config, code, and compiler Signed-off-by: vnadathur Co-Authored-By: Srreyansh Sethi <107075589+WorldExplored@users.noreply.github.com> --- docs/design/torch_compile.md | 4 +- vllm/compilation/backends.py | 54 ++++++++++------ vllm/compilation/compiler_interface.py | 30 +++------ vllm/config/cache.py | 5 +- vllm/config/compilation.py | 14 ++-- vllm/config/device.py | 9 +-- vllm/config/ec_transfer.py | 12 ++-- vllm/config/kv_transfer.py | 12 ++-- vllm/config/load.py | 12 ++-- vllm/config/lora.py | 27 ++++---- vllm/config/model.py | 14 +--- vllm/config/multimodal.py | 11 ++-- vllm/config/observability.py | 9 +-- vllm/config/parallel.py | 8 +-- vllm/config/pooler.py | 12 ++-- vllm/config/scheduler.py | 12 ++-- vllm/config/speculative.py | 20 +++--- vllm/config/structured_outputs.py | 13 ++-- vllm/config/utils.py | 8 +-- vllm/config/vllm.py | 89 ++++++++------------------ vllm/distributed/eplb/eplb_state.py | 5 +- vllm/envs.py | 11 ++-- vllm/v1/engine/core.py | 6 +- vllm/v1/engine/utils.py | 6 +- 24 files changed, 166 insertions(+), 237 deletions(-) diff --git a/docs/design/torch_compile.md b/docs/design/torch_compile.md index 27edc4f89201..996d5b706297 100644 --- a/docs/design/torch_compile.md +++ b/docs/design/torch_compile.md @@ -19,8 +19,8 @@ vLLM will take all the available factors into 
consideration, and decide a direct The factors considered include: -- All the related configs (see the `compute_hash` functions in their respective configs in the [config folder](../../vllm/config)) -- PyTorch configs (see the `compute_hash` functions in the [compiler_interface.py](../../vllm/compilation/compiler_interface.py)) +- All the related configs (see the `compile_factors` functions in their respective configs in the [config folder](../../vllm/config)) +- PyTorch configs (see the `compile_factors` functions in the [compiler_interface.py](../../vllm/compilation/compiler_interface.py)) - The model's forward function and the relevant functions called by the forward function (see below) With all these factors taken into consideration, usually we can guarantee that the cache is safe to use, and will not cause any unexpected behavior. Therefore, the cache is enabled by default. If you want to debug the compilation process, or if you suspect the cache is causing some issues, you can disable it by setting the environment variable `VLLM_DISABLE_COMPILE_CACHE=1`. 
diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py index 31995e737325..f34fdba29d4f 100644 --- a/vllm/compilation/backends.py +++ b/vllm/compilation/backends.py @@ -93,8 +93,14 @@ def __init__(self, compilation_config: CompilationConfig): self.compilation_config = compilation_config self.compiler = make_compiler(compilation_config) - def compile_factors(self, vllm_config: VllmConfig) -> str: - return self.compiler.compile_factors(vllm_config) + def compile_factors( + self, vllm_config: VllmConfig + ) -> tuple[dict[str, object], str]: + raw_factors = self.compiler.compile_factors(vllm_config) or {} + hash_str = hashlib.md5( + str(raw_factors).encode(), usedforsecurity=False + ).hexdigest()[:10] + return raw_factors, hash_str @contextmanager def compile_context(self, runtime_shape: int | None = None): @@ -590,28 +596,40 @@ def __call__( env_factors = envs.compile_factors() env_hash = hash_factors(env_factors) # Compute config/compiler/code hashes once and reuse - config_compile_signature = vllm_config.compile_factors() - compiler_compile_signature = self.compiler_manager.compile_factors(vllm_config) + config_factors = vllm_config.compile_factors() or {} + config_hash = hash_factors(config_factors) + compiler_factors, compiler_hash = self.compiler_manager.compile_factors( + vllm_config + ) forward_code_files = list(sorted(self.compilation_config.traced_files)) logger.debug( "Traced files (to be considered for compilation cache):\n%s", lazy(lambda: "\n".join(forward_code_files)), ) - hash_content = [] + code_factors: list[dict[str, str]] = [] + hash_content_parts: list[str] = [] for filepath in forward_code_files: - hash_content.append(filepath) + hash_content_parts.append(filepath) + entry: dict[str, str] = {"path": filepath} if filepath == "": # This means the function was dynamically generated, with # e.g. exec(). We can't actually check these. 
+ code_factors.append(entry) continue try: with open(filepath) as f: - hash_content.append(f.read()) + content = f.read() except Exception: logger.warning("Failed to read file %s", filepath) + code_factors.append(entry) continue - code_hash = hashlib.sha256("\n".join(hash_content).encode()).hexdigest() + entry["content"] = content + code_factors.append(entry) + hash_content_parts.append(content) + code_hash = hashlib.sha256( + "\n".join(hash_content_parts).encode() + ).hexdigest() # Clear after consumption self.compilation_config.traced_files.clear() if not self.compilation_config.cache_dir: @@ -619,12 +637,7 @@ def __call__( # that affects the compilation. if none of the factors change, # the cache dir will be the same so that we can reuse the compiled # graph. - factors = [ - env_hash, - config_compile_signature, - code_hash, - compiler_compile_signature, - ] + factors = [env_hash, config_hash, code_hash, compiler_hash] # Use SHA-256 for cache key hashing to be consistent across # compile_factors functions. Truncate for a short cache dir name. 
hash_key = hashlib.sha256(str(factors).encode()).hexdigest()[:10] @@ -665,8 +678,8 @@ def __call__( logger.debug( "torch.compile cache factors: env=%s cfg=%s comp=%s code=%s dir=%s", env_hash, - config_compile_signature, - compiler_compile_signature, + config_hash, + compiler_hash, code_hash, local_cache_dir, ) @@ -676,7 +689,7 @@ def __call__( logger.debug( "Compile env factors (raw):\n%s\nVllm config hash: %s", lazy(partial(pprint.pformat, env_factors, width=120)), - config_compile_signature, + config_hash, ) meta_path = os.path.join(local_cache_dir, "cache_key_factors.json") if not os.path.exists(meta_path): @@ -684,9 +697,12 @@ def __call__( json.dump( { "env": env_factors, # raw factors used for env_hash - "config_hash": config_compile_signature, + "config": config_factors, + "config_hash": config_hash, + "compiler": compiler_factors, + "compiler_hash": compiler_hash, "code_hash": code_hash, - "compiler_hash": compiler_compile_signature, + "code": code_factors, }, f, indent=2, diff --git a/vllm/compilation/compiler_interface.py b/vllm/compilation/compiler_interface.py index a340b9636b2a..5462b2695515 100644 --- a/vllm/compilation/compiler_interface.py +++ b/vllm/compilation/compiler_interface.py @@ -2,7 +2,6 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import contextlib import copy -import hashlib import os from collections.abc import Callable from contextlib import ExitStack @@ -46,17 +45,16 @@ def initialize_cache( """ pass - def compile_factors(self, vllm_config: VllmConfig) -> str: + def compile_factors(self, vllm_config: VllmConfig) -> dict[str, object] | None: """ - Gather all the relevant information from the vLLM config, - to compute a hash so that we can cache the compiled model. + Gather compiler-specific factors that influence the generated code. See [`VllmConfig.compile_factors`][vllm.config.VllmConfig.compile_factors] - to check what information - is already considered by default. 
This function should only - consider the information that is specific to the compiler. + for the base configuration factors. This method should return any + additional data that uniquely identifies the compiler's contribution to + the cache key. """ - return "" + return None def compile( self, @@ -195,12 +193,8 @@ class InductorStandaloneAdaptor(CompilerInterface): def __init__(self, save_format: Literal["binary", "unpacked"]): self.save_format = save_format - def compile_factors(self, vllm_config: VllmConfig) -> str: - factors = get_inductor_factors() - hash_str = hashlib.md5( - str(factors).encode(), usedforsecurity=False - ).hexdigest()[:10] - return hash_str + def compile_factors(self, vllm_config: VllmConfig) -> dict[str, object] | None: + return {"inductor_standalone": get_inductor_factors()} def initialize_cache( self, cache_dir: str, disable_cache: bool = False, prefix: str = "" @@ -284,12 +278,8 @@ class InductorAdaptor(CompilerInterface): name = "inductor" - def compile_factors(self, vllm_config: VllmConfig) -> str: - factors = get_inductor_factors() - hash_str = hashlib.md5( - str(factors).encode(), usedforsecurity=False - ).hexdigest()[:10] - return hash_str + def compile_factors(self, vllm_config: VllmConfig) -> dict[str, object] | None: + return {"inductor": get_inductor_factors()} def initialize_cache( self, cache_dir: str, disable_cache: bool = False, prefix: str = "" diff --git a/vllm/config/cache.py b/vllm/config/cache.py index 31b760d66640..4172b0a15001 100644 --- a/vllm/config/cache.py +++ b/vllm/config/cache.py @@ -147,7 +147,7 @@ class CacheConfig: 'native' (vLLM native CPU offloading), 'lmcache' This option must be used together with kv_offloading_size.""" - def compile_factors(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self) -> HashResult: """ WARNING: Whenever a new field is added to this config, review `ignored_factors` to decide whether the field should be excluded. 
@@ -181,7 +181,8 @@ def compile_factors(self, *, return_factors: bool = False) -> HashResult: "kv_sharing_fast_prefill", } - return get_compile_factors(self, ignored_factors) + factors = get_compile_factors(self, ignored_factors) + return factors or None def metrics_info(self): # convert cache_config to dict(key: str, value: str) for prometheus diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index 2951e0315649..86ac8e7d6ec1 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -13,7 +13,7 @@ import vllm.envs as envs from vllm.compilation.inductor_pass import CallableInductorPass, InductorPass -from vllm.config.utils import HashResult, config, get_compile_factors, hash_factors +from vllm.config.utils import HashResult, config, get_compile_factors from vllm.logger import init_logger from vllm.platforms import current_platform from vllm.utils.import_utils import resolve_obj_by_qualname @@ -159,16 +159,14 @@ def default_fi_allreduce_fusion_max_size_mb() -> dict[int, float]: current_platform.get_device_capability().to_int(), {} ) - def compile_factors(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self) -> HashResult: """ Produces a hash unique to the pass configuration. Any new fields that affect compilation should be added to the hash. Any future fields that don't affect compilation should be excluded. 
""" factors = asdict(self) - if return_factors: - return factors or None - return InductorPass.hash_dict(factors) + return factors or None def __post_init__(self) -> None: if not self.enable_noop: @@ -506,7 +504,7 @@ class CompilationConfig: "vllm::sparse_attn_indexer", ] - def compile_factors(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self) -> HashResult: """ Provide a hash that uniquely identifies all the configs that affect the structure of the computation @@ -529,9 +527,7 @@ def compile_factors(self, *, return_factors: bool = False) -> HashResult: } factors = get_compile_factors(self, ignored_factors) - if return_factors: - return factors or None - return hash_factors(factors) + return factors or None def __repr__(self) -> str: exclude = { diff --git a/vllm/config/device.py b/vllm/config/device.py index 350fa1f37a58..477eb89a5c1a 100644 --- a/vllm/config/device.py +++ b/vllm/config/device.py @@ -8,7 +8,7 @@ from pydantic import ConfigDict, SkipValidation from pydantic.dataclasses import dataclass -from vllm.config.utils import HashResult, config, hash_factors, normalize_value +from vllm.config.utils import HashResult, config Device = Literal["auto", "cuda", "cpu", "tpu", "xpu"] @@ -28,7 +28,7 @@ class DeviceConfig: """Device type from the current platform. This is set in `__post_init__`.""" - def compile_factors(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -43,10 +43,7 @@ def compile_factors(self, *, return_factors: bool = False) -> HashResult: # no factors to consider. # the device/platform information will be summarized # by torch/vllm automatically. 
- factors: list[Any] = [] - if return_factors: - return factors or None - return hash_factors({"factors": normalize_value(factors)}) + return None def __post_init__(self): if self.device == "auto": diff --git a/vllm/config/ec_transfer.py b/vllm/config/ec_transfer.py index 01981d22bfc5..023eb03b1173 100644 --- a/vllm/config/ec_transfer.py +++ b/vllm/config/ec_transfer.py @@ -6,7 +6,7 @@ from pydantic.dataclasses import dataclass -from vllm.config.utils import HashResult, config, hash_factors, normalize_value +from vllm.config.utils import HashResult, config ECProducer = Literal["ec_producer"] ECConsumer = Literal["ec_consumer"] @@ -59,7 +59,7 @@ class ECTransferConfig: """The Python module path to dynamically load the EC connector from. Only supported in V1.""" - def compile_factors(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -71,12 +71,8 @@ def compile_factors(self, *, return_factors: bool = False) -> HashResult: excluding anything before input ids/embeddings and after the final hidden states. """ - # no factors to consider. - # this config will not affect the computation graph. - factors: list[Any] = [] - if return_factors: - return factors or None - return hash_factors({"factors": normalize_value(factors)}) + # This config does not affect the compiled graph. 
+ return None def __post_init__(self) -> None: if self.engine_id is None: diff --git a/vllm/config/kv_transfer.py b/vllm/config/kv_transfer.py index 0dc0a5d27059..60edab0987bd 100644 --- a/vllm/config/kv_transfer.py +++ b/vllm/config/kv_transfer.py @@ -7,7 +7,7 @@ from pydantic.dataclasses import dataclass -from vllm.config.utils import HashResult, config, hash_factors, normalize_value +from vllm.config.utils import HashResult, config KVProducer = Literal["kv_producer", "kv_both"] KVConsumer = Literal["kv_consumer", "kv_both"] @@ -63,7 +63,7 @@ class KVTransferConfig: enable_permute_local_kv: bool = False """Experiment feature flag to enable HND to NHD KV Transfer""" - def compile_factors(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -75,12 +75,8 @@ def compile_factors(self, *, return_factors: bool = False) -> HashResult: excluding anything before input ids/embeddings and after the final hidden states. """ - # no factors to consider. - # this config will not affect the computation graph. - factors: list[Any] = [] - if return_factors: - return factors or None - return hash_factors({"factors": normalize_value(factors)}) + # This config does not affect the compiled graph. 
+ return None def __post_init__(self) -> None: if self.engine_id is None: diff --git a/vllm/config/load.py b/vllm/config/load.py index 2c276690e6e5..3a7b7742de81 100644 --- a/vllm/config/load.py +++ b/vllm/config/load.py @@ -6,7 +6,7 @@ from pydantic import Field, field_validator from pydantic.dataclasses import dataclass -from vllm.config.utils import HashResult, config, hash_factors, normalize_value +from vllm.config.utils import HashResult, config from vllm.logger import init_logger if TYPE_CHECKING: @@ -88,7 +88,7 @@ class LoadConfig: see original doc for `map_location` in https://pytorch.org/docs/stable/generated/torch.load.html """ - def compile_factors(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -100,12 +100,8 @@ def compile_factors(self, *, return_factors: bool = False) -> HashResult: excluding anything before input ids/embeddings and after the final hidden states. """ - # no factors to consider. - # this config will not affect the computation graph. - factors: list[Any] = [] - if return_factors: - return factors or None - return hash_factors({"factors": normalize_value(factors)}) + # This config does not affect the compiled graph. 
+ return None @field_validator("load_format", mode="after") def _lowercase_load_format(cls, load_format: str) -> str: diff --git a/vllm/config/lora.py b/vllm/config/lora.py index 1dac16929678..17e6881ddead 100644 --- a/vllm/config/lora.py +++ b/vllm/config/lora.py @@ -8,7 +8,7 @@ from pydantic.dataclasses import dataclass from typing_extensions import Self -from vllm.config.utils import HashResult, config, hash_factors, normalize_value +from vllm.config.utils import HashResult, config, normalize_value from vllm.logger import init_logger from vllm.platforms import current_platform @@ -69,7 +69,7 @@ class LoRAConfig: will be automatically assigned to 1-n with the names of the modalities in alphabetic order.""" - def compile_factors(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -81,17 +81,18 @@ def compile_factors(self, *, return_factors: bool = False) -> HashResult: excluding anything before input ids/embeddings and after the final hidden states. 
""" - factors: list[Any] = [] - factors.append(self.max_lora_rank) - factors.append(self.max_loras) - factors.append(self.fully_sharded_loras) - factors.append(self.lora_dtype) - factors.append(self.lora_extra_vocab_size) - factors.append(self.lora_vocab_padding_size) - - if return_factors: - return factors or None - return hash_factors({"factors": normalize_value(factors)}) + factors: list[Any] = [ + self.max_lora_rank, + self.max_loras, + self.fully_sharded_loras, + self.lora_dtype, + self.lora_extra_vocab_size, + self.lora_vocab_padding_size, + ] + normalized = normalize_value(factors) + if not normalized: + return None + return {"factors": normalized} @model_validator(mode="after") def _validate_lora_config(self) -> Self: diff --git a/vllm/config/model.py b/vllm/config/model.py index 2e81b98e8faf..2ba54b340f7c 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -17,13 +17,7 @@ from vllm.config.multimodal import MMCacheType, MMEncoderTPMode, MultiModalConfig from vllm.config.pooler import PoolerConfig from vllm.config.scheduler import RunnerType -from vllm.config.utils import ( - HashResult, - config, - get_compile_factors, - getattr_iter, - hash_factors, -) +from vllm.config.utils import HashResult, config, get_compile_factors, getattr_iter from vllm.logger import init_logger from vllm.platforms import current_platform from vllm.transformers_utils.config import ( @@ -316,7 +310,7 @@ class ModelConfig: skip_mm_profiling: InitVar[bool | None] = None video_pruning_rate: InitVar[float | None] = None - def compile_factors(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self) -> HashResult: """ WARNING: Whenever a new field is added to this config, review `ignored_factors` to decide whether that field must be excluded. 
@@ -372,9 +366,7 @@ def compile_factors(self, *, return_factors: bool = False) -> HashResult: } factors = get_compile_factors(self, ignored_factors) - if return_factors: - return factors or None - return hash_factors(factors) + return factors or None def _update_nested( self, diff --git a/vllm/config/multimodal.py b/vllm/config/multimodal.py index 35ce12af9f56..4e73e6e5b01d 100644 --- a/vllm/config/multimodal.py +++ b/vllm/config/multimodal.py @@ -7,7 +7,7 @@ from pydantic import ConfigDict, Field, field_validator, model_validator from pydantic.dataclasses import dataclass -from vllm.config.utils import HashResult, config, hash_factors, normalize_value +from vllm.config.utils import HashResult, config, normalize_value if TYPE_CHECKING: from vllm.attention.backends.registry import AttentionBackendEnum @@ -192,7 +192,7 @@ def _validate_multimodal_config(self): ) return self - def compile_factors(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -209,9 +209,10 @@ def compile_factors(self, *, return_factors: bool = False) -> HashResult: if self.mm_encoder_attn_backend is not None else None ] - if return_factors: - return factors or None - return hash_factors({"factors": normalize_value(factors)}) + normalized = normalize_value(factors) + if not normalized: + return None + return {"factors": normalized} def get_limit_per_prompt(self, modality: str) -> int: """ diff --git a/vllm/config/observability.py b/vllm/config/observability.py index 50d904fb41f6..ac147e2ebba7 100644 --- a/vllm/config/observability.py +++ b/vllm/config/observability.py @@ -9,7 +9,7 @@ from pydantic.dataclasses import dataclass from vllm import version -from vllm.config.utils import HashResult, config, hash_factors, normalize_value +from vllm.config.utils import HashResult, config DetailedTraceModules = Literal["model", "worker", "all"] @@ 
-62,7 +62,7 @@ def collect_model_execute_time(self) -> bool: or "all" in self.collect_detailed_traces ) - def compile_factors(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -76,10 +76,7 @@ def compile_factors(self, *, return_factors: bool = False) -> HashResult: """ # no factors to consider. # this config will not affect the computation graph. - factors: list[Any] = [] - if return_factors: - return factors or None - return hash_factors({"factors": normalize_value(factors)}) + return None @field_validator("show_hidden_metrics_for_version") @classmethod diff --git a/vllm/config/parallel.py b/vllm/config/parallel.py index a967e166ef17..b79cdf18ec86 100644 --- a/vllm/config/parallel.py +++ b/vllm/config/parallel.py @@ -11,7 +11,7 @@ from typing_extensions import Self import vllm.envs as envs -from vllm.config.utils import HashResult, config, get_compile_factors, hash_factors +from vllm.config.utils import HashResult, config, get_compile_factors from vllm.logger import init_logger from vllm.model_executor.layers.batch_invariant import ( vllm_is_batch_invariant, @@ -454,7 +454,7 @@ def sync_kv_cache_memory_size(dp_group: ProcessGroup, kv_cache_memory: int) -> i torch.distributed.all_reduce(tensor, op=ReduceOp.MIN, group=dp_group) return tensor.item() - def compile_factors(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self) -> HashResult: """ Provide a hash that uniquely identifies all the configs that affect the structure of the computation @@ -501,9 +501,7 @@ def compile_factors(self, *, return_factors: bool = False) -> HashResult: factors = get_compile_factors(self, ignored_factors) # Explicitly include backend affecting env factor as before factors["VLLM_ALL2ALL_BACKEND"] = str(envs.VLLM_ALL2ALL_BACKEND) - if return_factors: - return factors or None - return hash_factors(factors) 
+ return factors or None def __post_init__(self) -> None: # Set all2all_backend from env var if not specified, with deprecation warning diff --git a/vllm/config/pooler.py b/vllm/config/pooler.py index f37f46b210d5..05aa36841d5c 100644 --- a/vllm/config/pooler.py +++ b/vllm/config/pooler.py @@ -1,11 +1,9 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from typing import Any - from pydantic.dataclasses import dataclass -from vllm.config.utils import HashResult, config, hash_factors, normalize_value +from vllm.config.utils import HashResult, config from vllm.logger import init_logger logger = init_logger(__name__) @@ -86,7 +84,7 @@ def __post_init__(self): # raise deprecated warning for softmax and activation self.use_activation = get_use_activation(self) - def compile_factors(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -100,10 +98,8 @@ def compile_factors(self, *, return_factors: bool = False) -> HashResult: """ # no factors to consider. # this config will not affect the computation graph. - factors: list[Any] = [] - if return_factors: - return factors or None - return hash_factors({"factors": normalize_value(factors)}) + # No compile-time factors. 
+ return None def get_use_activation(o: object): diff --git a/vllm/config/scheduler.py b/vllm/config/scheduler.py index e7c6f6f8adf7..88d4e359f6c8 100644 --- a/vllm/config/scheduler.py +++ b/vllm/config/scheduler.py @@ -9,7 +9,7 @@ from pydantic.dataclasses import dataclass from typing_extensions import Self, deprecated -from vllm.config.utils import HashResult, config, hash_factors, normalize_value +from vllm.config.utils import HashResult, config from vllm.logger import init_logger from vllm.utils.import_utils import resolve_obj_by_qualname @@ -162,7 +162,7 @@ def get_scheduler_cls(self) -> type["SchedulerInterface"]: return cast(type["SchedulerInterface"], self.scheduler_cls) return resolve_obj_by_qualname(self.scheduler_cls) - def compile_factors(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -174,12 +174,8 @@ def compile_factors(self, *, return_factors: bool = False) -> HashResult: excluding anything before input ids/embeddings and after the final hidden states. """ - # no factors to consider. - # this config will not affect the computation graph. - factors: list[Any] = [] - if return_factors: - return factors or None - return hash_factors({"factors": normalize_value(factors)}) + # This config does not affect the compiled graph. 
+ return None @field_validator("scheduler_cls", "async_scheduling", mode="wrap") @classmethod diff --git a/vllm/config/speculative.py b/vllm/config/speculative.py index f5b9f6978625..bf4f81b94351 100644 --- a/vllm/config/speculative.py +++ b/vllm/config/speculative.py @@ -9,7 +9,7 @@ from typing_extensions import Self from vllm.config.parallel import ParallelConfig -from vllm.config.utils import HashResult, config, hash_factors, normalize_value +from vllm.config.utils import HashResult, config, normalize_value from vllm.logger import init_logger from vllm.utils.import_utils import LazyLoader, has_arctic_inference @@ -146,7 +146,7 @@ class SpeculativeConfig: tokens with estimated probability (based on frequency counts) greater than or equal to this value.""" - def compile_factors(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -158,13 +158,15 @@ def compile_factors(self, *, return_factors: bool = False) -> HashResult: excluding anything before input ids/embeddings and after the final hidden states. """ - factors: list[Any] = [] - # Eagle3 affects the computation graph because it returns intermediate - # hidden states in addition to the final hidden state. - factors.append(self.method == "eagle3") - if return_factors: - return factors or None - return hash_factors({"factors": normalize_value(factors)}) + factors: list[Any] = [ + # Eagle3 affects the computation graph because it returns + # intermediate hidden states in addition to the final hidden state. 
+ self.method == "eagle3" + ] + normalized = normalize_value(factors) + if not normalized: + return None + return {"factors": normalized} @staticmethod def hf_config_override(hf_config: PretrainedConfig) -> PretrainedConfig: diff --git a/vllm/config/structured_outputs.py b/vllm/config/structured_outputs.py index a7c6e6500bbf..02b5376f5e87 100644 --- a/vllm/config/structured_outputs.py +++ b/vllm/config/structured_outputs.py @@ -1,13 +1,13 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from typing import Any, Literal +from typing import Literal from pydantic import model_validator from pydantic.dataclasses import dataclass from typing_extensions import Self -from vllm.config.utils import HashResult, config, hash_factors, normalize_value +from vllm.config.utils import HashResult, config StructuredOutputsBackend = Literal[ "auto", "xgrammar", "guidance", "outlines", "lm-format-enforcer" @@ -42,7 +42,7 @@ class StructuredOutputsConfig: enable_in_reasoning: bool = False """Whether to use structured input for reasoning.""" - def compile_factors(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -54,12 +54,7 @@ def compile_factors(self, *, return_factors: bool = False) -> HashResult: excluding anything before input ids/embeddings and after the final hidden states. """ - # no factors to consider. - # this config will not affect the computation graph. 
- factors: list[Any] = [] - if return_factors: - return factors or None - return hash_factors({"factors": normalize_value(factors)}) + return None @model_validator(mode="after") def _validate_structured_output_config(self) -> Self: diff --git a/vllm/config/utils.py b/vllm/config/utils.py index b9a4d56822bc..c7a11b0f3182 100644 --- a/vllm/config/utils.py +++ b/vllm/config/utils.py @@ -26,7 +26,7 @@ ConfigType = type[DataclassInstance] ConfigT = TypeVar("ConfigT", bound=ConfigType) -HashResult = str | dict[str, object] | list[Any] | None +HashResult = dict[str, object] | None def config(cls: ConfigT) -> ConfigT: @@ -157,7 +157,7 @@ def is_init_field(cls: ConfigType, name: str) -> bool: @runtime_checkable class SupportsCompileFactors(Protocol): - def compile_factors(self, *, return_factors: bool = False) -> HashResult: ... + def compile_factors(self) -> HashResult: ... class SupportsMetricsInfo(Protocol): @@ -285,8 +285,8 @@ def get_compile_factors( value = getattr(config, factor, None) # Nested configs expose factors via compile_factors; unwrap first. 
if isinstance(value, SupportsCompileFactors): - nested = value.compile_factors(return_factors=True) - value = [] if nested is None else nested + nested = value.compile_factors() + value = {} if nested is None else nested try: factors[factor] = normalize_value(value) except TypeError as e: diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index a764edf7a987..91416e5ea269 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -3,8 +3,6 @@ import copy import getpass -import hashlib -import json import os import tempfile import threading @@ -117,7 +115,7 @@ class VllmConfig: instance_id: str = "" """The ID of the vLLM instance.""" - def compile_factors(self, *, return_factors: bool = False) -> HashResult: + def compile_factors(self) -> HashResult: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -136,81 +134,44 @@ def compile_factors(self, *, return_factors: bool = False) -> HashResult: from vllm import __version__ vllm_factors.append(__version__) - if self.model_config: - vllm_factors.append(self.model_config.compile_factors()) - else: - vllm_factors.append("None") - if self.cache_config: - vllm_factors.append(self.cache_config.compile_factors()) - else: - vllm_factors.append("None") - if self.parallel_config: - vllm_factors.append(self.parallel_config.compile_factors()) - else: - vllm_factors.append("None") - if self.scheduler_config: - vllm_factors.append(self.scheduler_config.compile_factors()) - else: - vllm_factors.append("None") - if self.device_config: - vllm_factors.append(self.device_config.compile_factors()) - else: - vllm_factors.append("None") - if self.load_config: - vllm_factors.append(self.load_config.compile_factors()) - else: - vllm_factors.append("None") + + def _append_config(config_obj: SupportsCompileFactors | None) -> None: + vllm_factors.append(config_obj.compile_factors() if config_obj else None) + + _append_config(self.model_config) + _append_config(self.cache_config) + 
_append_config(self.parallel_config) + _append_config(self.scheduler_config) + _append_config(self.device_config) + _append_config(self.load_config) if self.lora_config: vllm_factors.append(self.lora_config.compile_factors()) # LoRA creates static buffers based on max_num_batched_tokens. # The tensor sizes and strides get captured in the torch.compile # graph explicitly. - vllm_factors.append(str(self.scheduler_config.max_num_batched_tokens)) - else: - vllm_factors.append("None") - if self.speculative_config: - vllm_factors.append(self.speculative_config.compile_factors()) + vllm_factors.append(self.scheduler_config.max_num_batched_tokens) else: - vllm_factors.append("None") - if self.structured_outputs_config: - vllm_factors.append(self.structured_outputs_config.compile_factors()) - else: - vllm_factors.append("None") + vllm_factors.append(None) + _append_config(self.speculative_config) + _append_config(self.structured_outputs_config) vllm_factors.append(self.observability_config.compile_factors()) - if self.quant_config: - pass # should be captured by model_config.quantization if self.compilation_config: vllm_factors.append(self.compilation_config.compile_factors()) else: - vllm_factors.append("None") - if self.kv_transfer_config: - vllm_factors.append(self.kv_transfer_config.compile_factors()) - else: - vllm_factors.append("None") - if self.ec_transfer_config: - vllm_factors.append(self.ec_transfer_config.compile_factors()) - else: - vllm_factors.append("None") + vllm_factors.append(None) + _append_config(self.kv_transfer_config) + _append_config(self.ec_transfer_config) if self.additional_config: - if isinstance(additional_config := self.additional_config, dict): - additional_config_hash = hashlib.md5( - json.dumps(additional_config, sort_keys=True).encode(), - usedforsecurity=False, - ).hexdigest() + additional_config = self.additional_config + if isinstance(additional_config, dict): + vllm_factors.append(additional_config) else: - additional_config_hash = 
additional_config.compile_factors() - vllm_factors.append(additional_config_hash) + vllm_factors.append(additional_config.compile_factors()) else: - vllm_factors.append("None") - factors.append(vllm_factors) + vllm_factors.append(None) - if return_factors: - return vllm_factors if vllm_factors else [] - - hash_str = hashlib.md5( - str(factors).encode(), usedforsecurity=False - ).hexdigest()[:10] - return hash_str + factors.append(vllm_factors) + return {"vllm": factors} def pad_for_cudagraph(self, batch_size: int) -> int: # if batch_size > self.compilation_config.max_cudagraph_capture_size, diff --git a/vllm/distributed/eplb/eplb_state.py b/vllm/distributed/eplb/eplb_state.py index a352e43353b0..e56e4e5ce136 100644 --- a/vllm/distributed/eplb/eplb_state.py +++ b/vllm/distributed/eplb/eplb_state.py @@ -29,12 +29,12 @@ import time from collections.abc import Sequence from dataclasses import dataclass -from typing import cast import torch from torch.distributed import ProcessGroup, all_reduce from vllm.config import ModelConfig, ParallelConfig +from vllm.config.utils import hash_factors from vllm.distributed.parallel_state import ( get_ep_group, get_node_count, @@ -386,7 +386,8 @@ def add_model( ) self.expert_rearrangement_step = 0 - model_hash = cast(str, model_config.compile_factors()) + model_factors = model_config.compile_factors() or {} + model_hash = hash_factors(model_factors) self.model_states[model_hash] = EplbModelState( physical_to_logical_map, logical_to_physical_map, diff --git a/vllm/envs.py b/vllm/envs.py index cd8db7584b4e..6cc471cb8247 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -10,7 +10,7 @@ from collections.abc import Callable from typing import TYPE_CHECKING, Any, Literal -from vllm.config.utils import hash_factors, normalize_value +from vllm.config.utils import normalize_value if TYPE_CHECKING: VLLM_HOST_IP: str = "" @@ -1678,9 +1678,6 @@ def collect_compile_factors() -> dict[str, object]: return factors -def compile_factors(*, 
return_factors: bool = True): - """Return env compile factors (dict) or hashed string when requested.""" - factors = collect_compile_factors() - if return_factors: - return factors - return hash_factors(factors) +def compile_factors() -> dict[str, object]: + """Return env compile factors.""" + return collect_compile_factors() diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py index 4630d634451e..3904f20a30be 100644 --- a/vllm/v1/engine/core.py +++ b/vllm/v1/engine/core.py @@ -17,6 +17,7 @@ import zmq from vllm.config import ParallelConfig, VllmConfig +from vllm.config.utils import hash_factors from vllm.distributed import stateless_destroy_torch_distributed_process_group from vllm.envs import enable_envs_cache from vllm.logger import init_logger @@ -751,9 +752,10 @@ def _perform_handshake( "dp_stats_address": dp_stats_address, } if vllm_config.parallel_config.data_parallel_size > 1: - ready_msg["parallel_config_hash"] = ( - vllm_config.parallel_config.compile_factors() + parallel_factors = ( + vllm_config.parallel_config.compile_factors() or {} ) + ready_msg["parallel_config_hash"] = hash_factors(parallel_factors) handshake_socket.send(msgspec.msgpack.encode(ready_msg)) diff --git a/vllm/v1/engine/utils.py b/vllm/v1/engine/utils.py index 0157806655c6..207fa40a47ea 100644 --- a/vllm/v1/engine/utils.py +++ b/vllm/v1/engine/utils.py @@ -9,7 +9,7 @@ from enum import Enum, auto from multiprocessing import Process, connection from multiprocessing.process import BaseProcess -from typing import TYPE_CHECKING, cast +from typing import TYPE_CHECKING from unittest.mock import patch import msgspec @@ -17,6 +17,7 @@ from vllm import envs from vllm.config import CacheConfig, ParallelConfig, VllmConfig +from vllm.config.utils import hash_factors from vllm.logger import init_logger from vllm.platforms import current_platform from vllm.ray.ray_env import get_env_vars_to_copy @@ -1000,7 +1001,8 @@ def wait_for_engine_startup( f"dp lb mode" ) - parallel_hash = cast(str, 
parallel_config.compile_factors()) + parallel_factors = parallel_config.compile_factors() or {} + parallel_hash = hash_factors(parallel_factors) if status == "HELLO" and engine.state == CoreEngineState.NEW: # Send init message with DP config info and config hash. # The config hash ensures all DP workers have compatible configs. From 3ab19fcfd641c49033baacf963e386186b3da638 Mon Sep 17 00:00:00 2001 From: vnadathur Date: Thu, 20 Nov 2025 16:56:12 -0800 Subject: [PATCH 12/57] fix Updated references from 'compile_factors' to 'compute_hash' in the documentation. Signed-off-by: vnadathur Signed-off-by: vnadathur --- docs/design/torch_compile.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/design/torch_compile.md b/docs/design/torch_compile.md index 996d5b706297..27edc4f89201 100644 --- a/docs/design/torch_compile.md +++ b/docs/design/torch_compile.md @@ -19,8 +19,8 @@ vLLM will take all the available factors into consideration, and decide a direct The factors considered include: -- All the related configs (see the `compile_factors` functions in their respective configs in the [config folder](../../vllm/config)) -- PyTorch configs (see the `compile_factors` functions in the [compiler_interface.py](../../vllm/compilation/compiler_interface.py)) +- All the related configs (see the `compute_hash` functions in their respective configs in the [config folder](../../vllm/config)) +- PyTorch configs (see the `compute_hash` functions in the [compiler_interface.py](../../vllm/compilation/compiler_interface.py)) - The model's forward function and the relevant functions called by the forward function (see below) With all these factors taken into consideration, usually we can guarantee that the cache is safe to use, and will not cause any unexpected behavior. Therefore, the cache is enabled by default. 
If you want to debug the compilation process, or if you suspect the cache is causing some issues, you can disable it by setting the environment variable `VLLM_DISABLE_COMPILE_CACHE=1`. From 755e864b781fbe95b4ef03d38f137456ad9cac45 Mon Sep 17 00:00:00 2001 From: WorldExplored Date: Thu, 20 Nov 2025 20:33:33 -0500 Subject: [PATCH 13/57] fixed precommit Signed-off-by: WorldExplored Co-Authored-By: vnadathur <236933696+vnadathur@users.noreply.github.com> --- vllm/compilation/backends.py | 8 ++------ vllm/config/device.py | 2 +- vllm/config/observability.py | 2 +- vllm/v1/engine/core.py | 4 +--- 4 files changed, 5 insertions(+), 11 deletions(-) diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py index f34fdba29d4f..57e4209b8ea6 100644 --- a/vllm/compilation/backends.py +++ b/vllm/compilation/backends.py @@ -93,9 +93,7 @@ def __init__(self, compilation_config: CompilationConfig): self.compilation_config = compilation_config self.compiler = make_compiler(compilation_config) - def compile_factors( - self, vllm_config: VllmConfig - ) -> tuple[dict[str, object], str]: + def compile_factors(self, vllm_config: VllmConfig) -> tuple[dict[str, object], str]: raw_factors = self.compiler.compile_factors(vllm_config) or {} hash_str = hashlib.md5( str(raw_factors).encode(), usedforsecurity=False @@ -627,9 +625,7 @@ def __call__( entry["content"] = content code_factors.append(entry) hash_content_parts.append(content) - code_hash = hashlib.sha256( - "\n".join(hash_content_parts).encode() - ).hexdigest() + code_hash = hashlib.sha256("\n".join(hash_content_parts).encode()).hexdigest() # Clear after consumption self.compilation_config.traced_files.clear() if not self.compilation_config.cache_dir: diff --git a/vllm/config/device.py b/vllm/config/device.py index 477eb89a5c1a..13bced3126c6 100644 --- a/vllm/config/device.py +++ b/vllm/config/device.py @@ -2,7 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from dataclasses import field -from 
typing import Any, Literal +from typing import Literal import torch from pydantic import ConfigDict, SkipValidation diff --git a/vllm/config/observability.py b/vllm/config/observability.py index ac147e2ebba7..3ea44a80aa5d 100644 --- a/vllm/config/observability.py +++ b/vllm/config/observability.py @@ -2,7 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from functools import cached_property -from typing import Any, Literal, cast +from typing import Literal, cast from packaging.version import parse from pydantic import field_validator, model_validator diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py index 3904f20a30be..bea9ca93f850 100644 --- a/vllm/v1/engine/core.py +++ b/vllm/v1/engine/core.py @@ -752,9 +752,7 @@ def _perform_handshake( "dp_stats_address": dp_stats_address, } if vllm_config.parallel_config.data_parallel_size > 1: - parallel_factors = ( - vllm_config.parallel_config.compile_factors() or {} - ) + parallel_factors = vllm_config.parallel_config.compile_factors() or {} ready_msg["parallel_config_hash"] = hash_factors(parallel_factors) handshake_socket.send(msgspec.msgpack.encode(ready_msg)) From bdbfcb026393db632b15be96df9f20c558483218 Mon Sep 17 00:00:00 2001 From: WorldExplored Date: Thu, 20 Nov 2025 21:35:33 -0500 Subject: [PATCH 14/57] added split shared compilation helpers PR code PR: https://github.com/vllm-project/vllm/pull/29057 Signed-off-by: WorldExplored Co-Authored-By: vnadathur <236933696+vnadathur@users.noreply.github.com> --- vllm/compilation/backends.py | 20 ++++++++++++-------- vllm/compilation/caching.py | 26 ++++++++++++++------------ vllm/compilation/decorators.py | 7 ++++--- vllm/config/lora.py | 5 ++--- 4 files changed, 32 insertions(+), 26 deletions(-) diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py index 57e4209b8ea6..f7aac3192633 100644 --- a/vllm/compilation/backends.py +++ b/vllm/compilation/backends.py @@ -25,14 +25,16 @@ should_split, ) from vllm.config import 
CompilationConfig, CUDAGraphMode, VllmConfig -from vllm.config.utils import hash_factors from vllm.logger import init_logger from vllm.logging_utils import lazy from vllm.platforms import current_platform from vllm.utils.import_utils import resolve_obj_by_qualname from vllm.utils.torch_utils import is_torch_equal_or_newer -from .caching import VllmSerializableFunction +from .caching import ( + VllmSerializableFunction, + compute_env_and_config_hashes, +) from .compiler_interface import ( CompilerInterface, EagerAdaptor, @@ -591,15 +593,17 @@ def __call__( vllm_config = self.vllm_config # Minimal hashing here with existing utilities, reused below. - env_factors = envs.compile_factors() - env_hash = hash_factors(env_factors) - # Compute config/compiler/code hashes once and reuse - config_factors = vllm_config.compile_factors() or {} - config_hash = hash_factors(config_factors) + ( + env_hash, + config_hash, + env_factors, + config_factors, + ) = compute_env_and_config_hashes(vllm_config) compiler_factors, compiler_hash = self.compiler_manager.compile_factors( vllm_config ) - forward_code_files = list(sorted(self.compilation_config.traced_files)) + traced_files = set(self.compilation_config.traced_files) + forward_code_files = list(sorted(traced_files)) logger.debug( "Traced files (to be considered for compilation cache):\n%s", diff --git a/vllm/compilation/caching.py b/vllm/compilation/caching.py index b05ee651b077..a3e8c28b2f32 100644 --- a/vllm/compilation/caching.py +++ b/vllm/compilation/caching.py @@ -135,18 +135,20 @@ def co_name(self): return "VllmSerializableFunction" -def compilation_config_hash_factors(vllm_config: VllmConfig) -> list[str]: - factors = [] - # 0. factors come from the env, for example, The values of - # VLLM_PP_LAYER_PARTITION will affect the computation graph. - env_hash = hash_factors(envs.compile_factors()) - factors.append(env_hash) - - # 1. 
factors come from the vllm_config (it mainly summarizes how the - # model is created) - config_hash = vllm_config.compile_factors() - factors.append(config_hash) - return factors +def compute_env_and_config_hashes( + vllm_config: VllmConfig, +) -> tuple[str, str, dict[str, object], dict[str, object]]: + """ + Return the hashed environment factors, config hash, and raw factors. + Both AOT and JIT cache paths rely on this helper to ensure their cache keys + stay in sync. + """ + + env_factors = envs.compile_factors() + env_hash = hash_factors(env_factors) + config_factors = vllm_config.compile_factors() or {} + config_hash = hash_factors(config_factors) + return env_hash, config_hash, env_factors, config_factors def _compute_code_hash_with_content(file_contents: dict[str, str]) -> str: diff --git a/vllm/compilation/decorators.py b/vllm/compilation/decorators.py index 11a18c0e6bb7..6bf40a5b57fa 100644 --- a/vllm/compilation/decorators.py +++ b/vllm/compilation/decorators.py @@ -29,6 +29,7 @@ from vllm.utils.import_utils import resolve_obj_by_qualname from vllm.utils.torch_utils import supports_dynamo +from .caching import compute_env_and_config_hashes from .monitor import start_monitoring_torch_compile logger = init_logger(__name__) @@ -352,10 +353,10 @@ def __call__(self, *args, **kwargs): serialized backend artifacts), then we need to generate a new AOT compile artifact from scratch. """ - from .caching import compilation_config_hash_factors - - factors: list[str] = compilation_config_hash_factors(self.vllm_config) + # Keep AOT cache key in sync with JIT: env factors + config hash + model. 
+ env_hash, config_hash, *_ = compute_env_and_config_hashes(self.vllm_config) + factors: list[str] = [env_hash, config_hash] factors.append(_model_hash_key(self.forward)) hash_key = hashlib.sha256(str(factors).encode()).hexdigest() diff --git a/vllm/config/lora.py b/vllm/config/lora.py index 855919617c6f..e75f7181d4db 100644 --- a/vllm/config/lora.py +++ b/vllm/config/lora.py @@ -1,15 +1,14 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from typing import TYPE_CHECKING, Any, ClassVar, Literal +from typing import TYPE_CHECKING, Any, Literal import torch from pydantic import ConfigDict, Field, model_validator from pydantic.dataclasses import dataclass from typing_extensions import Self -from vllm.config.utils import -Result, config, normalize_value +from vllm.config.utils import HashResult, config, normalize_value from vllm.logger import init_logger if TYPE_CHECKING: From caab640e299077d541fa9ca9822cb1fcf6b8837f Mon Sep 17 00:00:00 2001 From: vnadathur Date: Fri, 21 Nov 2025 13:54:33 -0800 Subject: [PATCH 15/57] use hash_factors Signed-off-by: vnadathur --- vllm/compilation/backends.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py index f7aac3192633..af2cdc88a46e 100644 --- a/vllm/compilation/backends.py +++ b/vllm/compilation/backends.py @@ -25,6 +25,7 @@ should_split, ) from vllm.config import CompilationConfig, CUDAGraphMode, VllmConfig +from vllm.config.utils import hash_factors from vllm.logger import init_logger from vllm.logging_utils import lazy from vllm.platforms import current_platform @@ -610,9 +611,7 @@ def __call__( lazy(lambda: "\n".join(forward_code_files)), ) code_factors: list[dict[str, str]] = [] - hash_content_parts: list[str] = [] for filepath in forward_code_files: - hash_content_parts.append(filepath) entry: dict[str, str] = {"path": filepath} if filepath == "": # This means the function was 
dynamically generated, with @@ -628,8 +627,7 @@ def __call__( continue entry["content"] = content code_factors.append(entry) - hash_content_parts.append(content) - code_hash = hashlib.sha256("\n".join(hash_content_parts).encode()).hexdigest() + code_hash = hash_factors({"files": code_factors}) # Clear after consumption self.compilation_config.traced_files.clear() if not self.compilation_config.cache_dir: From e4dc2b78a437105ea8db4c85e2c7f39f064cc223 Mon Sep 17 00:00:00 2001 From: vnadathur Date: Fri, 21 Nov 2025 15:22:15 -0800 Subject: [PATCH 16/57] fix returning factors Signed-off-by: vnadathur Co-Authored-By: Srreyansh Sethi <107075589+WorldExplored@users.noreply.github.com> --- vllm/compilation/backends.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py index af2cdc88a46e..e8cc106a9cd4 100644 --- a/vllm/compilation/backends.py +++ b/vllm/compilation/backends.py @@ -96,12 +96,8 @@ def __init__(self, compilation_config: CompilationConfig): self.compilation_config = compilation_config self.compiler = make_compiler(compilation_config) - def compile_factors(self, vllm_config: VllmConfig) -> tuple[dict[str, object], str]: - raw_factors = self.compiler.compile_factors(vllm_config) or {} - hash_str = hashlib.md5( - str(raw_factors).encode(), usedforsecurity=False - ).hexdigest()[:10] - return raw_factors, hash_str + def compile_factors(self, vllm_config: VllmConfig) -> dict[str, object]: + return self.compiler.compile_factors(vllm_config) or {} @contextmanager def compile_context(self, runtime_shape: int | None = None): @@ -600,9 +596,8 @@ def __call__( env_factors, config_factors, ) = compute_env_and_config_hashes(vllm_config) - compiler_factors, compiler_hash = self.compiler_manager.compile_factors( - vllm_config - ) + compiler_factors = self.compiler_manager.compile_factors(vllm_config) + compiler_hash = hash_factors(compiler_factors) traced_files = 
set(self.compilation_config.traced_files) forward_code_files = list(sorted(traced_files)) @@ -635,10 +630,15 @@ def __call__( # that affects the compilation. if none of the factors change, # the cache dir will be the same so that we can reuse the compiled # graph. - factors = [env_hash, config_hash, code_hash, compiler_hash] + all_factors = { + "env": env_factors, + "config": config_factors, + "code": {"files": code_factors}, + "compiler": compiler_factors, + } # Use SHA-256 for cache key hashing to be consistent across # compile_factors functions. Truncate for a short cache dir name. - hash_key = hashlib.sha256(str(factors).encode()).hexdigest()[:10] + hash_key = hash_factors(all_factors)[:10] cache_dir = os.path.join( envs.VLLM_CACHE_ROOT, "torch_compile_cache", hash_key ) From a23a880fa6e59aa6dee7b7795b984675bf732199 Mon Sep 17 00:00:00 2001 From: WorldExplored Date: Fri, 21 Nov 2025 23:05:55 -0500 Subject: [PATCH 17/57] fixed precommit Signed-off-by: WorldExplored --- vllm/compilation/backends.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py index e8cc106a9cd4..c4dcdb841683 100644 --- a/vllm/compilation/backends.py +++ b/vllm/compilation/backends.py @@ -3,7 +3,6 @@ import ast import dataclasses -import hashlib import json import operator import os From ce558936ebc13c001548cefb6936e6ad648d80a4 Mon Sep 17 00:00:00 2001 From: WorldExplored Date: Tue, 25 Nov 2025 22:43:02 -0500 Subject: [PATCH 18/57] addressed reviewer concerns Signed-off-by: WorldExplored --- vllm/compilation/backends.py | 26 ++++++-------------- vllm/compilation/caching.py | 23 +++++++++++++++++ vllm/compilation/pass_manager.py | 1 + vllm/config/cache.py | 6 ++--- vllm/config/utils.py | 5 ++-- vllm/config/vllm.py | 5 ++-- vllm/distributed/eplb/eplb_state.py | 38 +++++++++++++++++++++-------- vllm/envs.py | 7 +----- 8 files changed, 69 insertions(+), 42 deletions(-) diff --git a/vllm/compilation/backends.py 
b/vllm/compilation/backends.py index e500ad654552..8329e1e9b807 100644 --- a/vllm/compilation/backends.py +++ b/vllm/compilation/backends.py @@ -12,6 +12,7 @@ from contextlib import contextmanager from copy import deepcopy from functools import partial +from pathlib import Path from typing import Any import torch @@ -35,6 +36,7 @@ from .caching import ( VllmSerializableFunction, compute_env_and_config_hashes, + get_code_factors, ) from .compiler_interface import ( CompilerInterface, @@ -607,29 +609,15 @@ def __call__( compiler_factors = self.compiler_manager.compile_factors(vllm_config) compiler_hash = hash_factors(compiler_factors) traced_files = set(self.compilation_config.traced_files) - forward_code_files = list(sorted(traced_files)) + forward_code_files = sorted( + (Path(filepath) for filepath in traced_files), key=str + ) logger.debug( "Traced files (to be considered for compilation cache):\n%s", - lazy(lambda: "\n".join(forward_code_files)), + lazy(lambda: "\n".join(map(str, forward_code_files))), ) - code_factors: list[dict[str, str]] = [] - for filepath in forward_code_files: - entry: dict[str, str] = {"path": filepath} - if filepath == "": - # This means the function was dynamically generated, with - # e.g. exec(). We can't actually check these. 
- code_factors.append(entry) - continue - try: - with open(filepath) as f: - content = f.read() - except Exception: - logger.warning("Failed to read file %s", filepath) - code_factors.append(entry) - continue - entry["content"] = content - code_factors.append(entry) + code_factors = get_code_factors(forward_code_files) code_hash = hash_factors({"files": code_factors}) # Clear after consumption self.compilation_config.traced_files.clear() diff --git a/vllm/compilation/caching.py b/vllm/compilation/caching.py index df754d7441f9..c94cf917a872 100644 --- a/vllm/compilation/caching.py +++ b/vllm/compilation/caching.py @@ -4,6 +4,7 @@ import inspect import os import pickle +from pathlib import Path from unittest.mock import patch import torch @@ -25,6 +26,28 @@ logger = init_logger(__name__) +def get_code_factors(forward_code_files: list[Path]) -> list[dict[str, str]]: + """Return per-file factors for compile cache hashing.""" + code_factors: list[dict[str, str]] = [] + for filepath in forward_code_files: + path_str = str(filepath) + entry: dict[str, str] = {"path": path_str} + if path_str == "": + # Dynamically generated code (e.g., exec); nothing to hash. + code_factors.append(entry) + continue + try: + with filepath.open() as f: + content = f.read() + except Exception: + logger.warning("Failed to read file %s", path_str) + code_factors.append(entry) + continue + entry["hash"] = hash_factors({"content": content}) + code_factors.append(entry) + return code_factors + + class VllmSerializableFunction(SerializableCallable): """ A wrapper around a compiled function by vllm. 
It will forward the tensor diff --git a/vllm/compilation/pass_manager.py b/vllm/compilation/pass_manager.py index 11d7ea1e4a63..c10e0e92d986 100644 --- a/vllm/compilation/pass_manager.py +++ b/vllm/compilation/pass_manager.py @@ -130,6 +130,7 @@ def uuid(self): state = {"pass_config": self.pass_config.compile_factors(), "passes": []} for pass_ in self.passes: state["passes"].append(pass_.uuid()) + state["passes"].append(self.post_cleanup.uuid()) state["passes"].append(self.fix_functionalization.uuid()) return InductorPass.hash_dict(state) diff --git a/vllm/config/cache.py b/vllm/config/cache.py index cc391efa403d..7b902ddd2016 100644 --- a/vllm/config/cache.py +++ b/vllm/config/cache.py @@ -7,7 +7,7 @@ from pydantic import Field, SkipValidation, field_validator from pydantic.dataclasses import dataclass -from vllm.config.utils import HashResult, config, get_compile_factors +from vllm.config.utils import CompileFactors, config, get_compile_factors from vllm.logger import init_logger from vllm.utils.mem_constants import GiB_bytes from vllm.utils.mem_utils import get_cpu_memory @@ -147,7 +147,7 @@ class CacheConfig: 'native' (vLLM native CPU offloading), 'lmcache' This option must be used together with kv_offloading_size.""" - def compile_factors(self) -> HashResult: + def compile_factors(self) -> CompileFactors: """ WARNING: Whenever a new field is added to this config, review `ignored_factors` to decide whether the field should be excluded. 
@@ -180,7 +180,7 @@ def compile_factors(self) -> HashResult: } factors = get_compile_factors(self, ignored_factors) - return factors or None + return factors def metrics_info(self): # convert cache_config to dict(key: str, value: str) for prometheus diff --git a/vllm/config/utils.py b/vllm/config/utils.py index c7a11b0f3182..d08b4de2db7f 100644 --- a/vllm/config/utils.py +++ b/vllm/config/utils.py @@ -26,7 +26,7 @@ ConfigType = type[DataclassInstance] ConfigT = TypeVar("ConfigT", bound=ConfigType) -HashResult = dict[str, object] | None +CompileFactors = dict[str, object] | None def config(cls: ConfigT) -> ConfigT: @@ -157,7 +157,7 @@ def is_init_field(cls: ConfigType, name: str) -> bool: @runtime_checkable class SupportsCompileFactors(Protocol): - def compile_factors(self) -> HashResult: ... + def compile_factors(self) -> CompileFactors: ... class SupportsMetricsInfo(Protocol): @@ -275,6 +275,7 @@ def get_compile_factors( ) -> dict[str, object]: """Gets the factors used for hashing a config class. - Includes all dataclass fields not in `ignored_factors`. + - Uses .compile_factors() for nested dataclasses that support it - Errors on non-normalizable values. 
""" factors: dict[str, object] = {} diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index 4d481314a525..e63aa32f32e2 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -39,7 +39,7 @@ from .scheduler import SchedulerConfig from .speculative import SpeculativeConfig from .structured_outputs import StructuredOutputsConfig -from .utils import HashResult, SupportsCompileFactors, config +from .utils import CompileFactors, SupportsCompileFactors, config if TYPE_CHECKING: from transformers import PretrainedConfig @@ -116,7 +116,7 @@ class VllmConfig: instance_id: str = "" """The ID of the vLLM instance.""" - def compile_factors(self) -> HashResult: + def compile_factors(self) -> CompileFactors: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if @@ -165,6 +165,7 @@ def _append_config(config_obj: SupportsCompileFactors | None) -> None: if self.additional_config: additional_config = self.additional_config if isinstance(additional_config, dict): + assert isinstance(additional_config, SupportsCompileFactors) vllm_factors.append(additional_config) else: vllm_factors.append(additional_config.compile_factors()) diff --git a/vllm/distributed/eplb/eplb_state.py b/vllm/distributed/eplb/eplb_state.py index b2b5dc613a08..9864b85d1a44 100644 --- a/vllm/distributed/eplb/eplb_state.py +++ b/vllm/distributed/eplb/eplb_state.py @@ -482,18 +482,36 @@ def add_model( ) self.expert_rearrangement_step = 0 + expert_buffer = [torch.empty_like(w) for w in model.expert_weights[0]] + + model_state = EplbModelState( + physical_to_logical_map=physical_to_logical_map, + logical_to_physical_map=logical_to_physical_map, + logical_replica_count=logical_replica_count, + expert_load_pass=expert_load_pass, + expert_load_window=expert_load_window, + model_name=model_config.model, + model=model, + expert_buffer=expert_buffer, + buffer_lock=threading.Lock(), + buffer_ready_event=None, + ep_buffer_ready=0, + layer_to_transfer=0, + 
rebalanced=False, + pending_global_ready_check=False, + is_unchanged=[], + is_received_locally=[], + experts_recv_loc={}, + is_async_enabled=self.is_async, + cuda_device_index=self.cuda_device_index, + new_physical_to_logical_map=new_physical_to_logical_map, + new_logical_to_physical_map=new_logical_to_physical_map, + new_logical_replica_count=new_logical_replica_count, + ) + model_factors = model_config.compile_factors() or {} model_hash = hash_factors(model_factors) - self.model_states[model_hash] = EplbModelState( - physical_to_logical_map, - logical_to_physical_map, - logical_replica_count, - expert_load_pass, - expert_load_window, - model_config.model, - model, - ) - self.model_states[model_config.compute_hash()] = model_state + self.model_states[model_hash] = model_state def step( self, diff --git a/vllm/envs.py b/vllm/envs.py index 47ef14f57a83..20a85881bc1d 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -1575,7 +1575,7 @@ def is_set(name: str): raise AttributeError(f"module {__name__!r} has no attribute {name!r}") -def collect_compile_factors() -> dict[str, object]: +def compile_factors() -> dict[str, object]: """Collect env vars used for torch.compile cache keys.""" ignored_factors: set[str] = { "MAX_JOBS", @@ -1680,8 +1680,3 @@ def collect_compile_factors() -> dict[str, object]: factors[var] = normalize_value(os.getenv(var)) return factors - - -def compile_factors() -> dict[str, object]: - """Return env compile factors.""" - return collect_compile_factors() From 9998552cc83e08c560d9e9af7ab8a858e4791409 Mon Sep 17 00:00:00 2001 From: WorldExplored Date: Tue, 25 Nov 2025 23:01:20 -0500 Subject: [PATCH 19/57] fixed precommit Signed-off-by: WorldExplored Co-Authored-By: vnadathur <236933696+vnadathur@users.noreply.github.com> --- vllm/compilation/compiler_interface.py | 1 - vllm/config/load.py | 1 - vllm/config/lora.py | 1 - vllm/config/pooler.py | 1 - vllm/config/scheduler.py | 1 - vllm/config/speculative.py | 1 - vllm/config/vllm.py | 1 - 7 files changed, 
7 deletions(-) diff --git a/vllm/compilation/compiler_interface.py b/vllm/compilation/compiler_interface.py index 209c6f5a1817..5462b2695515 100644 --- a/vllm/compilation/compiler_interface.py +++ b/vllm/compilation/compiler_interface.py @@ -15,7 +15,6 @@ import vllm.envs as envs from vllm.compilation.counter import compilation_counter from vllm.config import VllmConfig -from vllm.utils.hashing import safe_hash from vllm.utils.torch_utils import is_torch_equal_or_newer diff --git a/vllm/config/load.py b/vllm/config/load.py index 7d7ec1b9f443..3a7b7742de81 100644 --- a/vllm/config/load.py +++ b/vllm/config/load.py @@ -8,7 +8,6 @@ from vllm.config.utils import HashResult, config from vllm.logger import init_logger -from vllm.utils.hashing import safe_hash if TYPE_CHECKING: from vllm.model_executor.model_loader import LoadFormats diff --git a/vllm/config/lora.py b/vllm/config/lora.py index 4ab635672ba0..e75f7181d4db 100644 --- a/vllm/config/lora.py +++ b/vllm/config/lora.py @@ -10,7 +10,6 @@ from vllm.config.utils import HashResult, config, normalize_value from vllm.logger import init_logger -from vllm.utils.hashing import safe_hash if TYPE_CHECKING: from vllm.config import ModelConfig diff --git a/vllm/config/pooler.py b/vllm/config/pooler.py index 636f1047987d..05aa36841d5c 100644 --- a/vllm/config/pooler.py +++ b/vllm/config/pooler.py @@ -5,7 +5,6 @@ from vllm.config.utils import HashResult, config from vllm.logger import init_logger -from vllm.utils.hashing import safe_hash logger = init_logger(__name__) diff --git a/vllm/config/scheduler.py b/vllm/config/scheduler.py index 9b465a68b1a8..88d4e359f6c8 100644 --- a/vllm/config/scheduler.py +++ b/vllm/config/scheduler.py @@ -11,7 +11,6 @@ from vllm.config.utils import HashResult, config from vllm.logger import init_logger -from vllm.utils.hashing import safe_hash from vllm.utils.import_utils import resolve_obj_by_qualname if TYPE_CHECKING: diff --git a/vllm/config/speculative.py b/vllm/config/speculative.py index 
c6931898ded4..03a12c62db7d 100644 --- a/vllm/config/speculative.py +++ b/vllm/config/speculative.py @@ -12,7 +12,6 @@ from vllm.config.parallel import ParallelConfig from vllm.config.utils import HashResult, config, normalize_value from vllm.logger import init_logger -from vllm.utils.hashing import safe_hash from vllm.utils.import_utils import LazyLoader, has_arctic_inference if TYPE_CHECKING: diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index e63aa32f32e2..e6e6b758fb1f 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -23,7 +23,6 @@ from vllm.logger import enable_trace_function_call, init_logger from vllm.transformers_utils.runai_utils import is_runai_obj_uri from vllm.utils import random_uuid -from vllm.utils.hashing import safe_hash from .cache import CacheConfig from .compilation import CompilationConfig, CompilationMode, CUDAGraphMode From 991a335ddfc5a6911b76851bae985574950f6f9b Mon Sep 17 00:00:00 2001 From: vnadathur Date: Wed, 26 Nov 2025 10:35:59 -0800 Subject: [PATCH 20/57] reviewer feedback Signed-off-by: vnadathur Co-Authored-By: Srreyansh Sethi <107075589+WorldExplored@users.noreply.github.com> --- tests/compile/fullgraph/test_toy_llama.py | 12 ++++-------- vllm/config/compilation.py | 4 ++-- vllm/config/device.py | 2 +- vllm/config/ec_transfer.py | 2 +- vllm/config/kv_transfer.py | 2 +- vllm/config/load.py | 2 +- vllm/config/lora.py | 4 +--- vllm/config/model.py | 2 +- vllm/config/multimodal.py | 4 +--- vllm/config/observability.py | 2 +- vllm/config/parallel.py | 2 +- vllm/config/pooler.py | 2 +- vllm/config/scheduler.py | 2 +- vllm/config/speculative.py | 4 +--- vllm/config/structured_outputs.py | 2 +- 15 files changed, 19 insertions(+), 29 deletions(-) diff --git a/tests/compile/fullgraph/test_toy_llama.py b/tests/compile/fullgraph/test_toy_llama.py index 52052399325a..339d6e340d0f 100644 --- a/tests/compile/fullgraph/test_toy_llama.py +++ b/tests/compile/fullgraph/test_toy_llama.py @@ -26,6 +26,7 @@ VllmConfig, 
set_current_vllm_config, ) +from vllm.config.utils import get_compile_factors from vllm.forward_context import BatchDescriptor, set_forward_context from vllm.utils.torch_utils import is_torch_equal_or_newer @@ -45,14 +46,9 @@ class LlamaConfig: tractable_init: bool = False random_seed: int = 0 - def compile_factors(self) -> list[tuple[str, Any]]: - factors: list[tuple[str, Any]] = [] - for key, value in self.__dict__.items(): - if key == "random_seed": - continue - factors.append((key, value)) - factors.sort() - return factors + def compile_factors(self) -> dict[str, Any]: + ignored = {"random_seed"} + return get_compile_factors(self, ignored) def __post_init__(self): assert self.mlp_size >= self.hidden_size diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index b0cfd1c62a1e..2860bbf48b56 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -166,7 +166,7 @@ def compile_factors(self) -> HashResult: Any future fields that don't affect compilation should be excluded. """ factors = asdict(self) - return factors or None + return factors or {} def __post_init__(self) -> None: if not self.enable_noop: @@ -565,7 +565,7 @@ def compile_factors(self) -> HashResult: } factors = get_compile_factors(self, ignored_factors) - return factors or None + return factors or {} def __repr__(self) -> str: exclude = { diff --git a/vllm/config/device.py b/vllm/config/device.py index 13bced3126c6..188e6a471eb1 100644 --- a/vllm/config/device.py +++ b/vllm/config/device.py @@ -43,7 +43,7 @@ def compile_factors(self) -> HashResult: # no factors to consider. # the device/platform information will be summarized # by torch/vllm automatically. 
- return None + return {} def __post_init__(self): if self.device == "auto": diff --git a/vllm/config/ec_transfer.py b/vllm/config/ec_transfer.py index 023eb03b1173..a9734ee2d867 100644 --- a/vllm/config/ec_transfer.py +++ b/vllm/config/ec_transfer.py @@ -72,7 +72,7 @@ def compile_factors(self) -> HashResult: the final hidden states. """ # This config does not affect the compiled graph. - return None + return {} def __post_init__(self) -> None: if self.engine_id is None: diff --git a/vllm/config/kv_transfer.py b/vllm/config/kv_transfer.py index 60edab0987bd..ab1e3b2c79fd 100644 --- a/vllm/config/kv_transfer.py +++ b/vllm/config/kv_transfer.py @@ -76,7 +76,7 @@ def compile_factors(self) -> HashResult: the final hidden states. """ # This config does not affect the compiled graph. - return None + return {} def __post_init__(self) -> None: if self.engine_id is None: diff --git a/vllm/config/load.py b/vllm/config/load.py index 3a7b7742de81..d7e29e81b705 100644 --- a/vllm/config/load.py +++ b/vllm/config/load.py @@ -101,7 +101,7 @@ def compile_factors(self) -> HashResult: the final hidden states. """ # This config does not affect the compiled graph. 
- return None + return {} @field_validator("load_format", mode="after") def _lowercase_load_format(cls, load_format: str) -> str: diff --git a/vllm/config/lora.py b/vllm/config/lora.py index e75f7181d4db..d2a1083f3fd8 100644 --- a/vllm/config/lora.py +++ b/vllm/config/lora.py @@ -76,9 +76,7 @@ def compile_factors(self) -> HashResult: self.lora_vocab_padding_size, ] normalized = normalize_value(factors) - if not normalized: - return None - return {"factors": normalized} + return {"factors": normalized} if normalized else {} @model_validator(mode="after") def _validate_lora_config(self) -> Self: diff --git a/vllm/config/model.py b/vllm/config/model.py index a6d7a7636361..d7d97c11d72e 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -371,7 +371,7 @@ def compile_factors(self) -> HashResult: } factors = get_compile_factors(self, ignored_factors) - return factors or None + return factors or {} def _update_nested( self, diff --git a/vllm/config/multimodal.py b/vllm/config/multimodal.py index ca9ca31dba2d..b68a0e11bfa2 100644 --- a/vllm/config/multimodal.py +++ b/vllm/config/multimodal.py @@ -216,9 +216,7 @@ def compile_factors(self) -> HashResult: else None ] normalized = normalize_value(factors) - if not normalized: - return None - return {"factors": normalized} + return {"factors": normalized} if normalized else {} def get_limit_per_prompt(self, modality: str) -> int: """ diff --git a/vllm/config/observability.py b/vllm/config/observability.py index 3ea44a80aa5d..42bb2fc1599a 100644 --- a/vllm/config/observability.py +++ b/vllm/config/observability.py @@ -76,7 +76,7 @@ def compile_factors(self) -> HashResult: """ # no factors to consider. # this config will not affect the computation graph. 
- return None + return {} @field_validator("show_hidden_metrics_for_version") @classmethod diff --git a/vllm/config/parallel.py b/vllm/config/parallel.py index 2badeace2fb3..8639d44c4c76 100644 --- a/vllm/config/parallel.py +++ b/vllm/config/parallel.py @@ -489,7 +489,7 @@ def compile_factors(self) -> HashResult: factors = get_compile_factors(self, ignored_factors) # Explicitly include backend affecting env factor as before factors["VLLM_ALL2ALL_BACKEND"] = str(envs.VLLM_ALL2ALL_BACKEND) - return factors or None + return factors or {} def __post_init__(self) -> None: # Set all2all_backend from env var if not specified, with deprecation warning diff --git a/vllm/config/pooler.py b/vllm/config/pooler.py index 05aa36841d5c..da3995346941 100644 --- a/vllm/config/pooler.py +++ b/vllm/config/pooler.py @@ -99,7 +99,7 @@ def compile_factors(self) -> HashResult: # no factors to consider. # this config will not affect the computation graph. # No compile-time factors. - return None + return {} def get_use_activation(o: object): diff --git a/vllm/config/scheduler.py b/vllm/config/scheduler.py index 88d4e359f6c8..2b0b8488cbb6 100644 --- a/vllm/config/scheduler.py +++ b/vllm/config/scheduler.py @@ -175,7 +175,7 @@ def compile_factors(self) -> HashResult: the final hidden states. """ # This config does not affect the compiled graph. 
- return None + return {} @field_validator("scheduler_cls", "async_scheduling", mode="wrap") @classmethod diff --git a/vllm/config/speculative.py b/vllm/config/speculative.py index 03a12c62db7d..818a1b35504a 100644 --- a/vllm/config/speculative.py +++ b/vllm/config/speculative.py @@ -163,9 +163,7 @@ def compile_factors(self) -> HashResult: self.method == "eagle3" ] normalized = normalize_value(factors) - if not normalized: - return None - return {"factors": normalized} + return {"factors": normalized} if normalized else {} @staticmethod def hf_config_override(hf_config: PretrainedConfig) -> PretrainedConfig: diff --git a/vllm/config/structured_outputs.py b/vllm/config/structured_outputs.py index 02b5376f5e87..b208d11bde98 100644 --- a/vllm/config/structured_outputs.py +++ b/vllm/config/structured_outputs.py @@ -54,7 +54,7 @@ def compile_factors(self) -> HashResult: excluding anything before input ids/embeddings and after the final hidden states. """ - return None + return {} @model_validator(mode="after") def _validate_structured_output_config(self) -> Self: From b0268190f2300ee963e95d8ecd0119be2c3e5bb6 Mon Sep 17 00:00:00 2001 From: vnadathur Date: Wed, 26 Nov 2025 11:57:10 -0800 Subject: [PATCH 21/57] HashResult- CompileFactor Signed-off-by: vnadathur Co-Authored-By: Srreyansh Sethi <107075589+WorldExplored@users.noreply.github.com> --- vllm/compilation/compiler_interface.py | 8 ++++---- vllm/config/compilation.py | 6 +++--- vllm/config/device.py | 4 ++-- vllm/config/ec_transfer.py | 4 ++-- vllm/config/kv_transfer.py | 4 ++-- vllm/config/load.py | 4 ++-- vllm/config/lora.py | 4 ++-- vllm/config/model.py | 4 ++-- vllm/config/multimodal.py | 4 ++-- vllm/config/observability.py | 4 ++-- vllm/config/parallel.py | 4 ++-- vllm/config/pooler.py | 4 ++-- vllm/config/scheduler.py | 4 ++-- vllm/config/speculative.py | 4 ++-- vllm/config/structured_outputs.py | 4 ++-- vllm/config/utils.py | 5 ++--- vllm/config/vllm.py | 11 +++++++---- 17 files changed, 42 insertions(+), 40 
deletions(-) diff --git a/vllm/compilation/compiler_interface.py b/vllm/compilation/compiler_interface.py index 5462b2695515..d57c265a0c23 100644 --- a/vllm/compilation/compiler_interface.py +++ b/vllm/compilation/compiler_interface.py @@ -45,7 +45,7 @@ def initialize_cache( """ pass - def compile_factors(self, vllm_config: VllmConfig) -> dict[str, object] | None: + def compile_factors(self, vllm_config: VllmConfig) -> dict[str, object]: """ Gather compiler-specific factors that influence the generated code. @@ -54,7 +54,7 @@ def compile_factors(self, vllm_config: VllmConfig) -> dict[str, object] | None: additional data that uniquely identifies the compiler's contribution to the cache key. """ - return None + return {} def compile( self, @@ -193,7 +193,7 @@ class InductorStandaloneAdaptor(CompilerInterface): def __init__(self, save_format: Literal["binary", "unpacked"]): self.save_format = save_format - def compile_factors(self, vllm_config: VllmConfig) -> dict[str, object] | None: + def compile_factors(self, vllm_config: VllmConfig) -> dict[str, object]: return {"inductor_standalone": get_inductor_factors()} def initialize_cache( @@ -278,7 +278,7 @@ class InductorAdaptor(CompilerInterface): name = "inductor" - def compile_factors(self, vllm_config: VllmConfig) -> dict[str, object] | None: + def compile_factors(self, vllm_config: VllmConfig) -> dict[str, object]: return {"inductor": get_inductor_factors()} def initialize_cache( diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index 2860bbf48b56..30f7d76a06e8 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -13,7 +13,7 @@ import vllm.envs as envs from vllm.compilation.inductor_pass import CallableInductorPass, InductorPass -from vllm.config.utils import HashResult, config, get_compile_factors +from vllm.config.utils import CompileFactors, config, get_compile_factors from vllm.logger import init_logger from vllm.platforms import current_platform from 
vllm.utils.import_utils import resolve_obj_by_qualname @@ -159,7 +159,7 @@ def default_fi_allreduce_fusion_max_size_mb() -> dict[int, float]: current_platform.get_device_capability().to_int(), {} ) - def compile_factors(self) -> HashResult: + def compile_factors(self) -> CompileFactors: """ Produces a hash unique to the pass configuration. Any new fields that affect compilation should be added to the hash. @@ -542,7 +542,7 @@ class CompilationConfig: "vllm::sparse_attn_indexer", ] - def compile_factors(self) -> HashResult: + def compile_factors(self) -> CompileFactors: """ Provide a hash that uniquely identifies all the configs that affect the structure of the computation diff --git a/vllm/config/device.py b/vllm/config/device.py index 188e6a471eb1..eeb6a9c50ebb 100644 --- a/vllm/config/device.py +++ b/vllm/config/device.py @@ -8,7 +8,7 @@ from pydantic import ConfigDict, SkipValidation from pydantic.dataclasses import dataclass -from vllm.config.utils import HashResult, config +from vllm.config.utils import CompileFactors, config Device = Literal["auto", "cuda", "cpu", "tpu", "xpu"] @@ -28,7 +28,7 @@ class DeviceConfig: """Device type from the current platform. This is set in `__post_init__`.""" - def compile_factors(self) -> HashResult: + def compile_factors(self) -> CompileFactors: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if diff --git a/vllm/config/ec_transfer.py b/vllm/config/ec_transfer.py index a9734ee2d867..2889540c8f41 100644 --- a/vllm/config/ec_transfer.py +++ b/vllm/config/ec_transfer.py @@ -6,7 +6,7 @@ from pydantic.dataclasses import dataclass -from vllm.config.utils import HashResult, config +from vllm.config.utils import CompileFactors, config ECProducer = Literal["ec_producer"] ECConsumer = Literal["ec_consumer"] @@ -59,7 +59,7 @@ class ECTransferConfig: """The Python module path to dynamically load the EC connector from. 
Only supported in V1.""" - def compile_factors(self) -> HashResult: + def compile_factors(self) -> CompileFactors: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if diff --git a/vllm/config/kv_transfer.py b/vllm/config/kv_transfer.py index ab1e3b2c79fd..030a78daa0b2 100644 --- a/vllm/config/kv_transfer.py +++ b/vllm/config/kv_transfer.py @@ -7,7 +7,7 @@ from pydantic.dataclasses import dataclass -from vllm.config.utils import HashResult, config +from vllm.config.utils import CompileFactors, config KVProducer = Literal["kv_producer", "kv_both"] KVConsumer = Literal["kv_consumer", "kv_both"] @@ -63,7 +63,7 @@ class KVTransferConfig: enable_permute_local_kv: bool = False """Experiment feature flag to enable HND to NHD KV Transfer""" - def compile_factors(self) -> HashResult: + def compile_factors(self) -> CompileFactors: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if diff --git a/vllm/config/load.py b/vllm/config/load.py index d7e29e81b705..52b6066ff23a 100644 --- a/vllm/config/load.py +++ b/vllm/config/load.py @@ -6,7 +6,7 @@ from pydantic import Field, field_validator from pydantic.dataclasses import dataclass -from vllm.config.utils import HashResult, config +from vllm.config.utils import CompileFactors, config from vllm.logger import init_logger if TYPE_CHECKING: @@ -88,7 +88,7 @@ class LoadConfig: see original doc for `map_location` in https://pytorch.org/docs/stable/generated/torch.load.html """ - def compile_factors(self) -> HashResult: + def compile_factors(self) -> CompileFactors: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if diff --git a/vllm/config/lora.py b/vllm/config/lora.py index d2a1083f3fd8..ea2d46d61c46 100644 --- a/vllm/config/lora.py +++ b/vllm/config/lora.py @@ -8,7 +8,7 @@ from pydantic.dataclasses import dataclass from typing_extensions import Self -from 
vllm.config.utils import HashResult, config, normalize_value +from vllm.config.utils import CompileFactors, config, normalize_value from vllm.logger import init_logger if TYPE_CHECKING: @@ -55,7 +55,7 @@ class LoRAConfig: will be automatically assigned to 1-n with the names of the modalities in alphabetic order.""" - def compile_factors(self) -> HashResult: + def compile_factors(self) -> CompileFactors: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if diff --git a/vllm/config/model.py b/vllm/config/model.py index d7d97c11d72e..f91bc3963803 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -17,7 +17,7 @@ from vllm.config.multimodal import MMCacheType, MMEncoderTPMode, MultiModalConfig from vllm.config.pooler import PoolerConfig from vllm.config.scheduler import RunnerType -from vllm.config.utils import HashResult, config, get_compile_factors, getattr_iter +from vllm.config.utils import CompileFactors, config, get_compile_factors, getattr_iter from vllm.logger import init_logger from vllm.platforms import current_platform from vllm.transformers_utils.config import ( @@ -317,7 +317,7 @@ class ModelConfig: skip_mm_profiling: InitVar[bool | None] = None video_pruning_rate: InitVar[float | None] = None - def compile_factors(self) -> HashResult: + def compile_factors(self) -> CompileFactors: """ WARNING: Whenever a new field is added to this config, review `ignored_factors` to decide whether that field must be excluded. 
diff --git a/vllm/config/multimodal.py b/vllm/config/multimodal.py index b68a0e11bfa2..bfb54633957e 100644 --- a/vllm/config/multimodal.py +++ b/vllm/config/multimodal.py @@ -7,7 +7,7 @@ from pydantic import ConfigDict, Field, field_validator, model_validator from pydantic.dataclasses import dataclass -from vllm.config.utils import HashResult, config, normalize_value +from vllm.config.utils import CompileFactors, config, normalize_value if TYPE_CHECKING: from vllm.attention.backends.registry import AttentionBackendEnum @@ -198,7 +198,7 @@ def _validate_multimodal_config(self): ) return self - def compile_factors(self) -> HashResult: + def compile_factors(self) -> CompileFactors: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if diff --git a/vllm/config/observability.py b/vllm/config/observability.py index 42bb2fc1599a..df40944e4447 100644 --- a/vllm/config/observability.py +++ b/vllm/config/observability.py @@ -9,7 +9,7 @@ from pydantic.dataclasses import dataclass from vllm import version -from vllm.config.utils import HashResult, config +from vllm.config.utils import CompileFactors, config DetailedTraceModules = Literal["model", "worker", "all"] @@ -62,7 +62,7 @@ def collect_model_execute_time(self) -> bool: or "all" in self.collect_detailed_traces ) - def compile_factors(self) -> HashResult: + def compile_factors(self) -> CompileFactors: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if diff --git a/vllm/config/parallel.py b/vllm/config/parallel.py index 8639d44c4c76..a272029536c7 100644 --- a/vllm/config/parallel.py +++ b/vllm/config/parallel.py @@ -11,7 +11,7 @@ from typing_extensions import Self import vllm.envs as envs -from vllm.config.utils import HashResult, config, get_compile_factors +from vllm.config.utils import CompileFactors, config, get_compile_factors from vllm.logger import init_logger from 
vllm.model_executor.layers.batch_invariant import ( vllm_is_batch_invariant, @@ -442,7 +442,7 @@ def sync_kv_cache_memory_size(dp_group: ProcessGroup, kv_cache_memory: int) -> i torch.distributed.all_reduce(tensor, op=ReduceOp.MIN, group=dp_group) return tensor.item() - def compile_factors(self) -> HashResult: + def compile_factors(self) -> CompileFactors: """ Provide a hash that uniquely identifies all the configs that affect the structure of the computation diff --git a/vllm/config/pooler.py b/vllm/config/pooler.py index da3995346941..7fa090ec6d79 100644 --- a/vllm/config/pooler.py +++ b/vllm/config/pooler.py @@ -3,7 +3,7 @@ from pydantic.dataclasses import dataclass -from vllm.config.utils import HashResult, config +from vllm.config.utils import CompileFactors, config from vllm.logger import init_logger logger = init_logger(__name__) @@ -84,7 +84,7 @@ def __post_init__(self): # raise deprecated warning for softmax and activation self.use_activation = get_use_activation(self) - def compile_factors(self) -> HashResult: + def compile_factors(self) -> CompileFactors: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if diff --git a/vllm/config/scheduler.py b/vllm/config/scheduler.py index 2b0b8488cbb6..3e06247f9fe8 100644 --- a/vllm/config/scheduler.py +++ b/vllm/config/scheduler.py @@ -9,7 +9,7 @@ from pydantic.dataclasses import dataclass from typing_extensions import Self, deprecated -from vllm.config.utils import HashResult, config +from vllm.config.utils import CompileFactors, config from vllm.logger import init_logger from vllm.utils.import_utils import resolve_obj_by_qualname @@ -162,7 +162,7 @@ def get_scheduler_cls(self) -> type["SchedulerInterface"]: return cast(type["SchedulerInterface"], self.scheduler_cls) return resolve_obj_by_qualname(self.scheduler_cls) - def compile_factors(self) -> HashResult: + def compile_factors(self) -> CompileFactors: """ WARNING: Whenever a new field is added to this 
config, ensure that it is included in the factors list if diff --git a/vllm/config/speculative.py b/vllm/config/speculative.py index 818a1b35504a..a708ba56cb80 100644 --- a/vllm/config/speculative.py +++ b/vllm/config/speculative.py @@ -10,7 +10,7 @@ from vllm.config.model import ModelConfig from vllm.config.parallel import ParallelConfig -from vllm.config.utils import HashResult, config, normalize_value +from vllm.config.utils import CompileFactors, config, normalize_value from vllm.logger import init_logger from vllm.utils.import_utils import LazyLoader, has_arctic_inference @@ -145,7 +145,7 @@ class SpeculativeConfig: tokens with estimated probability (based on frequency counts) greater than or equal to this value.""" - def compile_factors(self) -> HashResult: + def compile_factors(self) -> CompileFactors: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if diff --git a/vllm/config/structured_outputs.py b/vllm/config/structured_outputs.py index b208d11bde98..e6565c1b41f8 100644 --- a/vllm/config/structured_outputs.py +++ b/vllm/config/structured_outputs.py @@ -7,7 +7,7 @@ from pydantic.dataclasses import dataclass from typing_extensions import Self -from vllm.config.utils import HashResult, config +from vllm.config.utils import CompileFactors, config StructuredOutputsBackend = Literal[ "auto", "xgrammar", "guidance", "outlines", "lm-format-enforcer" @@ -42,7 +42,7 @@ class StructuredOutputsConfig: enable_in_reasoning: bool = False """Whether to use structured input for reasoning.""" - def compile_factors(self) -> HashResult: + def compile_factors(self) -> CompileFactors: """ WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if diff --git a/vllm/config/utils.py b/vllm/config/utils.py index d08b4de2db7f..82b133943782 100644 --- a/vllm/config/utils.py +++ b/vllm/config/utils.py @@ -26,7 +26,7 @@ ConfigType = type[DataclassInstance] ConfigT = 
TypeVar("ConfigT", bound=ConfigType) -CompileFactors = dict[str, object] | None +CompileFactors = dict[str, object] def config(cls: ConfigT) -> ConfigT: @@ -286,8 +286,7 @@ def get_compile_factors( value = getattr(config, factor, None) # Nested configs expose factors via compile_factors; unwrap first. if isinstance(value, SupportsCompileFactors): - nested = value.compile_factors() - value = {} if nested is None else nested + value = value.compile_factors() try: factors[factor] = normalize_value(value) except TypeError as e: diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index e6e6b758fb1f..1867fe456fe9 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -136,7 +136,10 @@ def compile_factors(self) -> CompileFactors: vllm_factors.append(__version__) def _append_config(config_obj: SupportsCompileFactors | None) -> None: - vllm_factors.append(config_obj.compile_factors() if config_obj else None) + if config_obj is None: + vllm_factors.append({}) + else: + vllm_factors.append(config_obj.compile_factors()) _append_config(self.model_config) _append_config(self.cache_config) @@ -151,14 +154,14 @@ def _append_config(config_obj: SupportsCompileFactors | None) -> None: # graph explicitly. 
vllm_factors.append(self.scheduler_config.max_num_batched_tokens) else: - vllm_factors.append(None) + vllm_factors.append({}) _append_config(self.speculative_config) _append_config(self.structured_outputs_config) vllm_factors.append(self.observability_config.compile_factors()) if self.compilation_config: vllm_factors.append(self.compilation_config.compile_factors()) else: - vllm_factors.append(None) + vllm_factors.append({}) _append_config(self.kv_transfer_config) _append_config(self.ec_transfer_config) if self.additional_config: @@ -169,7 +172,7 @@ def _append_config(config_obj: SupportsCompileFactors | None) -> None: else: vllm_factors.append(additional_config.compile_factors()) else: - vllm_factors.append(None) + vllm_factors.append({}) factors.append(vllm_factors) return {"vllm": factors} From 78e90607d901633de09f43e6f711e0f23cd1180d Mon Sep 17 00:00:00 2001 From: vnadathur Date: Wed, 26 Nov 2025 19:20:53 -0800 Subject: [PATCH 22/57] accept proper dict Signed-off-by: vnadathur Co-Authored-By: Srreyansh Sethi <107075589+WorldExplored@users.noreply.github.com> --- vllm/compilation/backends.py | 2 +- vllm/compilation/caching.py | 2 +- vllm/config/cache.py | 5 +- vllm/config/vllm.py | 73 +++++++++++++---------------- vllm/distributed/eplb/eplb_state.py | 2 +- vllm/v1/engine/core.py | 2 +- vllm/v1/engine/utils.py | 2 +- 7 files changed, 40 insertions(+), 48 deletions(-) diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py index 8329e1e9b807..2c5fb2f24802 100644 --- a/vllm/compilation/backends.py +++ b/vllm/compilation/backends.py @@ -100,7 +100,7 @@ def __init__(self, compilation_config: CompilationConfig): self.compiler = make_compiler(compilation_config) def compile_factors(self, vllm_config: VllmConfig) -> dict[str, object]: - return self.compiler.compile_factors(vllm_config) or {} + return self.compiler.compile_factors(vllm_config) @contextmanager def compile_context(self, runtime_shape: int | None = None): diff --git 
a/vllm/compilation/caching.py b/vllm/compilation/caching.py index c94cf917a872..fc36e084ca9f 100644 --- a/vllm/compilation/caching.py +++ b/vllm/compilation/caching.py @@ -170,7 +170,7 @@ def compute_env_and_config_hashes( env_factors = envs.compile_factors() env_hash = hash_factors(env_factors) - config_factors = vllm_config.compile_factors() or {} + config_factors = vllm_config.compile_factors() config_hash = hash_factors(config_factors) return env_hash, config_hash, env_factors, config_factors diff --git a/vllm/config/cache.py b/vllm/config/cache.py index 7b902ddd2016..601639ededa9 100644 --- a/vllm/config/cache.py +++ b/vllm/config/cache.py @@ -178,9 +178,8 @@ def compile_factors(self) -> CompileFactors: # WIP feature toggle not impacting compiled graph shape "kv_sharing_fast_prefill", } - - factors = get_compile_factors(self, ignored_factors) - return factors + + return get_compile_factors(self, ignored_factors) def metrics_info(self): # convert cache_config to dict(key: str, value: str) for prometheus diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index 1867fe456fe9..077e99fccc1e 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -127,55 +127,48 @@ def compile_factors(self) -> CompileFactors: excluding anything before input ids/embeddings and after the final hidden states. 
""" - factors: list[Any] = [] - - # summarize vllm config - vllm_factors: list[Any] = [] from vllm import __version__ - vllm_factors.append(__version__) - - def _append_config(config_obj: SupportsCompileFactors | None) -> None: + def _compile(config_obj: SupportsCompileFactors | None) -> CompileFactors: if config_obj is None: - vllm_factors.append({}) - else: - vllm_factors.append(config_obj.compile_factors()) - - _append_config(self.model_config) - _append_config(self.cache_config) - _append_config(self.parallel_config) - _append_config(self.scheduler_config) - _append_config(self.device_config) - _append_config(self.load_config) - if self.lora_config: - vllm_factors.append(self.lora_config.compile_factors()) - # LoRA creates static buffers based on max_num_batched_tokens. - # The tensor sizes and strides get captured in the torch.compile - # graph explicitly. - vllm_factors.append(self.scheduler_config.max_num_batched_tokens) - else: - vllm_factors.append({}) - _append_config(self.speculative_config) - _append_config(self.structured_outputs_config) - vllm_factors.append(self.observability_config.compile_factors()) - if self.compilation_config: - vllm_factors.append(self.compilation_config.compile_factors()) - else: - vllm_factors.append({}) - _append_config(self.kv_transfer_config) - _append_config(self.ec_transfer_config) + return {} + return config_obj.compile_factors() + + factors: dict[str, Any] = { + "version": __version__, + "model": _compile(self.model_config), + "cache": _compile(self.cache_config), + "parallel": _compile(self.parallel_config), + "scheduler": _compile(self.scheduler_config), + "device": _compile(self.device_config), + "load": _compile(self.load_config), + "speculative": _compile(self.speculative_config), + "structured_outputs": _compile(self.structured_outputs_config), + "observability": self.observability_config.compile_factors(), + "compilation": ( + self.compilation_config.compile_factors() + if self.compilation_config + else {} + ), + 
"kv_transfer": _compile(self.kv_transfer_config), + "ec_transfer": _compile(self.ec_transfer_config), + } + + factors["lora"] = ( + self.lora_config.compile_factors() if self.lora_config else {} + ) + factors["max_num_batched_tokens"] = self.scheduler_config.max_num_batched_tokens + if self.additional_config: additional_config = self.additional_config if isinstance(additional_config, dict): - assert isinstance(additional_config, SupportsCompileFactors) - vllm_factors.append(additional_config) + factors["additional"] = additional_config else: - vllm_factors.append(additional_config.compile_factors()) + factors["additional"] = additional_config.compile_factors() else: - vllm_factors.append({}) + factors["additional"] = {} - factors.append(vllm_factors) - return {"vllm": factors} + return factors def pad_for_cudagraph(self, batch_size: int) -> int: # if batch_size > self.compilation_config.max_cudagraph_capture_size, diff --git a/vllm/distributed/eplb/eplb_state.py b/vllm/distributed/eplb/eplb_state.py index 9864b85d1a44..2337aa42229a 100644 --- a/vllm/distributed/eplb/eplb_state.py +++ b/vllm/distributed/eplb/eplb_state.py @@ -509,7 +509,7 @@ def add_model( new_logical_replica_count=new_logical_replica_count, ) - model_factors = model_config.compile_factors() or {} + model_factors = model_config.compile_factors() model_hash = hash_factors(model_factors) self.model_states[model_hash] = model_state diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py index bea9ca93f850..3665bffc2411 100644 --- a/vllm/v1/engine/core.py +++ b/vllm/v1/engine/core.py @@ -752,7 +752,7 @@ def _perform_handshake( "dp_stats_address": dp_stats_address, } if vllm_config.parallel_config.data_parallel_size > 1: - parallel_factors = vllm_config.parallel_config.compile_factors() or {} + parallel_factors = vllm_config.parallel_config.compile_factors() ready_msg["parallel_config_hash"] = hash_factors(parallel_factors) handshake_socket.send(msgspec.msgpack.encode(ready_msg)) diff --git 
a/vllm/v1/engine/utils.py b/vllm/v1/engine/utils.py index 207fa40a47ea..19eea68b6535 100644 --- a/vllm/v1/engine/utils.py +++ b/vllm/v1/engine/utils.py @@ -1001,7 +1001,7 @@ def wait_for_engine_startup( f"dp lb mode" ) - parallel_factors = parallel_config.compile_factors() or {} + parallel_factors = parallel_config.compile_factors() parallel_hash = hash_factors(parallel_factors) if status == "HELLO" and engine.state == CoreEngineState.NEW: # Send init message with DP config info and config hash. From 5e0a992727f78d0c0f35105d45a6abe930347ed8 Mon Sep 17 00:00:00 2001 From: WorldExplored Date: Wed, 26 Nov 2025 23:25:56 -0500 Subject: [PATCH 23/57] fixed pre-commit Signed-off-by: WorldExplored Co-Authored-By: vnadathur <236933696+vnadathur@users.noreply.github.com> --- vllm/config/cache.py | 2 +- vllm/config/vllm.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/vllm/config/cache.py b/vllm/config/cache.py index 601639ededa9..27f919790fa2 100644 --- a/vllm/config/cache.py +++ b/vllm/config/cache.py @@ -178,7 +178,7 @@ def compile_factors(self) -> CompileFactors: # WIP feature toggle not impacting compiled graph shape "kv_sharing_fast_prefill", } - + return get_compile_factors(self, ignored_factors) def metrics_info(self): diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index 077e99fccc1e..73b1b2b96bf8 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -154,9 +154,7 @@ def _compile(config_obj: SupportsCompileFactors | None) -> CompileFactors: "ec_transfer": _compile(self.ec_transfer_config), } - factors["lora"] = ( - self.lora_config.compile_factors() if self.lora_config else {} - ) + factors["lora"] = self.lora_config.compile_factors() if self.lora_config else {} factors["max_num_batched_tokens"] = self.scheduler_config.max_num_batched_tokens if self.additional_config: From 4c6a0d189a4631b083f8cb1ac5abb0bb292713bc Mon Sep 17 00:00:00 2001 From: Srreyansh Sethi <107075589+WorldExplored@users.noreply.github.com> Date: Thu, 27 Nov 
2025 20:38:27 -0800 Subject: [PATCH 24/57] Update vllm/config/vllm.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Luka Govedič Signed-off-by: Srreyansh Sethi <107075589+WorldExplored@users.noreply.github.com> --- vllm/config/vllm.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index 9584d31cf2af..2cb29467af42 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -248,11 +248,7 @@ def _compile(config_obj: SupportsCompileFactors | None) -> CompileFactors: "speculative": _compile(self.speculative_config), "structured_outputs": _compile(self.structured_outputs_config), "observability": self.observability_config.compile_factors(), - "compilation": ( - self.compilation_config.compile_factors() - if self.compilation_config - else {} - ), + "compilation": _compile(self.compilation_config), "kv_transfer": _compile(self.kv_transfer_config), "ec_transfer": _compile(self.ec_transfer_config), } From 84707339c5ff5e778475f94357db66ba9e4719af Mon Sep 17 00:00:00 2001 From: Srreyansh Sethi <107075589+WorldExplored@users.noreply.github.com> Date: Thu, 27 Nov 2025 20:39:20 -0800 Subject: [PATCH 25/57] Update vllm/config/vllm.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Luka Govedič Signed-off-by: Srreyansh Sethi <107075589+WorldExplored@users.noreply.github.com> --- vllm/config/vllm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index 2cb29467af42..85844cf6f5ce 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -261,6 +261,7 @@ def _compile(config_obj: SupportsCompileFactors | None) -> CompileFactors: if isinstance(additional_config, dict): factors["additional"] = additional_config else: + assert isinstance(additional_config, SupportsCompileFactors) factors["additional"] = additional_config.compile_factors() else: 
factors["additional"] = {} From 42390f49862588e0b8e512cf63afc086d49f4cbd Mon Sep 17 00:00:00 2001 From: WorldExplored Date: Sat, 29 Nov 2025 01:47:03 -0500 Subject: [PATCH 26/57] fixed pre-comm + reviewer feedback Signed-off-by: WorldExplored Co-Authored-By: vnadathur <236933696+vnadathur@users.noreply.github.com> --- vllm/config/multimodal.py | 1 + vllm/config/vllm.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/config/multimodal.py b/vllm/config/multimodal.py index 0890674558b7..6e910a07ecb1 100644 --- a/vllm/config/multimodal.py +++ b/vllm/config/multimodal.py @@ -7,6 +7,7 @@ from pydantic import ConfigDict, Field, field_validator, model_validator from pydantic.dataclasses import dataclass +from vllm.attention.backends.registry import AttentionBackendEnum from vllm.config.utils import CompileFactors, config, normalize_value diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index af6b864d5765..a749a7b241fa 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -251,9 +251,9 @@ def _compile(config_obj: SupportsCompileFactors | None) -> CompileFactors: "compilation": _compile(self.compilation_config), "kv_transfer": _compile(self.kv_transfer_config), "ec_transfer": _compile(self.ec_transfer_config), + "lora": _compile(self.lora_config), } - factors["lora"] = self.lora_config.compile_factors() if self.lora_config else {} factors["max_num_batched_tokens"] = self.scheduler_config.max_num_batched_tokens if self.additional_config: From 69c780b5892626a437945548a33beddb0cad5a68 Mon Sep 17 00:00:00 2001 From: vnadathur Date: Mon, 1 Dec 2025 15:02:37 -0800 Subject: [PATCH 27/57] have adaptors pass dicts. 
Signed-off-by: vnadathur Co-Authored-By: Srreyansh Sethi <107075589+WorldExplored@users.noreply.github.com> --- vllm/compilation/backends.py | 4 ++-- vllm/compilation/compiler_interface.py | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py index 2c5fb2f24802..ed6e5f959357 100644 --- a/vllm/compilation/backends.py +++ b/vllm/compilation/backends.py @@ -26,7 +26,7 @@ should_split, ) from vllm.config import CompilationConfig, CUDAGraphMode, VllmConfig -from vllm.config.utils import hash_factors +from vllm.config.utils import CompileFactors, hash_factors from vllm.logger import init_logger from vllm.logging_utils import lazy from vllm.platforms import current_platform @@ -99,7 +99,7 @@ def __init__(self, compilation_config: CompilationConfig): self.compilation_config = compilation_config self.compiler = make_compiler(compilation_config) - def compile_factors(self, vllm_config: VllmConfig) -> dict[str, object]: + def compile_factors(self, vllm_config: VllmConfig) -> CompileFactors: return self.compiler.compile_factors(vllm_config) @contextmanager diff --git a/vllm/compilation/compiler_interface.py b/vllm/compilation/compiler_interface.py index d57c265a0c23..7b03463fcf51 100644 --- a/vllm/compilation/compiler_interface.py +++ b/vllm/compilation/compiler_interface.py @@ -15,6 +15,7 @@ import vllm.envs as envs from vllm.compilation.counter import compilation_counter from vllm.config import VllmConfig +from vllm.config.utils import CompileFactors from vllm.utils.torch_utils import is_torch_equal_or_newer @@ -45,14 +46,15 @@ def initialize_cache( """ pass - def compile_factors(self, vllm_config: VllmConfig) -> dict[str, object]: + def compile_factors(self, vllm_config: VllmConfig) -> CompileFactors: """ Gather compiler-specific factors that influence the generated code. See [`VllmConfig.compile_factors`][vllm.config.VllmConfig.compile_factors] for the base configuration factors. 
This method should return any additional data that uniquely identifies the compiler's contribution to - the cache key. + the cache key. Subclasses must return a dictionary; use an empty dict + when no compiler-specific data is needed. """ return {} @@ -193,7 +195,7 @@ class InductorStandaloneAdaptor(CompilerInterface): def __init__(self, save_format: Literal["binary", "unpacked"]): self.save_format = save_format - def compile_factors(self, vllm_config: VllmConfig) -> dict[str, object]: + def compile_factors(self, vllm_config: VllmConfig) -> CompileFactors: return {"inductor_standalone": get_inductor_factors()} def initialize_cache( @@ -278,7 +280,7 @@ class InductorAdaptor(CompilerInterface): name = "inductor" - def compile_factors(self, vllm_config: VllmConfig) -> dict[str, object]: + def compile_factors(self, vllm_config: VllmConfig) -> CompileFactors: return {"inductor": get_inductor_factors()} def initialize_cache( From 90df680507599e54c0f3df687e48e0767e594ffe Mon Sep 17 00:00:00 2001 From: vnadathur Date: Mon, 1 Dec 2025 15:13:41 -0800 Subject: [PATCH 28/57] pre-com Signed-off-by: vnadathur Co-Authored-By: Srreyansh Sethi <107075589+WorldExplored@users.noreply.github.com> --- vllm/config/pooler.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vllm/config/pooler.py b/vllm/config/pooler.py index 139dfe7fcad5..dc3edb021f47 100644 --- a/vllm/config/pooler.py +++ b/vllm/config/pooler.py @@ -1,6 +1,8 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from typing import Literal + from pydantic.dataclasses import dataclass from vllm.config.utils import CompileFactors, config From 9ef5ebcb1d28d572c305f4d72d61d176aa67eba2 Mon Sep 17 00:00:00 2001 From: Srreyansh Sethi <107075589+WorldExplored@users.noreply.github.com> Date: Thu, 4 Dec 2025 14:40:47 -0800 Subject: [PATCH 29/57] Update vllm/config/utils.py Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Signed-off-by: Srreyansh 
Sethi <107075589+WorldExplored@users.noreply.github.com> --- vllm/config/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/config/utils.py b/vllm/config/utils.py index 9f4186ddb14d..49547f647683 100644 --- a/vllm/config/utils.py +++ b/vllm/config/utils.py @@ -276,7 +276,7 @@ def normalize_value(x): def get_compile_factors( config: ConfigT, ignored_factors: set[str] -) -> dict[str, object]: +) -> CompileFactors: """Gets the factors used for hashing a config class. - Includes all dataclass fields not in `ignored_factors`. - Uses .compile_factors() for nested dataclasses that support it From d643f93908e9725b0c852297126ee76c45998480 Mon Sep 17 00:00:00 2001 From: Srreyansh Sethi <107075589+WorldExplored@users.noreply.github.com> Date: Thu, 4 Dec 2025 14:41:04 -0800 Subject: [PATCH 30/57] Update vllm/config/vllm.py Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Signed-off-by: Srreyansh Sethi <107075589+WorldExplored@users.noreply.github.com> --- vllm/config/vllm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index 1750e2891459..7096c6e22d83 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -263,7 +263,7 @@ def _compile(config_obj: SupportsCompileFactors | None) -> CompileFactors: "load": _compile(self.load_config), "speculative": _compile(self.speculative_config), "structured_outputs": _compile(self.structured_outputs_config), - "observability": self.observability_config.compile_factors(), + "observability": _compile(self.observability_config), "compilation": _compile(self.compilation_config), "kv_transfer": _compile(self.kv_transfer_config), "ec_transfer": _compile(self.ec_transfer_config), From 00185c66119c4a79b8589b2e2f2135a5067eca48 Mon Sep 17 00:00:00 2001 From: Srreyansh Sethi <107075589+WorldExplored@users.noreply.github.com> Date: Thu, 4 Dec 2025 14:41:19 -0800 Subject: [PATCH 31/57] Update vllm/compilation/caching.py 
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Signed-off-by: Srreyansh Sethi <107075589+WorldExplored@users.noreply.github.com> --- vllm/compilation/caching.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/vllm/compilation/caching.py b/vllm/compilation/caching.py index fc36e084ca9f..21f110bcbca5 100644 --- a/vllm/compilation/caching.py +++ b/vllm/compilation/caching.py @@ -39,11 +39,9 @@ def get_code_factors(forward_code_files: list[Path]) -> list[dict[str, str]]: try: with filepath.open() as f: content = f.read() + entry["hash"] = hash_factors({"content": content}) except Exception: - logger.warning("Failed to read file %s", path_str) - code_factors.append(entry) - continue - entry["hash"] = hash_factors({"content": content}) + logger.warning("Failed to read file %s", path_str) code_factors.append(entry) return code_factors From 12695708cba67684d799fd0f908f13b3291301a6 Mon Sep 17 00:00:00 2001 From: WorldExplored Date: Sun, 7 Dec 2025 22:32:10 -0500 Subject: [PATCH 32/57] fixed precommit Signed-off-by: WorldExplored Co-Authored-By: vnadathur <236933696+vnadathur@users.noreply.github.com> --- vllm/compilation/backends.py | 19 ++++++++++++------- vllm/compilation/caching.py | 2 +- vllm/compilation/compiler_interface.py | 3 +-- vllm/config/compilation.py | 22 +++++++++++++--------- vllm/config/lora.py | 22 +++++++++++----------- vllm/config/parallel.py | 2 +- vllm/config/scheduler.py | 8 ++++++-- vllm/config/utils.py | 4 +--- vllm/config/vllm.py | 2 -- 9 files changed, 46 insertions(+), 38 deletions(-) diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py index e38c79907309..5c7a74590a49 100644 --- a/vllm/compilation/backends.py +++ b/vllm/compilation/backends.py @@ -11,7 +11,6 @@ from collections.abc import Callable, Sequence from contextlib import contextmanager from copy import deepcopy -from functools import partial from pathlib import Path from typing import Any @@ -26,7 +25,7 @@ 
should_split, ) from vllm.config import CompilationConfig, CUDAGraphMode, VllmConfig -from vllm.config.utils import Range, CompileFactors, hash_factors +from vllm.config.utils import CompileFactors, Range, hash_factors from vllm.logger import init_logger from vllm.logging_utils import lazy from vllm.platforms import current_platform @@ -638,11 +637,6 @@ def __call__( # Persist and log only hash-relevant factors together. try: - logger.debug( - "Compile env factors (raw):\n%s\nVllm config hash: %s", - lazy(partial(pprint.pformat, env_factors, width=120)), - config_hash, - ) meta_path = os.path.join(local_cache_dir, "cache_key_factors.json") if not os.path.exists(meta_path): with open(meta_path, "w") as f: @@ -660,6 +654,17 @@ def __call__( indent=2, sort_keys=True, ) + logger.debug( + ( + "Persisted compile cache factors to %s " + "(env_keys=%d config_keys=%d compiler_keys=%d code_entries=%d)" + ), + meta_path, + len(env_factors), + len(config_factors), + len(compiler_factors), + len(code_factors), + ) except Exception: # Best-effort only; metadata write failures are non-fatal. 
logger.warning( diff --git a/vllm/compilation/caching.py b/vllm/compilation/caching.py index 21f110bcbca5..97b509aab8bb 100644 --- a/vllm/compilation/caching.py +++ b/vllm/compilation/caching.py @@ -41,7 +41,7 @@ def get_code_factors(forward_code_files: list[Path]) -> list[dict[str, str]]: content = f.read() entry["hash"] = hash_factors({"content": content}) except Exception: - logger.warning("Failed to read file %s", path_str) + logger.warning("Failed to read file %s", path_str) code_factors.append(entry) return code_factors diff --git a/vllm/compilation/compiler_interface.py b/vllm/compilation/compiler_interface.py index b4bcc53b6dd7..f7a54051f2ad 100644 --- a/vllm/compilation/compiler_interface.py +++ b/vllm/compilation/compiler_interface.py @@ -15,8 +15,7 @@ import vllm.envs as envs from vllm.compilation.counter import compilation_counter from vllm.config import VllmConfig -from vllm.config.utils import Range, CompileFactors -from vllm.utils.hashing import safe_hash +from vllm.config.utils import CompileFactors, Range from vllm.utils.torch_utils import is_torch_equal_or_newer diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index ad1f9744f27e..5ab7527c73be 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -13,7 +13,14 @@ import vllm.envs as envs from vllm.compilation.inductor_pass import CallableInductorPass, InductorPass -from vllm.config.utils import CompileFactors, config, get_compile_factors +from vllm.config.utils import ( + CompileFactors, + Range, + config, + get_compile_factors, + handle_deprecated, + hash_factors, +) from vllm.logger import init_logger from vllm.platforms import current_platform from vllm.utils.import_utils import resolve_obj_by_qualname @@ -200,15 +207,15 @@ def compile_factors(self) -> CompileFactors: Any future fields that don't affect compilation should be excluded. 
""" - ignored_fields = [ + ignored_fields: set[str] = { "enable_fusion", "enable_attn_fusion", "enable_noop", "enable_sequence_parallelism", "enable_async_tp", "enable_fi_allreduce_fusion", - ] - return hash_factors(get_hash_factors(self, ignored_factors=ignored_fields)) + } + return get_compile_factors(self, ignored_fields) @field_validator( "fuse_norm_quant", @@ -345,9 +352,7 @@ def compute_hash(self) -> str: Provide a hash for DynamicShapesConfig """ - from vllm.config.utils import get_hash_factors, hash_factors - - factors = get_hash_factors(self, {}) + factors = get_compile_factors(self, set()) return hash_factors(factors) @@ -697,8 +702,7 @@ def compile_factors(self) -> CompileFactors: "static_forward_context", } - factors = get_compile_factors(self, ignored_factors) - return factors or {} + return get_compile_factors(self, ignored_factors) def __repr__(self) -> str: exclude = { diff --git a/vllm/config/lora.py b/vllm/config/lora.py index ea2d46d61c46..98db43f71346 100644 --- a/vllm/config/lora.py +++ b/vllm/config/lora.py @@ -8,7 +8,7 @@ from pydantic.dataclasses import dataclass from typing_extensions import Self -from vllm.config.utils import CompileFactors, config, normalize_value +from vllm.config.utils import CompileFactors, config, get_compile_factors from vllm.logger import init_logger if TYPE_CHECKING: @@ -44,6 +44,10 @@ class LoRAConfig: `max_loras`.""" lora_dtype: torch.dtype | LoRADType = "auto" """Data type for LoRA. 
If auto, will default to base model dtype.""" + lora_extra_vocab_size: int = 0 + """Extra vocab size reserved for LoRA adapters.""" + lora_vocab_padding_size: int = 0 + """Padding size applied to LoRA vocab.""" default_mm_loras: dict[str, str] | None = None """Dictionary mapping specific modalities to LoRA model paths; this field is only applicable to multimodal models and should be leveraged when a @@ -67,16 +71,12 @@ def compile_factors(self) -> CompileFactors: excluding anything before input ids/embeddings and after the final hidden states. """ - factors: list[Any] = [ - self.max_lora_rank, - self.max_loras, - self.fully_sharded_loras, - self.lora_dtype, - self.lora_extra_vocab_size, - self.lora_vocab_padding_size, - ] - normalized = normalize_value(factors) - return {"factors": normalized} if normalized else {} + ignored_factors = { + # Runtime/placement only; does not affect compiled graph + "max_cpu_loras", + "default_mm_loras", + } + return get_compile_factors(self, ignored_factors) @model_validator(mode="after") def _validate_lora_config(self) -> Self: diff --git a/vllm/config/parallel.py b/vllm/config/parallel.py index 40d97660ad10..a0349d1599cd 100644 --- a/vllm/config/parallel.py +++ b/vllm/config/parallel.py @@ -494,7 +494,7 @@ def compile_factors(self) -> CompileFactors: factors = get_compile_factors(self, ignored_factors) # Explicitly include backend affecting env factor as before factors["VLLM_ALL2ALL_BACKEND"] = str(envs.VLLM_ALL2ALL_BACKEND) - return factors or {} + return factors def __post_init__(self) -> None: # Set all2all_backend from env var if not specified, with deprecation warning diff --git a/vllm/config/scheduler.py b/vllm/config/scheduler.py index 9f425f4daa11..7f81d4aaf250 100644 --- a/vllm/config/scheduler.py +++ b/vllm/config/scheduler.py @@ -185,8 +185,12 @@ def compile_factors(self) -> CompileFactors: excluding anything before input ids/embeddings and after the final hidden states. 
""" - # This config does not affect the compiled graph. - return {} + # Only surface scheduler knobs that influence compiled shapes. + return { + "max_num_batched_tokens": self.max_num_batched_tokens, + "max_num_seqs": self.max_num_seqs, + "max_num_partial_prefills": self.max_num_partial_prefills, + } @field_validator("scheduler_cls", "async_scheduling", mode="wrap") @classmethod diff --git a/vllm/config/utils.py b/vllm/config/utils.py index 1b2b4e3b740e..e438378ffa10 100644 --- a/vllm/config/utils.py +++ b/vllm/config/utils.py @@ -274,9 +274,7 @@ def normalize_value(x): ) -def get_compile_factors( - config: ConfigT, ignored_factors: set[str] -) -> CompileFactors: +def get_compile_factors(config: ConfigT, ignored_factors: set[str]) -> CompileFactors: """Gets the factors used for hashing a config class. - Includes all dataclass fields not in `ignored_factors`. - Uses .compile_factors() for nested dataclasses that support it diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index bf0e7773f3aa..a4b639de0607 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -273,8 +273,6 @@ def _compile(config_obj: SupportsCompileFactors | None) -> CompileFactors: "lora": _compile(self.lora_config), } - factors["max_num_batched_tokens"] = self.scheduler_config.max_num_batched_tokens - if self.additional_config: additional_config = self.additional_config if isinstance(additional_config, dict): From 70ec1923513dc1cfb44b798562157cdd46c565fb Mon Sep 17 00:00:00 2001 From: WorldExplored Date: Mon, 8 Dec 2025 16:44:28 -0500 Subject: [PATCH 33/57] fixed precomit Signed-off-by: WorldExplored Co-Authored-By: vnadathur <236933696+vnadathur@users.noreply.github.com> --- vllm/compilation/backends.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py index 4e3fe2dc3292..2cada01dfda0 100644 --- a/vllm/compilation/backends.py +++ b/vllm/compilation/backends.py @@ -25,8 +25,8 @@ should_split, ) from vllm.config 
import CompilationConfig, CUDAGraphMode, VllmConfig -from vllm.config.utils import CompileFactors, Range, hash_factors from vllm.config.compilation import DynamicShapesType +from vllm.config.utils import CompileFactors, Range, hash_factors from vllm.logger import init_logger from vllm.logging_utils import lazy from vllm.platforms import current_platform From 0ee4834f397117920ed5d5305f69b7879b7d2ccc Mon Sep 17 00:00:00 2001 From: vnadathur Date: Mon, 15 Dec 2025 17:09:03 -0800 Subject: [PATCH 34/57] fix assert & precom Signed-off-by: vnadathur --- vllm/config/compilation.py | 1 - vllm/config/vllm.py | 7 +++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index 0bcc58cb8285..65036e2157f1 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -18,7 +18,6 @@ Range, config, get_compile_factors, - handle_deprecated, hash_factors, ) from vllm.logger import init_logger diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index 7c274b58a54b..862d2620689d 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -280,9 +280,12 @@ def _compile(config_obj: SupportsCompileFactors | None) -> CompileFactors: additional_config = self.additional_config if isinstance(additional_config, dict): factors["additional"] = additional_config - else: - assert isinstance(additional_config, SupportsCompileFactors) + elif isinstance(additional_config, SupportsCompileFactors): factors["additional"] = additional_config.compile_factors() + else: + raise TypeError( + "additional_config must be a dict or SupportsCompileFactors" + ) else: factors["additional"] = {} From 9587a86af01d51841ed5964df8493d601166ac7c Mon Sep 17 00:00:00 2001 From: vnadathur Date: Mon, 15 Dec 2025 17:59:21 -0800 Subject: [PATCH 35/57] precom Signed-off-by: vnadathur --- vllm/compilation/decorators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/compilation/decorators.py 
b/vllm/compilation/decorators.py index f97e6fff9ee8..0d0ec5fdc94a 100644 --- a/vllm/compilation/decorators.py +++ b/vllm/compilation/decorators.py @@ -391,7 +391,7 @@ def __call__(self, *args, **kwargs): serialized backend artifacts), then we need to generate a new AOT compile artifact from scratch. """ - + from .caching import compilation_config_hash_factors factors: list[str] = compilation_config_hash_factors(self.vllm_config) From 54c0e10201fd95703647f635da50346bc0fa6497 Mon Sep 17 00:00:00 2001 From: vnadathur Date: Mon, 15 Dec 2025 20:46:37 -0800 Subject: [PATCH 36/57] pre-com Signed-off-by: vnadathur --- vllm/compilation/decorators.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/vllm/compilation/decorators.py b/vllm/compilation/decorators.py index 0d0ec5fdc94a..930db939b7e4 100644 --- a/vllm/compilation/decorators.py +++ b/vllm/compilation/decorators.py @@ -392,10 +392,6 @@ def __call__(self, *args, **kwargs): compile artifact from scratch. """ - from .caching import compilation_config_hash_factors - - factors: list[str] = compilation_config_hash_factors(self.vllm_config) - # Keep AOT cache key in sync with JIT: env factors + config hash + model. 
env_hash, config_hash, *_ = compute_env_and_config_hashes(self.vllm_config) factors: list[str] = [env_hash, config_hash] From 5673536e3c38cc7cf7977d8234de5a0740c07dc9 Mon Sep 17 00:00:00 2001 From: vnadathur Date: Mon, 22 Dec 2025 17:29:07 -0800 Subject: [PATCH 37/57] cleanup compilation config, merge, etc ., Signed-off-by: vnadathur --- vllm/compilation/decorators.py | 10 +++++++--- vllm/config/compilation.py | 6 +++++- vllm/config/lora.py | 4 ---- vllm/config/parallel.py | 5 +---- 4 files changed, 13 insertions(+), 12 deletions(-) diff --git a/vllm/compilation/decorators.py b/vllm/compilation/decorators.py index b10f9af2d898..6b103fc74178 100644 --- a/vllm/compilation/decorators.py +++ b/vllm/compilation/decorators.py @@ -25,6 +25,7 @@ set_current_vllm_config, ) from vllm.config.compilation import DynamicShapesType +from vllm.config.utils import hash_factors from vllm.logger import init_logger from vllm.sequence import IntermediateTensors from vllm.utils.import_utils import resolve_obj_by_qualname @@ -394,9 +395,12 @@ def __call__(self, *args, **kwargs): # Keep AOT cache key in sync with JIT: env factors + config hash + model. 
env_hash, config_hash, *_ = compute_env_and_config_hashes(self.vllm_config) - factors: list[str] = [env_hash, config_hash] - factors.append(_model_hash_key(self.forward)) - hash_key = hashlib.sha256(str(factors).encode()).hexdigest() + factors = { + "env_hash": env_hash, + "config_hash": config_hash, + "model": _model_hash_key(self.forward), + } + hash_key = hash_factors(factors) cache_dir = os.path.join( envs.VLLM_CACHE_ROOT, "torch_aot_compile", diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index f2ce9e1fcf99..fedd33260de2 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -640,9 +640,13 @@ def compile_factors(self) -> CompileFactors: "traced_files", "compilation_time", "static_forward_context", + # handled explicitly below for clarity + "pass_config", } - return get_compile_factors(self, ignored_factors) + factors = get_compile_factors(self, ignored_factors) + factors["pass_config"] = self.pass_config.compile_factors() + return factors def __repr__(self) -> str: exclude = { diff --git a/vllm/config/lora.py b/vllm/config/lora.py index 98db43f71346..f52da525ce52 100644 --- a/vllm/config/lora.py +++ b/vllm/config/lora.py @@ -44,10 +44,6 @@ class LoRAConfig: `max_loras`.""" lora_dtype: torch.dtype | LoRADType = "auto" """Data type for LoRA. 
If auto, will default to base model dtype.""" - lora_extra_vocab_size: int = 0 - """Extra vocab size reserved for LoRA adapters.""" - lora_vocab_padding_size: int = 0 - """Padding size applied to LoRA vocab.""" default_mm_loras: dict[str, str] | None = None """Dictionary mapping specific modalities to LoRA model paths; this field is only applicable to multimodal models and should be leveraged when a diff --git a/vllm/config/parallel.py b/vllm/config/parallel.py index 6dddfa615560..bae3de034aea 100644 --- a/vllm/config/parallel.py +++ b/vllm/config/parallel.py @@ -494,10 +494,7 @@ def compile_factors(self) -> CompileFactors: "_api_process_rank", } - factors = get_compile_factors(self, ignored_factors) - # Explicitly include backend affecting env factor as before - factors["VLLM_ALL2ALL_BACKEND"] = str(envs.VLLM_ALL2ALL_BACKEND) - return factors + return get_compile_factors(self, ignored_factors) def __post_init__(self) -> None: # Set all2all_backend from env var if not specified, with deprecation warning From ef957b75df915051d93403d56c9298c3908b64f1 Mon Sep 17 00:00:00 2001 From: vnadathur Date: Wed, 24 Dec 2025 11:24:39 -0800 Subject: [PATCH 38/57] fix passconfig Signed-off-by: vnadathur --- vllm/config/compilation.py | 16 ++-------------- vllm/config/utils.py | 7 +++++++ 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index fedd33260de2..7288e0ea3ed4 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -185,15 +185,7 @@ def compile_factors(self) -> CompileFactors: Any future fields that don't affect compilation should be excluded. 
""" - ignored_fields: set[str] = { - "enable_fusion", - "enable_attn_fusion", - "enable_noop", - "enable_sequence_parallelism", - "enable_async_tp", - "enable_fi_allreduce_fusion", - } - return get_compile_factors(self, ignored_fields) + return get_compile_factors(self, set()) @field_validator( "fuse_norm_quant", @@ -640,13 +632,9 @@ def compile_factors(self) -> CompileFactors: "traced_files", "compilation_time", "static_forward_context", - # handled explicitly below for clarity - "pass_config", } - factors = get_compile_factors(self, ignored_factors) - factors["pass_config"] = self.pass_config.compile_factors() - return factors + return get_compile_factors(self, ignored_factors) def __repr__(self) -> str: exclude = { diff --git a/vllm/config/utils.py b/vllm/config/utils.py index c3ea95db272f..bc4b93c87198 100644 --- a/vllm/config/utils.py +++ b/vllm/config/utils.py @@ -294,6 +294,13 @@ def get_compile_factors(config: ConfigT, ignored_factors: set[str]) -> CompileFa - Uses .compile_factors() for nested dataclasses that support it - Errors on non-normalizable values. 
""" + field_names = {f.name for f in fields(config)} + unknown_ignored = ignored_factors - field_names + if unknown_ignored: + raise ValueError( + f"get_compile_factors: ignored_factors contain unknown fields " + f"{sorted(unknown_ignored)} for {type(config).__name__}" + ) factors: dict[str, object] = {} for dc_field in fields(config): factor = dc_field.name From fbcaf9ef7909fe028dc7e7a53f79b51d7330d3ce Mon Sep 17 00:00:00 2001 From: WorldExplored Date: Sun, 28 Dec 2025 12:40:40 -0800 Subject: [PATCH 39/57] fixed circular imporrt Signed-off-by: WorldExplored --- vllm/envs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/envs.py b/vllm/envs.py index b98ee9d8be4b..dc552790ecc4 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -10,8 +10,6 @@ from collections.abc import Callable from typing import TYPE_CHECKING, Any, Literal -from vllm.config.utils import normalize_value - if TYPE_CHECKING: VLLM_HOST_IP: str = "" VLLM_PORT: int | None = None @@ -1648,6 +1646,8 @@ def is_set(name: str): def compile_factors() -> dict[str, object]: """Collect env vars used for torch.compile cache keys.""" + from vllm.config.utils import normalize_value + ignored_factors: set[str] = { "MAX_JOBS", "VLLM_RPC_BASE_PATH", From cd168a75bad85dabb882b079912d57aa550fb894 Mon Sep 17 00:00:00 2001 From: WorldExplored Date: Sun, 28 Dec 2025 14:58:13 -0800 Subject: [PATCH 40/57] fixing cache breaks. 
Signed-off-by: WorldExplored --- vllm/distributed/parallel_state.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vllm/distributed/parallel_state.py b/vllm/distributed/parallel_state.py index f5ada5a009ec..3374cf1c9d3e 100644 --- a/vllm/distributed/parallel_state.py +++ b/vllm/distributed/parallel_state.py @@ -28,7 +28,7 @@ import pickle import weakref from collections import namedtuple -from collections.abc import Callable +from collections.abc import Callable, Sequence from contextlib import contextmanager, nullcontext from dataclasses import dataclass from datetime import timedelta @@ -171,7 +171,7 @@ def patched_fused_scaled_matmul_reduce_scatter_fake( orig_scatter_dim: int, scatter_dim_after_maybe_reshape: int, group_name: str, - output_shape: list[int], + output_shape: Sequence[int], bias: torch.Tensor | None = None, result_scale: torch.Tensor | None = None, out_dtype: torch.dtype | None = None, @@ -223,7 +223,7 @@ def patched_fused_scaled_matmul_reduce_scatter( orig_scatter_dim: int, scatter_dim_after_maybe_reshape: int, group_name: str, - output_shape: list[int], + output_shape: Sequence[int], bias: torch.Tensor | None = None, result_scale: torch.Tensor | None = None, out_dtype: torch.dtype | None = None, From fee0ee83f303b8da79f31666d21e846a6909c425 Mon Sep 17 00:00:00 2001 From: WorldExplored Date: Wed, 7 Jan 2026 17:49:51 -0800 Subject: [PATCH 41/57] fixed precom Signed-off-by: WorldExplored --- vllm/v1/engine/utils.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/vllm/v1/engine/utils.py b/vllm/v1/engine/utils.py index 45915537cb2d..9a908004295d 100644 --- a/vllm/v1/engine/utils.py +++ b/vllm/v1/engine/utils.py @@ -1032,10 +1032,6 @@ def wait_for_engine_startup( "_data_parallel_master_port_list", "data_parallel_size", ) - }, - parallel_config_hash=parallel_hash - if parallel_config.data_parallel_size > 1 - else None, } if coordinated_dp else {}, From a0b60a4a902af7d59c43b1bda361fdade083fd2b Mon Sep 17 00:00:00 2001 From: 
WorldExplored Date: Mon, 12 Jan 2026 13:08:30 -0500 Subject: [PATCH 42/57] precom fix Signed-off-by: WorldExplored --- vllm/compilation/caching.py | 2 ++ vllm/compilation/pass_manager.py | 3 +-- vllm/config/pooler.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/vllm/compilation/caching.py b/vllm/compilation/caching.py index 49e2ae9a5f01..e83fe9f9fec4 100644 --- a/vllm/compilation/caching.py +++ b/vllm/compilation/caching.py @@ -4,7 +4,9 @@ import inspect import os import pickle +from collections.abc import Callable, Sequence from pathlib import Path +from typing import Any, Literal from unittest.mock import patch import torch diff --git a/vllm/compilation/pass_manager.py b/vllm/compilation/pass_manager.py index 15ec0b08fc4e..60dae41fa866 100644 --- a/vllm/compilation/pass_manager.py +++ b/vllm/compilation/pass_manager.py @@ -2,7 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import functools from collections.abc import Callable -from typing import Any, ParamSpec, TypeVar +from typing import ParamSpec, TypeVar from torch import fx as fx @@ -159,5 +159,4 @@ def uuid(self) -> str: # Include the compile range in the uuid to ensure that inductor # recompiles the graph for the new dynamic compile range. 
state["compile_range"] = str(get_pass_context().compile_range) - state["passes"] = passes return InductorPass.hash_dict(state) diff --git a/vllm/config/pooler.py b/vllm/config/pooler.py index 590a5cd773fb..802baabdbbed 100644 --- a/vllm/config/pooler.py +++ b/vllm/config/pooler.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from typing import Any, Literal, get_args +from typing import Literal, get_args from pydantic.dataclasses import dataclass From 8f3d1afbe9b296ddc6a12c9ff28f185623ebdc3c Mon Sep 17 00:00:00 2001 From: WorldExplored Date: Mon, 12 Jan 2026 21:23:46 -0500 Subject: [PATCH 43/57] fixed cursor comments Signed-off-by: WorldExplored --- vllm/config/scheduler.py | 13 +++++++------ vllm/config/vllm.py | 9 ++++++--- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/vllm/config/scheduler.py b/vllm/config/scheduler.py index c563d46012e3..925388434785 100644 --- a/vllm/config/scheduler.py +++ b/vllm/config/scheduler.py @@ -188,12 +188,13 @@ def compile_factors(self) -> CompileFactors: excluding anything before input ids/embeddings and after the final hidden states. """ - # Only surface scheduler knobs that influence compiled shapes. - return { - "max_num_batched_tokens": self.max_num_batched_tokens, - "max_num_seqs": self.max_num_seqs, - "max_num_partial_prefills": self.max_num_partial_prefills, - } + # Only surface scheduler knobs that influence compiled shapes. Legacy + # scheduler hashing fed a list of factors (currently just + # max_num_batched_tokens) into safe_hash. The compile cache now hashes + # the entire config factors via JSON, so returning the factor list keeps + # the data payload unchanged (the outer dict from VllmConfig supplies + # the key) and avoids a shape change that would perturb the cache key. 
+ return [self.max_num_batched_tokens] @field_validator("scheduler_cls", "async_scheduling", mode="wrap") @classmethod diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index 7ee1f5268a25..50edc6cf8e53 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -256,8 +256,10 @@ def compile_factors(self) -> CompileFactors: def _compile(config_obj: SupportsCompileFactors | None) -> CompileFactors: if config_obj is None: - return {} - return config_obj.compile_factors() + return {"__none__": True} + + factors = config_obj.compile_factors() + return factors if factors else {"__empty__": True} factors: dict[str, Any] = { "version": __version__, @@ -267,6 +269,7 @@ def _compile(config_obj: SupportsCompileFactors | None) -> CompileFactors: "scheduler": _compile(self.scheduler_config), "device": _compile(self.device_config), "load": _compile(self.load_config), + "attention": _compile(self.attention_config), "speculative": _compile(self.speculative_config), "structured_outputs": _compile(self.structured_outputs_config), "observability": _compile(self.observability_config), @@ -287,7 +290,7 @@ def _compile(config_obj: SupportsCompileFactors | None) -> CompileFactors: "additional_config must be a dict or SupportsCompileFactors" ) else: - factors["additional"] = {} + factors["additional"] = {"__none__": True} return factors From dd759a093ac26cafd65c1d7ee1fae5bbc0513e42 Mon Sep 17 00:00:00 2001 From: Srreyansh Sethi <107075589+WorldExplored@users.noreply.github.com> Date: Tue, 13 Jan 2026 05:01:26 -0800 Subject: [PATCH 44/57] Update vllm/config/utils.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Luka Govedič Signed-off-by: Srreyansh Sethi <107075589+WorldExplored@users.noreply.github.com> --- vllm/config/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/config/utils.py b/vllm/config/utils.py index 9e3819ec7057..457074c4240d 100644 --- a/vllm/config/utils.py +++ 
b/vllm/config/utils.py @@ -313,7 +313,8 @@ def get_compile_factors(config: ConfigT, ignored_factors: set[str]) -> CompileFa value = getattr(config, factor, None) # Nested configs expose factors via compile_factors; unwrap first. if isinstance(value, SupportsCompileFactors): - value = value.compile_factors() + factors[factor] = value.compile_factors() + continue try: factors[factor] = normalize_value(value) except TypeError as e: From 7558451a81fec7e6d2f75083c4c3c2ec41370de7 Mon Sep 17 00:00:00 2001 From: WorldExplored Date: Tue, 13 Jan 2026 17:33:37 -0500 Subject: [PATCH 45/57] Addressed Reviewer Comments Signed-off-by: WorldExplored --- vllm/config/model.py | 9 --------- vllm/config/multimodal.py | 2 +- vllm/config/scheduler.py | 8 ++++---- vllm/config/utils.py | 5 ++++- vllm/config/vllm.py | 8 ++------ 5 files changed, 11 insertions(+), 21 deletions(-) diff --git a/vllm/config/model.py b/vllm/config/model.py index 02ff8c31bde6..4e8015ae00f5 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -352,15 +352,6 @@ def compile_factors(self) -> CompileFactors: "io_processor_plugin", "pooler_config", "multimodal_config", - "limit_mm_per_prompt", - "media_io_kwargs", - "mm_processor_kwargs", - "mm_processor_cache_gb", - "mm_processor_cache_type", - "mm_shm_cache_max_object_size_mb", - "mm_encoder_tp_mode", - "interleave_mm_strings", - "skip_mm_profiling", } factors = get_compile_factors(self, ignored_factors) diff --git a/vllm/config/multimodal.py b/vllm/config/multimodal.py index fc7a5ca6365e..957a27a96f30 100644 --- a/vllm/config/multimodal.py +++ b/vllm/config/multimodal.py @@ -7,8 +7,8 @@ from pydantic import ConfigDict, Field, field_validator, model_validator from pydantic.dataclasses import dataclass -from vllm.attention.backends.registry import AttentionBackendEnum from vllm.config.utils import CompileFactors, config, normalize_value +from vllm.v1.attention.backends.registry import AttentionBackendEnum @dataclass diff --git a/vllm/config/scheduler.py 
b/vllm/config/scheduler.py index 925388434785..0eb029d058a1 100644 --- a/vllm/config/scheduler.py +++ b/vllm/config/scheduler.py @@ -191,10 +191,10 @@ def compile_factors(self) -> CompileFactors: # Only surface scheduler knobs that influence compiled shapes. Legacy # scheduler hashing fed a list of factors (currently just # max_num_batched_tokens) into safe_hash. The compile cache now hashes - # the entire config factors via JSON, so returning the factor list keeps - # the data payload unchanged (the outer dict from VllmConfig supplies - # the key) and avoids a shape change that would perturb the cache key. - return [self.max_num_batched_tokens] + # the nested factors dict via JSON, so we expose a dict payload to + # satisfy the CompileFactors protocol while keeping the factor content + # minimal to avoid unnecessary cache key churn. + return {"max_num_batched_tokens": self.max_num_batched_tokens} @field_validator("scheduler_cls", "async_scheduling", mode="wrap") @classmethod diff --git a/vllm/config/utils.py b/vllm/config/utils.py index 457074c4240d..051029ffd4b5 100644 --- a/vllm/config/utils.py +++ b/vllm/config/utils.py @@ -298,7 +298,10 @@ def get_compile_factors(config: ConfigT, ignored_factors: set[str]) -> CompileFa - Uses .compile_factors() for nested dataclasses that support it - Errors on non-normalizable values. """ - field_names = {f.name for f in fields(config)} + # dataclasses.fields() skips InitVar entries; __dataclass_fields__ keeps + # them. Include both so ignored_factors can safely name InitVars. 
+ dataclass_fields = getattr(config, "__dataclass_fields__", {}) + field_names = {f.name for f in fields(config)} | set(dataclass_fields) unknown_ignored = ignored_factors - field_names if unknown_ignored: raise ValueError( diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index 50edc6cf8e53..cc0e944c7a80 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -255,11 +255,7 @@ def compile_factors(self) -> CompileFactors: from vllm import __version__ def _compile(config_obj: SupportsCompileFactors | None) -> CompileFactors: - if config_obj is None: - return {"__none__": True} - - factors = config_obj.compile_factors() - return factors if factors else {"__empty__": True} + return {} if config_obj is None else config_obj.compile_factors() factors: dict[str, Any] = { "version": __version__, @@ -290,7 +286,7 @@ def _compile(config_obj: SupportsCompileFactors | None) -> CompileFactors: "additional_config must be a dict or SupportsCompileFactors" ) else: - factors["additional"] = {"__none__": True} + factors["additional"] = {} return factors From 6c212a3debcba5a16a5034688fde7a68597ccdc8 Mon Sep 17 00:00:00 2001 From: WorldExplored Date: Wed, 14 Jan 2026 01:31:44 -0500 Subject: [PATCH 46/57] Addressed concerns Signed-off-by: WorldExplored Co-Authored-By: Vikram Nadathur <236933696+vnadathur@users.noreply.github.com> --- vllm/config/attention.py | 10 +++++++++- vllm/config/compilation.py | 9 +++------ vllm/config/profiler.py | 6 +++++- vllm/config/vllm.py | 1 + 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/vllm/config/attention.py b/vllm/config/attention.py index 293045787a1c..3d0794ca726c 100644 --- a/vllm/config/attention.py +++ b/vllm/config/attention.py @@ -6,7 +6,7 @@ from pydantic import field_validator from pydantic.dataclasses import dataclass -from vllm.config.utils import config +from vllm.config.utils import CompileFactors, config, get_compile_factors from vllm.logger import init_logger from vllm.v1.attention.backends.registry import 
AttentionBackendEnum @@ -62,6 +62,14 @@ def compute_hash(self) -> str: factors = get_hash_factors(self, ignored_factors) return hash_factors(factors) + def compile_factors(self) -> CompileFactors: + """ + Provide the factors that affect the compiled computation graph. + All dataclass fields participate; add fields to an ignore set if + they should not influence compilation cache keys. + """ + return get_compile_factors(self, set()) + @field_validator("backend", mode="before") @classmethod def validate_backend_before(cls, value: Any) -> Any: diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index 17276d38e284..470b07d7783c 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -18,7 +18,6 @@ Range, config, get_compile_factors, - hash_factors, ) from vllm.logger import init_logger from vllm.platforms import current_platform @@ -286,13 +285,11 @@ class DynamicShapesConfig: whether all tensor sizes can use 32 bit indexing. """ - def compute_hash(self) -> str: + def compile_factors(self) -> CompileFactors: """ - Provide a hash for DynamicShapesConfig + Provide the factors used for hashing DynamicShapesConfig. """ - - factors = get_compile_factors(self, set()) - return hash_factors(factors) + return get_compile_factors(self, set()) @config diff --git a/vllm/config/profiler.py b/vllm/config/profiler.py index 76cc546f3c9e..1162318f9286 100644 --- a/vllm/config/profiler.py +++ b/vllm/config/profiler.py @@ -9,7 +9,7 @@ from typing_extensions import Self import vllm.envs as envs -from vllm.config.utils import config +from vllm.config.utils import CompileFactors, config from vllm.logger import init_logger from vllm.utils.hashing import safe_hash @@ -88,6 +88,10 @@ def compute_hash(self) -> str: hash_str = safe_hash(str(factors).encode(), usedforsecurity=False).hexdigest() return hash_str + def compile_factors(self) -> CompileFactors: + # Profiling setup does not affect the computation graph, so hash neutral. 
+ return {} + def _get_from_env_if_set(self, field_name: str, env_var_name: str) -> None: """Get field from env var if set, with deprecation warning.""" diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index cc0e944c7a80..0faf74c2b51b 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -269,6 +269,7 @@ def _compile(config_obj: SupportsCompileFactors | None) -> CompileFactors: "speculative": _compile(self.speculative_config), "structured_outputs": _compile(self.structured_outputs_config), "observability": _compile(self.observability_config), + "profiler": _compile(self.profiler_config), "compilation": _compile(self.compilation_config), "kv_transfer": _compile(self.kv_transfer_config), "ec_transfer": _compile(self.ec_transfer_config), From 114a1706d7e640d282f5d4fa6d4e8fc181719f72 Mon Sep 17 00:00:00 2001 From: vnadathur Date: Wed, 14 Jan 2026 01:45:37 -0800 Subject: [PATCH 47/57] precom Signed-off-by: vnadathur --- vllm/config/speculative.py | 1 + vllm/config/vllm.py | 32 ++++++++++++++++---------------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/vllm/config/speculative.py b/vllm/config/speculative.py index 659075203a76..55352817ec3e 100644 --- a/vllm/config/speculative.py +++ b/vllm/config/speculative.py @@ -12,6 +12,7 @@ from vllm.config.parallel import ParallelConfig from vllm.config.utils import CompileFactors, config, normalize_value from vllm.logger import init_logger +from vllm.transformers_utils.config import get_hf_text_config from vllm.utils.import_utils import LazyLoader, has_arctic_inference if TYPE_CHECKING: diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index 592dc4329576..87b06694db31 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -254,26 +254,26 @@ def compile_factors(self) -> CompileFactors: """ from vllm import __version__ - def _compile(config_obj: SupportsCompileFactors | None) -> CompileFactors: + def get_factors(config_obj: SupportsCompileFactors | None) -> CompileFactors: return {} if 
config_obj is None else config_obj.compile_factors() factors: dict[str, Any] = { "version": __version__, - "model": _compile(self.model_config), - "cache": _compile(self.cache_config), - "parallel": _compile(self.parallel_config), - "scheduler": _compile(self.scheduler_config), - "device": _compile(self.device_config), - "load": _compile(self.load_config), - "attention": _compile(self.attention_config), - "speculative": _compile(self.speculative_config), - "structured_outputs": _compile(self.structured_outputs_config), - "observability": _compile(self.observability_config), - "profiler": _compile(self.profiler_config), - "compilation": _compile(self.compilation_config), - "kv_transfer": _compile(self.kv_transfer_config), - "ec_transfer": _compile(self.ec_transfer_config), - "lora": _compile(self.lora_config), + "model": get_factors(self.model_config), + "cache": get_factors(self.cache_config), + "parallel": get_factors(self.parallel_config), + "scheduler": get_factors(self.scheduler_config), + "device": get_factors(self.device_config), + "load": get_factors(self.load_config), + "attention": get_factors(self.attention_config), + "speculative": get_factors(self.speculative_config), + "structured_outputs": get_factors(self.structured_outputs_config), + "observability": get_factors(self.observability_config), + "profiler": get_factors(self.profiler_config), + "compilation": get_factors(self.compilation_config), + "kv_transfer": get_factors(self.kv_transfer_config), + "ec_transfer": get_factors(self.ec_transfer_config), + "lora": get_factors(self.lora_config), } if self.additional_config: From a6b1afdd43a29a7f4cfc215958ff6ec22f9674bd Mon Sep 17 00:00:00 2001 From: WorldExplored Date: Mon, 19 Jan 2026 21:03:06 -0500 Subject: [PATCH 48/57] fixed precom Signed-off-by: WorldExplored --- vllm/compilation/decorators.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm/compilation/decorators.py b/vllm/compilation/decorators.py index 56a27392baf1..1ae6b860e6b1 100644 --- 
a/vllm/compilation/decorators.py +++ b/vllm/compilation/decorators.py @@ -26,6 +26,7 @@ ) from vllm.config.compilation import DynamicShapesType from vllm.config.utils import hash_factors +from vllm.forward_context import get_forward_context, is_forward_context_available from vllm.logger import init_logger from vllm.sequence import IntermediateTensors from vllm.utils.import_utils import resolve_obj_by_qualname From ef3b3b17accf23d047a3973f4b416402c8e80c50 Mon Sep 17 00:00:00 2001 From: WorldExplored Date: Thu, 22 Jan 2026 15:36:10 -0500 Subject: [PATCH 49/57] fixed precom Signed-off-by: WorldExplored --- vllm/compilation/backends.py | 4 --- vllm/compilation/caching.py | 14 ++++---- vllm/compilation/compiler_interface.py | 47 ++++---------------------- vllm/compilation/decorators.py | 2 -- vllm/config/attention.py | 1 - 5 files changed, 14 insertions(+), 54 deletions(-) diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py index 5f6a183073d0..3e378e06f706 100644 --- a/vllm/compilation/backends.py +++ b/vllm/compilation/backends.py @@ -743,10 +743,6 @@ def configure_post_pass(self) -> None: self.inductor_config[self.pass_key] = self.pass_manager def __call__(self, graph: fx.GraphModule, example_inputs: Sequence[Any]) -> Any: - from .caching import ( - VllmSerializableFunction, - ) - vllm_config = self.vllm_config # Minimal hashing here with existing utilities, reused below. 
diff --git a/vllm/compilation/caching.py b/vllm/compilation/caching.py index a1d9a39a9932..b548447ff979 100644 --- a/vllm/compilation/caching.py +++ b/vllm/compilation/caching.py @@ -7,7 +7,7 @@ import pickle from collections.abc import Callable, Sequence from pathlib import Path -from typing import Any, Literal +from typing import Any, Literal, Protocol from unittest.mock import patch import torch @@ -17,16 +17,13 @@ from vllm.compilation.compiler_interface import get_inductor_factors from vllm.config import VllmConfig, get_current_vllm_config from vllm.config.utils import hash_factors -from vllm.compilation.decorators import VllmSerializableFunction from vllm.logger import init_logger from vllm.utils.hashing import safe_hash -try: - from torch._dynamo.aot_compile import SerializableCallable -except ImportError: - SerializableCallable = object -assert isinstance(SerializableCallable, type) +class SerializableCallable(Protocol): + def __call__(self, *args: Any, **kwargs: Any) -> Any: ... + logger = init_logger(__name__) @@ -165,6 +162,7 @@ def __setstate__(self, state: dict[str, dict[str, Any]]) -> None: self.submodule_bytes_store = state["submodule_bytes_store"] self.loaded_submodule_store = {} + class VllmSerializableFunction(SerializableCallable): """ A wrapper around a compiled function by vllm. 
It will forward the tensor @@ -368,6 +366,8 @@ def compute_env_and_config_hashes( config_factors = vllm_config.compile_factors() config_hash = hash_factors(config_factors) return env_hash, config_hash, env_factors, config_factors + + def reconstruct_serializable_fn_from_mega_artifact( state: dict[str, Any], standalone_compile_artifacts: "StandaloneCompiledArtifacts", diff --git a/vllm/compilation/compiler_interface.py b/vllm/compilation/compiler_interface.py index d3a2e7684ab1..f59dbfb1397c 100644 --- a/vllm/compilation/compiler_interface.py +++ b/vllm/compilation/compiler_interface.py @@ -18,8 +18,6 @@ from vllm.config.utils import CompileFactors, Range from vllm.utils.torch_utils import is_torch_equal_or_newer -logger = init_logger(__name__) - class CompilerInterface: """ @@ -227,42 +225,12 @@ def compile( from torch._inductor import standalone_compile - supports_aot = is_torch_equal_or_newer("2.10.0.dev") - - if not supports_aot and envs.VLLM_USE_MEGA_AOT_ARTIFACT: - logger.error( - "CRITICAL: VLLM_USE_MEGA_AOT_ARTIFACT " - "is enabled but PyTorch version does not support 'aot' " - "parameter in standalone_compile. This requires PyTorch " - "2.10.0+. Falling back to non-AOT mode." - ) - - compile_kwargs = { - "dynamic_shapes": dynamic_shapes, - "options": { - "config_patches": current_config, - }, - } - - use_aot: bool = supports_aot and envs.VLLM_USE_MEGA_AOT_ARTIFACT - # only add 'aot' parameter if both supported and enabled... 
- # this will set bundled_autograd_cache - # https://github.com/pytorch/pytorch/blob/9bbc5b2905c260adf41bc866a732f9c121a2828a/torch/_inductor/standalone_compile.py#L359 # noqa - if use_aot: - compile_kwargs["aot"] = True # type: ignore[assignment] - - compiled_graph = standalone_compile(graph, example_inputs, **compile_kwargs) - - if use_aot: - from torch._inductor.standalone_compile import AOTCompiledArtifact - - assert isinstance(compiled_graph, AOTCompiledArtifact) - assert hasattr(compiled_graph, "serialize") - # just return the compiled graph and a key - # since we can serialize the bytes using to_bytes - # and reload it using the key when reading - return compiled_graph, None - + compiled_graph = standalone_compile( + graph, + example_inputs, + dynamic_shapes=dynamic_shapes, + options={"config_patches": current_config}, + ) # Save the compiled artifact to disk in the specified path assert key is not None path = os.path.join(self.cache_dir, key) @@ -642,8 +610,7 @@ def set_inductor_config(config: dict[str, Any], compile_range: Range) -> None: def set_functorch_config() -> None: - if not envs.VLLM_USE_MEGA_AOT_ARTIFACT: - torch._functorch.config.bundled_autograd_cache = False + torch._functorch.config.bundled_autograd_cache = False class EagerAdaptor(CompilerInterface): diff --git a/vllm/compilation/decorators.py b/vllm/compilation/decorators.py index a38d0ce0e7bf..3df5ea6d636d 100644 --- a/vllm/compilation/decorators.py +++ b/vllm/compilation/decorators.py @@ -25,14 +25,12 @@ set_current_vllm_config, ) from vllm.config.compilation import DynamicShapesType -from vllm.config.utils import hash_factors from vllm.forward_context import get_forward_context, is_forward_context_available from vllm.logger import init_logger from vllm.sequence import IntermediateTensors from vllm.utils.import_utils import resolve_obj_by_qualname from vllm.utils.torch_utils import is_torch_equal_or_newer -from .caching import compute_env_and_config_hashes from .monitor import 
start_monitoring_torch_compile if TYPE_CHECKING: diff --git a/vllm/config/attention.py b/vllm/config/attention.py index bfc888a5c861..b7b91851f257 100644 --- a/vllm/config/attention.py +++ b/vllm/config/attention.py @@ -7,7 +7,6 @@ from pydantic.dataclasses import dataclass from vllm.config.utils import CompileFactors, config, get_compile_factors -from vllm.logger import init_logger from vllm.v1.attention.backends.registry import AttentionBackendEnum From b807f052242b9f3b53b7f68c260c8c255078470d Mon Sep 17 00:00:00 2001 From: WorldExplored Date: Fri, 23 Jan 2026 12:32:25 -0500 Subject: [PATCH 50/57] addressed comments Signed-off-by: WorldExplored --- vllm/compilation/caching.py | 11 +++--- vllm/compilation/compiler_interface.py | 47 ++++++++++++++++++++++---- vllm/config/attention.py | 14 -------- vllm/config/scheduler.py | 16 +++++---- 4 files changed, 57 insertions(+), 31 deletions(-) diff --git a/vllm/compilation/caching.py b/vllm/compilation/caching.py index b548447ff979..e9b7a4e27764 100644 --- a/vllm/compilation/caching.py +++ b/vllm/compilation/caching.py @@ -7,7 +7,7 @@ import pickle from collections.abc import Callable, Sequence from pathlib import Path -from typing import Any, Literal, Protocol +from typing import Any, Literal from unittest.mock import patch import torch @@ -20,9 +20,12 @@ from vllm.logger import init_logger from vllm.utils.hashing import safe_hash +try: + from torch._dynamo.aot_compile import SerializableCallable +except ImportError: + SerializableCallable = object -class SerializableCallable(Protocol): - def __call__(self, *args: Any, **kwargs: Any) -> Any: ... +assert isinstance(SerializableCallable, type) logger = init_logger(__name__) @@ -163,7 +166,7 @@ def __setstate__(self, state: dict[str, dict[str, Any]]) -> None: self.loaded_submodule_store = {} -class VllmSerializableFunction(SerializableCallable): +class VllmSerializableFunction(SerializableCallable): # type: ignore[misc] """ A wrapper around a compiled function by vllm. 
It will forward the tensor inputs to the compiled function and return the result. diff --git a/vllm/compilation/compiler_interface.py b/vllm/compilation/compiler_interface.py index f59dbfb1397c..b9b6768ea8be 100644 --- a/vllm/compilation/compiler_interface.py +++ b/vllm/compilation/compiler_interface.py @@ -16,8 +16,11 @@ from vllm.compilation.counter import compilation_counter from vllm.config import VllmConfig from vllm.config.utils import CompileFactors, Range +from vllm.logger import init_logger from vllm.utils.torch_utils import is_torch_equal_or_newer +logger = init_logger(__name__) + class CompilerInterface: """ @@ -225,12 +228,41 @@ def compile( from torch._inductor import standalone_compile - compiled_graph = standalone_compile( - graph, - example_inputs, - dynamic_shapes=dynamic_shapes, - options={"config_patches": current_config}, - ) + supports_aot = is_torch_equal_or_newer("2.10.0.dev") + + if not supports_aot and envs.VLLM_USE_MEGA_AOT_ARTIFACT: + logger.error( + "CRITICAL: VLLM_USE_MEGA_AOT_ARTIFACT " + "is enabled but PyTorch version does not support 'aot' " + "parameter in standalone_compile. This requires PyTorch " + "2.10.0+. Falling back to non-AOT mode." + ) + + compile_kwargs = { + "dynamic_shapes": dynamic_shapes, + "options": { + "config_patches": current_config, + }, + } + + use_aot: bool = supports_aot and envs.VLLM_USE_MEGA_AOT_ARTIFACT + # only add 'aot' parameter if both supported and enabled... 
+ # this will set bundled_autograd_cache + # https://github.com/pytorch/pytorch/blob/9bbc5b2905c260adf41bc866a732f9c121a2828a/torch/_inductor/standalone_compile.py#L359 # noqa + if use_aot: + compile_kwargs["aot"] = True # type: ignore[assignment] + + compiled_graph = standalone_compile(graph, example_inputs, **compile_kwargs) + + if use_aot: + from torch._inductor.standalone_compile import AOTCompiledArtifact + + assert isinstance(compiled_graph, AOTCompiledArtifact) + assert hasattr(compiled_graph, "serialize") + # just return the compiled graph and a key + # since we can serialize the bytes using to_bytes + # and reload it using the key when reading + return compiled_graph, None # Save the compiled artifact to disk in the specified path assert key is not None path = os.path.join(self.cache_dir, key) @@ -610,7 +642,8 @@ def set_inductor_config(config: dict[str, Any], compile_range: Range) -> None: def set_functorch_config() -> None: - torch._functorch.config.bundled_autograd_cache = False + if not envs.VLLM_USE_MEGA_AOT_ARTIFACT: + torch._functorch.config.bundled_autograd_cache = False class EagerAdaptor(CompilerInterface): diff --git a/vllm/config/attention.py b/vllm/config/attention.py index b7b91851f257..2dd88eed0aaa 100644 --- a/vllm/config/attention.py +++ b/vllm/config/attention.py @@ -45,20 +45,6 @@ class AttentionConfig: disable_flashinfer_q_quantization: bool = False """If set, when using fp8 kv, do not quantize Q to fp8.""" - def compute_hash(self) -> str: - """ - Provide a hash that uniquely identifies all the configs - that affect the structure of the computation - graph from input ids/embeddings to the final hidden states, - excluding anything before input ids/embeddings and after - the final hidden states. 
- """ - from vllm.config.utils import get_hash_factors, hash_factors - - ignored_factors: list[str] = [] - factors = get_hash_factors(self, ignored_factors) - return hash_factors(factors) - def compile_factors(self) -> CompileFactors: """ Provide the factors that affect the compiled computation graph. diff --git a/vllm/config/scheduler.py b/vllm/config/scheduler.py index 8ad430a549fb..811575c0dfa9 100644 --- a/vllm/config/scheduler.py +++ b/vllm/config/scheduler.py @@ -185,12 +185,16 @@ def compile_factors(self) -> CompileFactors: excluding anything before input ids/embeddings and after the final hidden states. """ - # Only surface scheduler knobs that influence compiled shapes. Legacy - # scheduler hashing fed a list of factors (currently just - # max_num_batched_tokens) into safe_hash. The compile cache now hashes - # the nested factors dict via JSON, so we expose a dict payload to - # satisfy the CompileFactors protocol while keeping the factor content - # minimal to avoid unnecessary cache key churn. + # max_num_batched_tokens needs to be included in the hash for + # two reasons: + # 1. LoRA creates static buffers based on max_num_batched_tokens. + # The tensor sizes and strides get captured in the torch.compile + # graph explicitly. + # 2. Inductor decides whether to use 32-bit or 64-bit integer indexing + # based on the data sizes. `max_num_batched_tokens` has an + # impact on that. 
For more details, please check + # https://github.com/vllm-project/vllm/issues/29585 + return {"max_num_batched_tokens": self.max_num_batched_tokens} @field_validator("scheduler_cls", "async_scheduling", mode="wrap") From 662fae03fc0bd1c18e8dbdeb11349275c9efc3cc Mon Sep 17 00:00:00 2001 From: vnadathur Date: Mon, 26 Jan 2026 11:19:34 -0800 Subject: [PATCH 51/57] add factor Signed-off-by: vnadathur --- vllm/config/compilation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index f5b0d8097982..941eb2da5918 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -626,6 +626,7 @@ def compile_factors(self) -> CompileFactors: "debug_dump_path", "cache_dir", "local_cache_dir", + "bs_to_padded_graph_size", "traced_files", "compilation_time", "static_forward_context", From a1973c5b12f868035d1eafd84a6e3f3381b32c87 Mon Sep 17 00:00:00 2001 From: WorldExplored Date: Mon, 26 Jan 2026 19:43:38 -0500 Subject: [PATCH 52/57] updated compilation Signed-off-by: WorldExplored --- vllm/config/compilation.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index 941eb2da5918..7f8f0d9f36ec 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -592,6 +592,9 @@ class CompilationConfig: Map from layer name to layer objects that need to be accessed outside model code, e.g., Attention, FusedMOE when dp_size>1.""" + bs_to_padded_graph_size: list[int] = field(default_factory=list, init=False) + """Runtime map from batch size to cudagraph padded size.""" + # Attention ops; used for piecewise cudagraphs # Use PyTorch operator format: "namespace::name" _attention_ops: ClassVar[list[str]] = [ From 8a8d890c4b6eb49e19807075b3402b9222da7671 Mon Sep 17 00:00:00 2001 From: WorldExplored Date: Mon, 26 Jan 2026 20:16:37 -0500 Subject: [PATCH 53/57] fixed json Signed-off-by: WorldExplored --- vllm/compilation/compiler_interface.py | 6 +++--- 1 
file changed, 3 insertions(+), 3 deletions(-) diff --git a/vllm/compilation/compiler_interface.py b/vllm/compilation/compiler_interface.py index b9b6768ea8be..3667623572e0 100644 --- a/vllm/compilation/compiler_interface.py +++ b/vllm/compilation/compiler_interface.py @@ -15,7 +15,7 @@ import vllm.envs as envs from vllm.compilation.counter import compilation_counter from vllm.config import VllmConfig -from vllm.config.utils import CompileFactors, Range +from vllm.config.utils import CompileFactors, Range, normalize_value from vllm.logger import init_logger from vllm.utils.torch_utils import is_torch_equal_or_newer @@ -155,13 +155,13 @@ def get_inductor_factors() -> list[Any]: # summarize system state from torch._inductor.codecache import CacheBase - system_factors = CacheBase.get_system() + system_factors = normalize_value(CacheBase.get_system()) factors.append(system_factors) # summarize pytorch state from torch._inductor.codecache import torch_key - torch_factors = torch_key() + torch_factors = normalize_value(torch_key()) factors.append(torch_factors) return factors From b35d63a7a9ec38b8855d1519ee7eb2777afc8e5f Mon Sep 17 00:00:00 2001 From: WorldExplored Date: Sat, 31 Jan 2026 19:02:42 -0500 Subject: [PATCH 54/57] fixed precom Signed-off-by: WorldExplored --- vllm/sampling_params.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py index 1d097852e194..42f768cf0bc5 100644 --- a/vllm/sampling_params.py +++ b/vllm/sampling_params.py @@ -9,7 +9,8 @@ from typing import Annotated, Any import msgspec -from pydantic.dataclasses import dataclass +from pydantic.dataclasses import dataclass as pydantic_dataclass +from typing_extensions import dataclass_transform from vllm.exceptions import VLLMValidationError from vllm.logger import init_logger @@ -17,6 +18,13 @@ from vllm.tokenizers import TokenizerLike from vllm.v1.serial_utils import PydanticMsgspecMixin + +# Keep pydantic runtime behavior 
while giving mypy dataclass semantics. +@dataclass_transform(field_specifiers=(field,)) +def dataclass(*args, **kwargs): + return pydantic_dataclass(*args, **kwargs) + + logger = init_logger(__name__) _SAMPLING_EPS = 1e-5 From 8842e87972ba79a6a65daa9e5febafc6e3834bb6 Mon Sep 17 00:00:00 2001 From: WorldExplored Date: Sat, 31 Jan 2026 19:49:48 -0500 Subject: [PATCH 55/57] precom Signed-off-by: WorldExplored --- vllm/compilation/pass_manager.py | 9 ++++----- vllm/config/vllm.py | 6 +++++- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/vllm/compilation/pass_manager.py b/vllm/compilation/pass_manager.py index 4ce5fb0d7686..482dc23bc40c 100644 --- a/vllm/compilation/pass_manager.py +++ b/vllm/compilation/pass_manager.py @@ -154,11 +154,10 @@ def uuid(self) -> str: affects compilation caching. Its uuid depends on the UUIDs of all dependent passes and the pass config. See InductorPass for more info. """ - state = {"pass_config": self.pass_config.compile_factors(), "passes": []} - for pass_ in self.passes: - state["passes"].append(pass_.uuid()) - state["passes"].append(self.post_cleanup.uuid()) - state["passes"].append(self.fix_functionalization.uuid()) + passes: list[str] = [pass_.uuid() for pass_ in self.passes] + passes.append(self.post_cleanup.uuid()) + passes.append(self.fix_functionalization.uuid()) + state = {"pass_config": self.pass_config.compile_factors(), "passes": passes} # Include the compile range in the uuid to ensure that inductor # recompiles the graph for the new dynamic compile range. 
diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index eb273015806f..4b9e6f55d2c1 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -41,7 +41,7 @@ from .scheduler import SchedulerConfig from .speculative import SpeculativeConfig from .structured_outputs import StructuredOutputsConfig -from .utils import CompileFactors, SupportsCompileFactors, config +from .utils import CompileFactors, SupportsCompileFactors, config, hash_factors if TYPE_CHECKING: from transformers import PretrainedConfig @@ -307,6 +307,10 @@ def get_factors(config_obj: SupportsCompileFactors | None) -> CompileFactors: return factors + def compute_hash(self) -> str: + """Return a stable hash of the compilation-relevant factors.""" + return hash_factors(self.compile_factors()) + @property def needs_dp_coordinator(self) -> bool: """ From c9326f3f8bf9287a0a67566dee87b927e0825d34 Mon Sep 17 00:00:00 2001 From: WorldExplored Date: Tue, 3 Feb 2026 22:51:42 -0500 Subject: [PATCH 56/57] fixed premmit Signed-off-by: WorldExplored --- vllm/compilation/backends.py | 1 + vllm/config/ec_transfer.py | 2 -- vllm/config/kv_transfer.py | 3 --- vllm/config/pooler.py | 2 -- vllm/config/vllm.py | 9 +++++++-- 5 files changed, 8 insertions(+), 9 deletions(-) diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py index 9bdc7800bc43..a30d9dd83658 100644 --- a/vllm/compilation/backends.py +++ b/vllm/compilation/backends.py @@ -4,6 +4,7 @@ import ast import contextvars import dataclasses +import hashlib import json import operator import os diff --git a/vllm/config/ec_transfer.py b/vllm/config/ec_transfer.py index 83fd608f4793..d89d585a9bea 100644 --- a/vllm/config/ec_transfer.py +++ b/vllm/config/ec_transfer.py @@ -4,8 +4,6 @@ from dataclasses import field from typing import Any, Literal, get_args -from pydantic.dataclasses import dataclass - from vllm.config.utils import CompileFactors, config ECProducer = Literal["ec_producer"] diff --git a/vllm/config/kv_transfer.py 
b/vllm/config/kv_transfer.py index 7c3c3f3a233f..1facaf015381 100644 --- a/vllm/config/kv_transfer.py +++ b/vllm/config/kv_transfer.py @@ -5,10 +5,7 @@ from dataclasses import field from typing import Any, Literal, get_args -from pydantic.dataclasses import dataclass - from vllm.config.utils import CompileFactors, config -from vllm.utils.hashing import safe_hash KVProducer = Literal["kv_producer", "kv_both"] KVConsumer = Literal["kv_consumer", "kv_both"] diff --git a/vllm/config/pooler.py b/vllm/config/pooler.py index e18f21f6d6f1..ce549948fa1b 100644 --- a/vllm/config/pooler.py +++ b/vllm/config/pooler.py @@ -3,8 +3,6 @@ from typing import Literal, get_args -from pydantic.dataclasses import dataclass - from vllm.config.utils import CompileFactors, config from vllm.logger import init_logger diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index db9305430136..0f452842b759 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -8,7 +8,7 @@ import threading import time from contextlib import contextmanager -from dataclasses import is_dataclass +from dataclasses import is_dataclass, replace from datetime import datetime from enum import IntEnum from functools import lru_cache @@ -39,7 +39,12 @@ from .scheduler import SchedulerConfig from .speculative import EagleModelTypes, SpeculativeConfig from .structured_outputs import StructuredOutputsConfig -from .utils import CompileFactors, SupportsCompileFactors, SupportsHash, config, hash_factors +from .utils import ( + CompileFactors, + SupportsCompileFactors, + config, + hash_factors, +) if TYPE_CHECKING: from transformers import PretrainedConfig From c8cfbeadcc169ed7e4f495a3dc21da610afcad34 Mon Sep 17 00:00:00 2001 From: WorldExplored Date: Tue, 3 Feb 2026 23:43:44 -0500 Subject: [PATCH 57/57] precom Signed-off-by: WorldExplored --- vllm/config/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/config/utils.py b/vllm/config/utils.py index 27714c10acfd..146cd3667cce 100644 --- 
a/vllm/config/utils.py +++ b/vllm/config/utils.py @@ -32,7 +32,7 @@ DataclassInstance = Any ConfigType = type[DataclassInstance] -ConfigT = TypeVar("ConfigT", bound=ConfigType) +ConfigT = TypeVar("ConfigT", bound=DataclassInstance) CompileFactors = dict[str, object]