vllm-project · ghost · Nov 20, 2025 · Nov 20, 2025 · Nov 20, 2025 · Nov 20, 2025
diff --git a/tests/compile/fullgraph/test_toy_llama.py b/tests/compile/fullgraph/test_toy_llama.py
@@ -26,6 +26,7 @@
     VllmConfig,
     set_current_vllm_config,
 )
+from vllm.config.utils import get_compile_factors
 from vllm.forward_context import BatchDescriptor, set_forward_context
 from vllm.utils.torch_utils import is_torch_equal_or_newer
 
@@ -45,16 +46,9 @@ class LlamaConfig:
     tractable_init: bool = False
     random_seed: int = 0
 
-    def compute_hash(self) -> str:
-        factors: list[Any] = []
-        for k, v in self.__dict__.items():
-            if k == "random_seed":
-                continue
-            factors.append((k, v))
-        factors.sort()
-        import hashlib
-
-        return hashlib.md5(str(factors).encode(), usedforsecurity=False).hexdigest()
+    def compile_factors(self) -> dict[str, Any]:
+        """Return this config's compile factors, passing ``random_seed`` as ignored."""
+        return get_compile_factors(self, {"random_seed"})
 
     def __post_init__(self):
         assert self.mlp_size >= self.hidden_size

diff --git a/tests/config/test_config_utils.py b/tests/config/test_config_utils.py
@@ -6,7 +6,7 @@
 
 import pytest
 
-from vllm.config.utils import get_hash_factors, hash_factors, normalize_value
+from vllm.config.utils import get_compile_factors, hash_factors, normalize_value
 
 # Helpers
 
@@ -25,7 +25,7 @@ def expected_path(p_str: str = ".") -> str:
     return p.expanduser().resolve().as_posix()
 
 
-# Minimal dataclass to test get_hash_factors.
+# Minimal dataclass to test get_compile_factors.
 # Avoid importing heavy vLLM configs.
 @dataclass
 class SimpleConfig:
@@ -136,8 +136,8 @@ def test_enum_vs_int_disambiguation():
     assert enum_val == "raw_logits"
 
     # Build factor dicts from configs with int vs enum
-    f_int = get_hash_factors(SimpleConfig(1), set())
-    f_enum = get_hash_factors(SimpleConfig(DummyLogprobsMode.RAW_LOGITS), set())
+    f_int = get_compile_factors(SimpleConfig(1), set())
+    f_enum = get_compile_factors(SimpleConfig(DummyLogprobsMode.RAW_LOGITS), set())
     # The int case remains a primitive value
     assert f_int["a"] == 1
     # The enum case becomes a tagged tuple ("module.QualName", "raw_logits")

diff --git a/tests/config/test_multimodal_config.py b/tests/config/test_multimodal_config.py
@@ -18,8 +18,8 @@ def test_mm_encoder_attn_backend_invalid():
 
 
 def test_mm_encoder_attn_backend_hash_updates():
-    base_hash = MultiModalConfig().compute_hash()
-    overridden_hash = MultiModalConfig(
+    base_compile_signature = MultiModalConfig().compile_factors()
+    overridden_compile_signature = MultiModalConfig(
         mm_encoder_attn_backend=AttentionBackendEnum.FLASH_ATTN
-    ).compute_hash()
-    assert base_hash != overridden_hash
+    ).compile_factors()
+    assert base_compile_signature != overridden_compile_signature
diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py
@@ -3,7 +3,6 @@
 
 import ast
 import dataclasses
-import hashlib
 import json
 import operator
 import os
@@ -13,6 +12,7 @@
 from contextlib import contextmanager
 from copy import deepcopy
 from functools import partial
+from pathlib import Path
 from typing import Any
 
 import torch
@@ -26,14 +26,18 @@
     should_split,
 )
 from vllm.config import CompilationConfig, CUDAGraphMode, VllmConfig
-from vllm.config.utils import hash_factors
+from vllm.config.utils import CompileFactors, hash_factors
 from vllm.logger import init_logger
 from vllm.logging_utils import lazy
 from vllm.platforms import current_platform
 from vllm.utils.import_utils import resolve_obj_by_qualname
 from vllm.utils.torch_utils import is_torch_equal_or_newer
 
-from .caching import VllmSerializableFunction
+from .caching import (
+    VllmSerializableFunction,
+    compute_env_and_config_hashes,
+    get_code_factors,
+)
 from .compiler_interface import (
     CompilerInterface,
     EagerAdaptor,
@@ -95,8 +99,8 @@ def __init__(self, compilation_config: CompilationConfig):
         self.compilation_config = compilation_config
         self.compiler = make_compiler(compilation_config)
 
-    def compute_hash(self, vllm_config: VllmConfig) -> str:
-        return self.compiler.compute_hash(vllm_config)
+    def compile_factors(self, vllm_config: VllmConfig) -> CompileFactors:
+        return self.compiler.compile_factors(vllm_config)
 
     @contextmanager
     def compile_context(self, runtime_shape: int | None = None):
@@ -596,42 +600,41 @@ def __call__(
         vllm_config = self.vllm_config
         # Minimal hashing here with existing utilities, reused below.
 
-        env_factors = envs.compile_factors()
-        env_hash = hash_factors(env_factors)
-        # Compute config/compiler/code hashes once and reuse
-        config_hash = vllm_config.compute_hash()
-        compiler_hash = self.compiler_manager.compute_hash(vllm_config)
-        forward_code_files = list(sorted(self.compilation_config.traced_files))
+        (
+            env_hash,
+            config_hash,
+            env_factors,
+            config_factors,
+        ) = compute_env_and_config_hashes(vllm_config)
+        compiler_factors = self.compiler_manager.compile_factors(vllm_config)
+        compiler_hash = hash_factors(compiler_factors)
+        traced_files = set(self.compilation_config.traced_files)
+        forward_code_files = sorted(
+            (Path(filepath) for filepath in traced_files), key=str
+        )
 
         logger.debug(
             "Traced files (to be considered for compilation cache):\n%s",
-            lazy(lambda: "\n".join(forward_code_files)),
+            lazy(lambda: "\n".join(map(str, forward_code_files))),
         )
-        hash_content = []
-        for filepath in forward_code_files:
-            hash_content.append(filepath)
-            if filepath == "<string>":
-                # This means the function was dynamically generated, with
-                # e.g. exec(). We can't actually check these.
-                continue
-            try:
-                with open(filepath) as f:
-                    hash_content.append(f.read())
-            except Exception:
-                logger.warning("Failed to read file %s", filepath)
-                continue
-        code_hash = hashlib.sha256("\n".join(hash_content).encode()).hexdigest()
+        code_factors = get_code_factors(forward_code_files)
+        code_hash = hash_factors({"files": code_factors})
         # Clear after consumption
         self.compilation_config.traced_files.clear()
         if not self.compilation_config.cache_dir:
             # no provided cache dir, generate one based on the known factors
             # that affects the compilation. if none of the factors change,
             # the cache dir will be the same so that we can reuse the compiled
             # graph.
-            factors = [env_hash, config_hash, code_hash, compiler_hash]
+            all_factors = {
+                "env": env_factors,
+                "config": config_factors,
+                "code": {"files": code_factors},
+                "compiler": compiler_factors,
+            }
             # Use SHA-256 for cache key hashing to be consistent across
-            # compute_hash functions. Truncate for a short cache dir name.
-            hash_key = hashlib.sha256(str(factors).encode()).hexdigest()[:10]
+            # compile_factors functions. Truncate for a short cache dir name.
+            hash_key = hash_factors(all_factors)[:10]
             cache_dir = os.path.join(
                 envs.VLLM_CACHE_ROOT, "torch_compile_cache", hash_key
             )
@@ -686,9 +689,12 @@ def __call__(
                     json.dump(
                         {
                             "env": env_factors,  # raw factors used for env_hash
+                            "config": config_factors,
                             "config_hash": config_hash,
-                            "code_hash": code_hash,
+                            "compiler": compiler_factors,
                             "compiler_hash": compiler_hash,
+                            "code_hash": code_hash,
+                            "code": code_factors,
                         },
                         f,
                         indent=2,

diff --git a/vllm/compilation/caching.py b/vllm/compilation/caching.py
@@ -4,6 +4,7 @@
 import inspect
 import os
 import pickle
+from pathlib import Path
 from unittest.mock import patch
 
 import torch
@@ -25,6 +26,28 @@
 logger = init_logger(__name__)
 
 
+def get_code_factors(forward_code_files: list[Path]) -> list[dict[str, str]]:
+    """Return one ``{"path": ..., "hash": ...}`` factor dict per traced file.
+
+    ``"hash"`` is omitted for ``"<string>"`` (exec-generated code) and for
+    files that cannot be read (a warning is logged)."""
+    code_factors: list[dict[str, str]] = []
+    for filepath in forward_code_files:
+        path_str = str(filepath)
+        entry: dict[str, str] = {"path": path_str}
+        code_factors.append(entry)  # the path always counts; hash added below
+        if path_str == "<string>":
+            continue
+        try:
+            # UTF-8 (Python's source default) instead of the locale encoding.
+            content = filepath.read_text(encoding="utf-8")
+        except (OSError, ValueError):  # ValueError covers UnicodeDecodeError
+            logger.warning("Failed to read file %s", path_str)
+            continue
+        entry["hash"] = hash_factors({"content": content})
+    return code_factors
+
+
 class VllmSerializableFunction(SerializableCallable):
     """
     A wrapper around a compiled function by vllm. It will forward the tensor
@@ -136,18 +159,20 @@ def co_name(self):
         return "VllmSerializableFunction"
 
 
-def compilation_config_hash_factors(vllm_config: VllmConfig) -> list[str]:
-    factors = []
-    # 0. factors come from the env, for example, The values of
-    # VLLM_PP_LAYER_PARTITION will affect the computation graph.
-    env_hash = hash_factors(envs.compile_factors())
-    factors.append(env_hash)
+def compute_env_and_config_hashes(
+    vllm_config: VllmConfig,
+) -> tuple[str, str, dict[str, object], dict[str, object]]:
+    """
+    Return ``(env_hash, config_hash, env_factors, config_factors)``: the raw
+    env and config compile factors plus the hash of each. Both AOT and JIT
+    cache paths rely on this helper to ensure their cache keys stay in sync.
+    """
 
-    # 1. factors come from the vllm_config (it mainly summarizes how the
-    #    model is created)
-    config_hash = vllm_config.compute_hash()
-    factors.append(config_hash)
-    return factors
+    env_factors = envs.compile_factors()
+    env_hash = hash_factors(env_factors)
+    config_factors = vllm_config.compile_factors()
+    config_hash = hash_factors(config_factors)
+    return env_hash, config_hash, env_factors, config_factors
 
 
 def _compute_code_hash_with_content(file_contents: dict[str, str]) -> str:

diff --git a/vllm/compilation/compiler_interface.py b/vllm/compilation/compiler_interface.py
@@ -15,7 +15,7 @@
 import vllm.envs as envs
 from vllm.compilation.counter import compilation_counter
 from vllm.config import VllmConfig
-from vllm.utils.hashing import safe_hash
+from vllm.config.utils import CompileFactors
 from vllm.utils.torch_utils import is_torch_equal_or_newer
 
 
@@ -46,17 +46,17 @@ def initialize_cache(
         """
         pass
 
-    def compute_hash(self, vllm_config: VllmConfig) -> str:
+    def compile_factors(self, vllm_config: VllmConfig) -> CompileFactors:
         """
-        Gather all the relevant information from the vLLM config,
-        to compute a hash so that we can cache the compiled model.
+        Gather compiler-specific factors that influence the generated code.
 
-        See [`VllmConfig.compute_hash`][vllm.config.VllmConfig.compute_hash]
-        to check what information
-        is already considered by default. This function should only
-        consider the information that is specific to the compiler.
+        See [`VllmConfig.compile_factors`][vllm.config.VllmConfig.compile_factors]
+        for the base configuration factors. This method should return any
+        additional data that uniquely identifies the compiler's contribution to
+        the cache key. Subclasses must return a dictionary; use an empty dict
+        when no compiler-specific data is needed.
         """
-        return ""
+        return {}
 
     def compile(
         self,
@@ -195,12 +195,8 @@ class InductorStandaloneAdaptor(CompilerInterface):
     def __init__(self, save_format: Literal["binary", "unpacked"]):
         self.save_format = save_format
 
-    def compute_hash(self, vllm_config: VllmConfig) -> str:
-        factors = get_inductor_factors()
-        hash_str = safe_hash(str(factors).encode(), usedforsecurity=False).hexdigest()[
-            :10
-        ]
-        return hash_str
+    def compile_factors(self, vllm_config: VllmConfig) -> CompileFactors:
+        return {"inductor_standalone": get_inductor_factors()}
 
     def initialize_cache(
         self, cache_dir: str, disable_cache: bool = False, prefix: str = ""
@@ -284,12 +280,8 @@ class InductorAdaptor(CompilerInterface):
 
     name = "inductor"
 
-    def compute_hash(self, vllm_config: VllmConfig) -> str:
-        factors = get_inductor_factors()
-        hash_str = safe_hash(str(factors).encode(), usedforsecurity=False).hexdigest()[
-            :10
-        ]
-        return hash_str
+    def compile_factors(self, vllm_config: VllmConfig) -> CompileFactors:
+        return {"inductor": get_inductor_factors()}
 
     def initialize_cache(
         self, cache_dir: str, disable_cache: bool = False, prefix: str = ""

diff --git a/vllm/compilation/decorators.py b/vllm/compilation/decorators.py
@@ -30,6 +30,7 @@
 from vllm.utils.import_utils import resolve_obj_by_qualname
 from vllm.utils.torch_utils import supports_dynamo
 
+from .caching import compute_env_and_config_hashes
 from .monitor import start_monitoring_torch_compile
 
 logger = init_logger(__name__)
@@ -378,18 +379,10 @@ def __call__(self, *args, **kwargs):
             serialized backend artifacts), then we need to generate a new AOT
             compile artifact from scratch.
             """
-            # Validate that AOT compile is not used with unbacked dynamic
-            # shapes. aot_compile re-allocates backed symbols post dynamo!
-            if ds_type == DynamicShapesType.UNBACKED:
-                raise ValueError(
-                    "AOT compilation is not compatible with UNBACKED dynamic shapes. "
-                    "Please use BACKED or BACKED_SIZE_OBLIVIOUS dynamic shapes type "
-                    "when VLLM_USE_AOT_COMPILE is enabled."
-                )
-            from .caching import compilation_config_hash_factors
-
-            factors: list[str] = compilation_config_hash_factors(self.vllm_config)
 
+            # Keep AOT cache key in sync with JIT: env hash + config hash + model hash.
+            env_hash, config_hash, *_ = compute_env_and_config_hashes(self.vllm_config)
+            factors: list[str] = [env_hash, config_hash]
             factors.append(_model_hash_key(self.forward))
             hash_key = hashlib.sha256(str(factors).encode()).hexdigest()
 

diff --git a/vllm/compilation/pass_manager.py b/vllm/compilation/pass_manager.py
@@ -128,9 +128,10 @@ def uuid(self):
         affects compilation caching. Its uuid depends on the UUIDs of all
         dependent passes and the pass config. See InductorPass for more info.
         """
-        state = {"pass_config": self.pass_config.compute_hash(), "passes": []}
+        state = {"pass_config": self.pass_config.compile_factors(), "passes": []}
         for pass_ in self.passes:
             state["passes"].append(pass_.uuid())
+        state["passes"].append(self.post_cleanup.uuid())
         state["passes"].append(self.fix_functionalization.uuid())
 
         return InductorPass.hash_dict(state)