Skip to content
Closed
Show file tree
Hide file tree
Changes from 36 commits
Commits
Show all changes
92 commits
Select commit Hold shift + click to select a range
5e167cf
return factors; fix comments
Nov 20, 2025
dc5980a
refactored code
WorldExplored Nov 20, 2025
7b7fe2e
fixed vllm.py
WorldExplored Nov 20, 2025
f602d96
refactor compute_hash() to compile_factors()
Nov 20, 2025
ce5fc7e
check if .compile_factors exists on the subobject
Nov 20, 2025
f99a273
Merge branch 'main' into hash
Nov 20, 2025
5eed2d7
fix recusion
Nov 20, 2025
fd33225
envs.py update
Nov 20, 2025
77eed45
fix precommit
Nov 20, 2025
76c2761
precommit
Nov 20, 2025
9cdc0d7
addressed reviewer concerns
WorldExplored Nov 20, 2025
b3bca07
factors can be a list of recursive dicts, and we can call hash_factor…
Nov 21, 2025
3ab19fc
fix
Nov 21, 2025
755e864
fixed precommit
WorldExplored Nov 21, 2025
03ac8c4
Merge branch 'main' into hash
WorldExplored Nov 21, 2025
bdbfcb0
added split shared compilation helpers PR code
WorldExplored Nov 21, 2025
caab640
use hash_factors
Nov 21, 2025
e4dc2b7
fix returning factors
Nov 21, 2025
a23a880
fixed precommit
WorldExplored Nov 22, 2025
fc17f5a
Merge branch 'main' into hash
WorldExplored Nov 25, 2025
0b7477b
Merge branch 'main' into hash
WorldExplored Nov 26, 2025
ce55893
addressed reviewer concerns
WorldExplored Nov 26, 2025
9998552
fixed precommit
WorldExplored Nov 26, 2025
991a335
reviewer feedback
Nov 26, 2025
b026819
HashResult- CompileFactor
Nov 26, 2025
78e9060
accept proper dict
Nov 27, 2025
5e0a992
fixed pre-commit
WorldExplored Nov 27, 2025
fec924d
Merge branch 'main' into hash
WorldExplored Nov 27, 2025
4c6a0d1
Update vllm/config/vllm.py
WorldExplored Nov 28, 2025
8470733
Update vllm/config/vllm.py
WorldExplored Nov 28, 2025
a91542d
Merge branch 'main' into hash
WorldExplored Nov 29, 2025
42390f4
fixed pre-comm + reviewer feedback
WorldExplored Nov 29, 2025
69c780b
have adaptors pass dicts.
Dec 1, 2025
90df680
pre-com
Dec 1, 2025
89370fc
Merge branch 'main' into hash
WorldExplored Dec 2, 2025
0662dee
Merge branch 'main' into hash
WorldExplored Dec 3, 2025
9ef5ebc
Update vllm/config/utils.py
WorldExplored Dec 4, 2025
d643f93
Update vllm/config/vllm.py
WorldExplored Dec 4, 2025
00185c6
Update vllm/compilation/caching.py
WorldExplored Dec 4, 2025
0cb98a1
Merge branch 'main' into hash
WorldExplored Dec 8, 2025
1269570
fixed precommit
WorldExplored Dec 8, 2025
b0341df
Merge branch 'main' into hash
Dec 8, 2025
70ec192
fixed precomit
WorldExplored Dec 8, 2025
e282b67
Merge branch 'main' into hash
WorldExplored Dec 10, 2025
7c76f79
Merge branch 'main' into hash
WorldExplored Dec 10, 2025
883f3a5
Merge branch 'main' into hash
WorldExplored Dec 11, 2025
e97c1d5
Merge branch 'main' into hash
Dec 16, 2025
0ee4834
fix assert & precom
Dec 16, 2025
9587a86
precom
Dec 16, 2025
54c0e10
pre-com
Dec 16, 2025
b7dae52
Merge branch 'main' into hash
WorldExplored Dec 19, 2025
33f0410
Merge branch 'main' into hash
Dec 23, 2025
5673536
cleanup compilation config, merge, etc .,
Dec 23, 2025
ef957b7
fix passconfig
Dec 24, 2025
af811f5
Merge branch 'main' into hash
WorldExplored Dec 27, 2025
fbcaf9e
fixed circular imporrt
WorldExplored Dec 28, 2025
cd168a7
fixing cache breaks.
WorldExplored Dec 28, 2025
c46827e
Merge branch 'main' into hash
Jan 7, 2026
fee0ee8
fixed precom
WorldExplored Jan 8, 2026
7fbc6a6
Merge branch 'main' into hash
WorldExplored Jan 12, 2026
929d2f7
Merge branch 'main' into hash
WorldExplored Jan 12, 2026
a0b60a4
precom fix
WorldExplored Jan 12, 2026
8f3d1af
fixed cursor comments
WorldExplored Jan 13, 2026
dd759a0
Update vllm/config/utils.py
WorldExplored Jan 13, 2026
7558451
Addressed Reviewer Comments
WorldExplored Jan 13, 2026
6c212a3
Addressed concerns
WorldExplored Jan 14, 2026
a5da83c
Merge branch 'main' into hash
WorldExplored Jan 14, 2026
114a170
precom
Jan 14, 2026
2137afa
Merge branch 'main' into hash
Jan 16, 2026
4ebf938
Merge branch 'main' into hash
WorldExplored Jan 19, 2026
a6b1afd
fixed precom
WorldExplored Jan 20, 2026
8e85a2d
Merge branch 'main' into hash
WorldExplored Jan 22, 2026
ef3b3b1
fixed precom
WorldExplored Jan 22, 2026
b807f05
addressed comments
WorldExplored Jan 23, 2026
316f087
Merge branch 'main' into hash
WorldExplored Jan 26, 2026
662fae0
add factor
Jan 26, 2026
a1973c5
updated compilation
WorldExplored Jan 27, 2026
8a8d890
fixed json
WorldExplored Jan 27, 2026
2202b48
Merge branch 'main' into hash
Jan 27, 2026
4a19484
Merge branch 'main' into hash
WorldExplored Jan 27, 2026
97b3826
Merge branch 'main' into hash
WorldExplored Jan 31, 2026
b35d63a
fixed precom
WorldExplored Feb 1, 2026
8842e87
precom
WorldExplored Feb 1, 2026
56a914e
Merge branch 'main' into hash
WorldExplored Feb 2, 2026
ba09ff7
Merge branch 'main' into hash
WorldExplored Feb 3, 2026
e2ab420
Merge branch 'main' into hash
Feb 3, 2026
c9326f3
fixed premmit
WorldExplored Feb 4, 2026
c8cfbea
precom
WorldExplored Feb 4, 2026
a7dfc63
Merge upstream/main into hash
WorldExplored Mar 25, 2026
9881aac
Merge branch 'main' into hash
WorldExplored Mar 28, 2026
1095bd4
Merge branch 'main' into hash
WorldExplored Mar 31, 2026
fca44e3
Merge branch 'main' into hash
WorldExplored Apr 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 4 additions & 10 deletions tests/compile/fullgraph/test_toy_llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
VllmConfig,
set_current_vllm_config,
)
from vllm.config.utils import get_compile_factors
from vllm.forward_context import BatchDescriptor, set_forward_context
from vllm.utils.torch_utils import is_torch_equal_or_newer

Expand All @@ -45,16 +46,9 @@ class LlamaConfig:
tractable_init: bool = False
random_seed: int = 0

def compute_hash(self) -> str:
factors: list[Any] = []
for k, v in self.__dict__.items():
if k == "random_seed":
continue
factors.append((k, v))
factors.sort()
import hashlib

return hashlib.md5(str(factors).encode(), usedforsecurity=False).hexdigest()
def compile_factors(self) -> dict[str, Any]:
ignored = {"random_seed"}
return get_compile_factors(self, ignored)

def __post_init__(self):
assert self.mlp_size >= self.hidden_size
Expand Down
8 changes: 4 additions & 4 deletions tests/config/test_config_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import pytest

from vllm.config.utils import get_hash_factors, hash_factors, normalize_value
from vllm.config.utils import get_compile_factors, hash_factors, normalize_value

# Helpers

Expand All @@ -25,7 +25,7 @@ def expected_path(p_str: str = ".") -> str:
return p.expanduser().resolve().as_posix()


# Minimal dataclass to test get_hash_factors.
# Minimal dataclass to test get_compile_factors.
# Avoid importing heavy vLLM configs.
@dataclass
class SimpleConfig:
Expand Down Expand Up @@ -136,8 +136,8 @@ def test_enum_vs_int_disambiguation():
assert enum_val == "raw_logits"

# Build factor dicts from configs with int vs enum
f_int = get_hash_factors(SimpleConfig(1), set())
f_enum = get_hash_factors(SimpleConfig(DummyLogprobsMode.RAW_LOGITS), set())
f_int = get_compile_factors(SimpleConfig(1), set())
f_enum = get_compile_factors(SimpleConfig(DummyLogprobsMode.RAW_LOGITS), set())
# The int case remains a primitive value
assert f_int["a"] == 1
# The enum case becomes a tagged tuple ("module.QualName", "raw_logits")
Expand Down
8 changes: 4 additions & 4 deletions tests/config/test_multimodal_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ def test_mm_encoder_attn_backend_invalid():


def test_mm_encoder_attn_backend_hash_updates():
base_hash = MultiModalConfig().compute_hash()
overridden_hash = MultiModalConfig(
base_compile_signature = MultiModalConfig().compile_factors()
overridden_compile_signature = MultiModalConfig(
mm_encoder_attn_backend=AttentionBackendEnum.FLASH_ATTN
).compute_hash()
assert base_hash != overridden_hash
).compile_factors()
assert base_compile_signature != overridden_compile_signature
66 changes: 36 additions & 30 deletions vllm/compilation/backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

import ast
import dataclasses
import hashlib
import json
import operator
import os
Expand All @@ -13,6 +12,7 @@
from contextlib import contextmanager
from copy import deepcopy
from functools import partial
from pathlib import Path
from typing import Any

import torch
Expand All @@ -26,14 +26,18 @@
should_split,
)
from vllm.config import CompilationConfig, CUDAGraphMode, VllmConfig
from vllm.config.utils import hash_factors
from vllm.config.utils import CompileFactors, hash_factors
from vllm.logger import init_logger
from vllm.logging_utils import lazy
from vllm.platforms import current_platform
from vllm.utils.import_utils import resolve_obj_by_qualname
from vllm.utils.torch_utils import is_torch_equal_or_newer

from .caching import VllmSerializableFunction
from .caching import (
VllmSerializableFunction,
compute_env_and_config_hashes,
get_code_factors,
)
from .compiler_interface import (
CompilerInterface,
EagerAdaptor,
Expand Down Expand Up @@ -95,8 +99,8 @@ def __init__(self, compilation_config: CompilationConfig):
self.compilation_config = compilation_config
self.compiler = make_compiler(compilation_config)

def compute_hash(self, vllm_config: VllmConfig) -> str:
return self.compiler.compute_hash(vllm_config)
def compile_factors(self, vllm_config: VllmConfig) -> CompileFactors:
return self.compiler.compile_factors(vllm_config)

@contextmanager
def compile_context(self, runtime_shape: int | None = None):
Expand Down Expand Up @@ -596,42 +600,41 @@ def __call__(
vllm_config = self.vllm_config
# Minimal hashing here with existing utilities, reused below.

env_factors = envs.compile_factors()
env_hash = hash_factors(env_factors)
# Compute config/compiler/code hashes once and reuse
config_hash = vllm_config.compute_hash()
compiler_hash = self.compiler_manager.compute_hash(vllm_config)
forward_code_files = list(sorted(self.compilation_config.traced_files))
(
env_hash,
config_hash,
env_factors,
config_factors,
) = compute_env_and_config_hashes(vllm_config)
compiler_factors = self.compiler_manager.compile_factors(vllm_config)
compiler_hash = hash_factors(compiler_factors)
traced_files = set(self.compilation_config.traced_files)
forward_code_files = sorted(
(Path(filepath) for filepath in traced_files), key=str
)

logger.debug(
"Traced files (to be considered for compilation cache):\n%s",
lazy(lambda: "\n".join(forward_code_files)),
lazy(lambda: "\n".join(map(str, forward_code_files))),
)
hash_content = []
for filepath in forward_code_files:
hash_content.append(filepath)
if filepath == "<string>":
# This means the function was dynamically generated, with
# e.g. exec(). We can't actually check these.
continue
try:
with open(filepath) as f:
hash_content.append(f.read())
except Exception:
logger.warning("Failed to read file %s", filepath)
continue
code_hash = hashlib.sha256("\n".join(hash_content).encode()).hexdigest()
code_factors = get_code_factors(forward_code_files)
code_hash = hash_factors({"files": code_factors})
# Clear after consumption
self.compilation_config.traced_files.clear()
if not self.compilation_config.cache_dir:
# no provided cache dir, generate one based on the known factors
# that affects the compilation. if none of the factors change,
# the cache dir will be the same so that we can reuse the compiled
# graph.
factors = [env_hash, config_hash, code_hash, compiler_hash]
all_factors = {
"env": env_factors,
"config": config_factors,
"code": {"files": code_factors},
"compiler": compiler_factors,
}
# Use SHA-256 for cache key hashing to be consistent across
# compute_hash functions. Truncate for a short cache dir name.
hash_key = hashlib.sha256(str(factors).encode()).hexdigest()[:10]
# compile_factors functions. Truncate for a short cache dir name.
hash_key = hash_factors(all_factors)[:10]
cache_dir = os.path.join(
envs.VLLM_CACHE_ROOT, "torch_compile_cache", hash_key
)
Expand Down Expand Up @@ -686,9 +689,12 @@ def __call__(
json.dump(
Comment thread
This conversation was marked as resolved.
{
"env": env_factors, # raw factors used for env_hash
"config": config_factors,
"config_hash": config_hash,
"code_hash": code_hash,
"compiler": compiler_factors,
"compiler_hash": compiler_hash,
"code_hash": code_hash,
"code": code_factors,
},
f,
indent=2,
Expand Down
47 changes: 36 additions & 11 deletions vllm/compilation/caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import inspect
import os
import pickle
from pathlib import Path
Comment thread
This conversation was marked as resolved.
from unittest.mock import patch

import torch
Expand All @@ -25,6 +26,28 @@
logger = init_logger(__name__)


def get_code_factors(forward_code_files: list[Path]) -> list[dict[str, str]]:
    """Return per-file factors for compile cache hashing.

    Every entry carries the file's ``path``. A ``hash`` of the file content
    is added when the content can be read. The ``"<string>"`` pseudo-path
    (dynamically generated code, e.g. via ``exec``) and files that cannot be
    read contribute their path only.

    Args:
        forward_code_files: Paths of the traced source files. The result
            preserves this order, so callers that need a stable cache key
            should pass the paths already sorted.

    Returns:
        One ``{"path": ...}`` dict per input path, in input order, with an
        additional ``"hash"`` key for each file whose content was read.
    """
    code_factors: list[dict[str, str]] = []
    for filepath in forward_code_files:
        path_str = str(filepath)
        entry: dict[str, str] = {"path": path_str}
        # The entry is always recorded; the hash is attached below only when
        # the file content is actually available.
        code_factors.append(entry)
        if path_str == "<string>":
            # Dynamically generated code (e.g., exec); nothing to hash.
            continue
        try:
            # Decode explicitly as UTF-8 (the default Python source encoding)
            # so the content hash does not depend on the process locale.
            content = filepath.read_text(encoding="utf-8")
        except Exception:
            # Best effort: an unreadable or undecodable file must not abort
            # compilation, so it contributes its path only.
            logger.warning("Failed to read file %s", path_str)
            continue
        entry["hash"] = hash_factors({"content": content})
    return code_factors


class VllmSerializableFunction(SerializableCallable):
Comment thread
This conversation was marked as resolved.
Outdated
"""
A wrapper around a compiled function by vllm. It will forward the tensor
Expand Down Expand Up @@ -136,18 +159,20 @@ def co_name(self):
return "VllmSerializableFunction"


def compilation_config_hash_factors(vllm_config: VllmConfig) -> list[str]:
factors = []
# 0. factors come from the env, for example, The values of
# VLLM_PP_LAYER_PARTITION will affect the computation graph.
env_hash = hash_factors(envs.compile_factors())
factors.append(env_hash)
def compute_env_and_config_hashes(
vllm_config: VllmConfig,
) -> tuple[str, str, dict[str, object], dict[str, object]]:
"""
Return the hashed environment factors, config hash, and raw factors.
Both AOT and JIT cache paths rely on this helper to ensure their cache keys
stay in sync.
"""

# 1. factors come from the vllm_config (it mainly summarizes how the
# model is created)
config_hash = vllm_config.compute_hash()
factors.append(config_hash)
return factors
env_factors = envs.compile_factors()
env_hash = hash_factors(env_factors)
config_factors = vllm_config.compile_factors()
config_hash = hash_factors(config_factors)
return env_hash, config_hash, env_factors, config_factors


def _compute_code_hash_with_content(file_contents: dict[str, str]) -> str:
Expand Down
34 changes: 13 additions & 21 deletions vllm/compilation/compiler_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import vllm.envs as envs
from vllm.compilation.counter import compilation_counter
from vllm.config import VllmConfig
from vllm.utils.hashing import safe_hash
from vllm.config.utils import CompileFactors
from vllm.utils.torch_utils import is_torch_equal_or_newer


Expand Down Expand Up @@ -46,17 +46,17 @@ def initialize_cache(
"""
pass

def compute_hash(self, vllm_config: VllmConfig) -> str:
def compile_factors(self, vllm_config: VllmConfig) -> CompileFactors:
"""
Gather all the relevant information from the vLLM config,
to compute a hash so that we can cache the compiled model.
Gather compiler-specific factors that influence the generated code.

See [`VllmConfig.compute_hash`][vllm.config.VllmConfig.compute_hash]
to check what information
is already considered by default. This function should only
consider the information that is specific to the compiler.
See [`VllmConfig.compile_factors`][vllm.config.VllmConfig.compile_factors]
for the base configuration factors. This method should return any
additional data that uniquely identifies the compiler's contribution to
the cache key. Subclasses must return a dictionary; use an empty dict
when no compiler-specific data is needed.
"""
return ""
return {}

def compile(
self,
Expand Down Expand Up @@ -195,12 +195,8 @@ class InductorStandaloneAdaptor(CompilerInterface):
def __init__(self, save_format: Literal["binary", "unpacked"]):
self.save_format = save_format

def compute_hash(self, vllm_config: VllmConfig) -> str:
factors = get_inductor_factors()
hash_str = safe_hash(str(factors).encode(), usedforsecurity=False).hexdigest()[
:10
]
return hash_str
def compile_factors(self, vllm_config: VllmConfig) -> CompileFactors:
return {"inductor_standalone": get_inductor_factors()}

def initialize_cache(
self, cache_dir: str, disable_cache: bool = False, prefix: str = ""
Expand Down Expand Up @@ -284,12 +280,8 @@ class InductorAdaptor(CompilerInterface):

name = "inductor"

def compute_hash(self, vllm_config: VllmConfig) -> str:
factors = get_inductor_factors()
hash_str = safe_hash(str(factors).encode(), usedforsecurity=False).hexdigest()[
:10
]
return hash_str
def compile_factors(self, vllm_config: VllmConfig) -> CompileFactors:
return {"inductor": get_inductor_factors()}

def initialize_cache(
self, cache_dir: str, disable_cache: bool = False, prefix: str = ""
Expand Down
15 changes: 4 additions & 11 deletions vllm/compilation/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from vllm.utils.import_utils import resolve_obj_by_qualname
from vllm.utils.torch_utils import supports_dynamo

from .caching import compute_env_and_config_hashes
from .monitor import start_monitoring_torch_compile

logger = init_logger(__name__)
Expand Down Expand Up @@ -378,18 +379,10 @@ def __call__(self, *args, **kwargs):
serialized backend artifacts), then we need to generate a new AOT
compile artifact from scratch.
"""
# Validate that AOT compile is not used with unbacked dynamic
# shapes. aot_compile re-allocates backed symbols post dynamo!
if ds_type == DynamicShapesType.UNBACKED:
raise ValueError(
"AOT compilation is not compatible with UNBACKED dynamic shapes. "
"Please use BACKED or BACKED_SIZE_OBLIVIOUS dynamic shapes type "
"when VLLM_USE_AOT_COMPILE is enabled."
)
from .caching import compilation_config_hash_factors

factors: list[str] = compilation_config_hash_factors(self.vllm_config)

# Keep AOT cache key in sync with JIT: env factors + config hash + model.
env_hash, config_hash, *_ = compute_env_and_config_hashes(self.vllm_config)
factors: list[str] = [env_hash, config_hash]
factors.append(_model_hash_key(self.forward))
hash_key = hashlib.sha256(str(factors).encode()).hexdigest()
Comment thread
This conversation was marked as resolved.

Expand Down
3 changes: 2 additions & 1 deletion vllm/compilation/pass_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,10 @@ def uuid(self):
affects compilation caching. Its uuid depends on the UUIDs of all
dependent passes and the pass config. See InductorPass for more info.
"""
state = {"pass_config": self.pass_config.compute_hash(), "passes": []}
state = {"pass_config": self.pass_config.compile_factors(), "passes": []}
for pass_ in self.passes:
state["passes"].append(pass_.uuid())
state["passes"].append(self.post_cleanup.uuid())
state["passes"].append(self.fix_functionalization.uuid())
Comment thread
This conversation was marked as resolved.
Outdated

return InductorPass.hash_dict(state)
Loading
Loading