5 changes: 3 additions & 2 deletions vllm/compilation/backends.py
@@ -16,7 +16,7 @@
 from vllm.config import CompilationConfig, VllmConfig
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
-from vllm.utils import resolve_obj_by_qualname
+from vllm.utils import is_torch_equal_or_newer, resolve_obj_by_qualname

 from .compiler_interface import (CompilerInterface, EagerAdaptor,
                                  InductorAdaptor, InductorStandaloneAdaptor)
@@ -29,7 +29,8 @@

 def make_compiler(compilation_config: CompilationConfig) -> CompilerInterface:
     if compilation_config.use_inductor:
-        if envs.VLLM_TEST_STANDALONE_COMPILE:
+        if envs.VLLM_USE_STANDALONE_COMPILE and is_torch_equal_or_newer(
+                "2.8.0"):
             logger.info("Using InductorStandaloneAdaptor")
             return InductorStandaloneAdaptor()
         else:
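The new gate calls is_torch_equal_or_newer from vllm.utils. As a rough sketch of what such a check does (an illustrative reimplementation on top of packaging, not the actual vLLM source):

# Illustrative sketch of the torch version gate; packaging ships with pip.
# This is an assumption about the helper's behavior, not vLLM's code.
import torch
from packaging.version import Version

def is_torch_equal_or_newer(target: str) -> bool:
    # Compare release tuples so nightly strings such as
    # "2.8.0.dev20250506+cu128" still parse and compare cleanly.
    return Version(torch.__version__).release >= Version(target).release

With a gate like this in place, torch 2.7 installs fall through to the else branch (InductorAdaptor) even when the flag is set.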
2 changes: 1 addition & 1 deletion vllm/compilation/compiler_interface.py
@@ -155,7 +155,7 @@ class InductorStandaloneAdaptor(CompilerInterface):
     This is not on by default yet, but we plan to turn it on by default for
     PyTorch 2.8.

-    Use VLLM_TEST_STANDALONE_COMPILE to toggle this on or off.
+    Use VLLM_USE_STANDALONE_COMPILE to toggle this on or off.
     """
     name = "inductor_standalone"
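Given the updated docstring, a hypothetical opt-out looks like this (assuming a build that contains this change, and that vllm.envs resolves variables lazily on attribute access):

# Hypothetical opt-out: set the variable before vLLM reads it.
import os
os.environ["VLLM_USE_STANDALONE_COMPILE"] = "0"

import vllm.envs as envs
assert envs.VLLM_USE_STANDALONE_COMPILE is False  # standalone compile off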
14 changes: 8 additions & 6 deletions vllm/envs.py
@@ -142,10 +142,10 @@ def maybe_convert_int(value: Optional[str]) -> Optional[int]:

 def get_vllm_port() -> Optional[int]:
     """Get the port from VLLM_PORT environment variable.
-
+
     Returns:
         The port number as an integer if VLLM_PORT is set, None otherwise.
-
+
     Raises:
         ValueError: If VLLM_PORT is a URI, suggest k8s service discovery issue.
     """
@@ -300,9 +300,11 @@ def get_vllm_port() -> Optional[int]:
     lambda: bool(
         os.environ.get("VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE", "1") != "0"),

-    # Internal flag to enable/disable Inductor standalone compile
-    "VLLM_TEST_STANDALONE_COMPILE":
-    lambda: os.environ.get("VLLM_TEST_STANDALONE_COMPILE", "0") != "0",
+    # Feature flag to enable/disable Inductor standalone compile.
+    # In torch <= 2.7 we ignore this flag; in torch >= 2.8 this is
+    # enabled by default.
+    "VLLM_USE_STANDALONE_COMPILE":
+    lambda: os.environ.get("VLLM_USE_STANDALONE_COMPILE", "1") == "1",

     # local rank of the process in the distributed setting, used to determine
     # the GPU device id
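Note that the rename also flips the semantics: the old lambda treated any value other than "0" as enabled with the default off, while the new one treats only the exact string "1" as enabled with the default on. A standalone illustration of the difference (not part of the PR):

# Old vs. new parsing semantics, shown standalone.
def old_flag(env: dict) -> bool:  # opt-in: default off
    return env.get("VLLM_TEST_STANDALONE_COMPILE", "0") != "0"

def new_flag(env: dict) -> bool:  # opt-out: default on
    return env.get("VLLM_USE_STANDALONE_COMPILE", "1") == "1"

assert old_flag({}) is False and new_flag({}) is True
assert old_flag({"VLLM_TEST_STANDALONE_COMPILE": "yes"}) is True
assert new_flag({"VLLM_USE_STANDALONE_COMPILE": "yes"}) is False  # only "1" enables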
@@ -884,7 +886,7 @@ def factorize(name: str):
         "VLLM_USE_TRITON_AWQ",
         "VLLM_DP_RANK",
         "VLLM_DP_SIZE",
-        "VLLM_TEST_STANDALONE_COMPILE",
+        "VLLM_USE_STANDALONE_COMPILE",
     ]
     for key in environment_variables_to_hash:
         if key in environment_variables:
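The list above feeds the compilation-cache hash, so flipping the renamed flag changes the cache key and stale compiled artifacts are not reused. A minimal sketch of the idea (the function name and key list here are assumptions for illustration, not vLLM's actual hashing code):

# Illustrative sketch: fold selected environment variables into a cache
# key so that toggling VLLM_USE_STANDALONE_COMPILE invalidates old
# compilation artifacts. Names here are hypothetical.
import hashlib
import os

def env_hash_factor(keys: list[str]) -> str:
    hasher = hashlib.sha256()
    for key in keys:
        hasher.update(f"{key}={os.environ.get(key)}".encode())
    return hasher.hexdigest()

print(env_hash_factor(["VLLM_USE_STANDALONE_COMPILE", "VLLM_DP_SIZE"]))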