diff --git a/tests/test_envs.py b/tests/test_envs.py index 62d529c36360..023767505f10 100644 --- a/tests/test_envs.py +++ b/tests/test_envs.py @@ -6,7 +6,54 @@ import pytest -from vllm.envs import env_list_with_choices, env_with_choices +import vllm.envs as envs +from vllm.envs import ( + enable_envs_cache, + env_list_with_choices, + env_with_choices, + environment_variables, +) + + +def test_getattr_without_cache(monkeypatch: pytest.MonkeyPatch): + assert envs.VLLM_HOST_IP == "" + assert envs.VLLM_PORT is None + monkeypatch.setenv("VLLM_HOST_IP", "1.1.1.1") + monkeypatch.setenv("VLLM_PORT", "1234") + assert envs.VLLM_HOST_IP == "1.1.1.1" + assert envs.VLLM_PORT == 1234 + # __getattr__ is not decorated with functools.cache + assert not hasattr(envs.__getattr__, "cache_info") + + +def test_getattr_with_cache(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setenv("VLLM_HOST_IP", "1.1.1.1") + monkeypatch.setenv("VLLM_PORT", "1234") + # __getattr__ is not decorated with functools.cache + assert not hasattr(envs.__getattr__, "cache_info") + + # Enable envs cache and ignore ongoing environment changes + enable_envs_cache() + + # __getattr__ is now decorated with functools.cache + assert hasattr(envs.__getattr__, "cache_info") + start_hits = envs.__getattr__.cache_info().hits + + # 2 more hits due to VLLM_HOST_IP and VLLM_PORT accesses + assert envs.VLLM_HOST_IP == "1.1.1.1" + assert envs.VLLM_PORT == 1234 + assert envs.__getattr__.cache_info().hits == start_hits + 2 + + # All environment variables are cached + for environment_variable in environment_variables: + envs.__getattr__(environment_variable) + assert envs.__getattr__.cache_info().hits == start_hits + 2 + len( + environment_variables + ) + + # Reset envs.__getattr__ back to non-cached version to + # avoid affecting other tests + envs.__getattr__ = envs.__getattr__.__wrapped__ class TestEnvWithChoices: diff --git a/vllm/envs.py b/vllm/envs.py index 8d8c96297d91..b5c7f325f670 100755 --- a/vllm/envs.py +++ 
b/vllm/envs.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import functools import hashlib import json import os @@ -1408,12 +1409,36 @@ def get_vllm_port() -> int | None: def __getattr__(name: str): - # lazy evaluation of environment variables + """ + Gets environment variables lazily. + + NOTE: After enable_envs_cache() invocation (which is triggered after service + initialization), all environment variables will be cached. + """ if name in environment_variables: return environment_variables[name]() raise AttributeError(f"module {__name__!r} has no attribute {name!r}") +def enable_envs_cache() -> None: + """ + Enables caching of environment variables. This is useful for performance + reasons, as it avoids the need to re-evaluate environment variables on + every call. + + NOTE: Currently, it's invoked after service initialization to reduce + runtime overhead. This also means that environment variables should NOT + be updated after the service is initialized. + """ + # Wrap __getattr__ with functools.cache + global __getattr__ + __getattr__ = functools.cache(__getattr__) + + # Cache all environment variables + for key in environment_variables: + __getattr__(key) + + def __dir__(): return list(environment_variables.keys()) diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py index 2c5d0fdc752e..a21f0715704a 100644 --- a/vllm/v1/engine/core.py +++ b/vllm/v1/engine/core.py @@ -20,6 +20,7 @@ from vllm.config import ParallelConfig, VllmConfig from vllm.distributed import stateless_destroy_torch_distributed_process_group from vllm.distributed.parallel_state import is_global_first_rank +from vllm.envs import enable_envs_cache from vllm.logger import init_logger from vllm.logging_utils.dump_input import dump_engine_exception from vllm.lora.request import LoRARequest @@ -601,6 +602,10 @@ def __init__( # If enable, attach GC debugger after static variable freeze. 
maybe_attach_gc_debug_callback() + # Enable environment variable cache (i.e. assume no more + # environment variable overrides after this point) + enable_envs_cache() + @contextmanager def _perform_handshakes( self, diff --git a/vllm/v1/executor/multiproc_executor.py b/vllm/v1/executor/multiproc_executor.py index e28d29c19a9c..38e8f4ab85d9 100644 --- a/vllm/v1/executor/multiproc_executor.py +++ b/vllm/v1/executor/multiproc_executor.py @@ -33,6 +33,7 @@ get_pp_group, get_tp_group, ) +from vllm.envs import enable_envs_cache from vllm.logger import init_logger from vllm.utils import ( _maybe_force_spawn, @@ -455,6 +456,10 @@ def __init__( # Load model self.worker.load_model() + # Enable environment variable cache (i.e. assume no more + # environment variable overrides after this point) + enable_envs_cache() + @staticmethod def make_worker_process( vllm_config: VllmConfig,