diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index c4ea4b675649..d6eac92626d7 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -375,6 +375,7 @@ steps: - tests/compile commands: - pytest -v -s compile/test_pass_manager.py + - pytest -v -s compile/test_config.py - pytest -v -s compile/test_fusion.py - pytest -v -s compile/test_fusion_attn.py - pytest -v -s compile/test_silu_mul_quant_fusion.py diff --git a/tests/compile/test_config.py b/tests/compile/test_config.py index 7afd6251bbbd..292d2448528e 100644 --- a/tests/compile/test_config.py +++ b/tests/compile/test_config.py @@ -4,7 +4,8 @@ import vllm from vllm.compilation.counter import compilation_counter -from vllm.config import CompilationConfig, VllmConfig +from vllm.config import (CompilationConfig, CUDAGraphMode, ModelConfig, + VllmConfig) from vllm.utils import _is_torch_equal_or_newer @@ -18,12 +19,35 @@ def test_version(): def test_use_cudagraphs_dynamic(monkeypatch): assert vllm.envs.VLLM_USE_V1 - vllm_config = VllmConfig() - assert vllm_config.compilation_config.use_cudagraph + # cudagraph_mode=PIECEWISE by default + config = VllmConfig() + assert config.compilation_config.cudagraph_mode == CUDAGraphMode.PIECEWISE + + config2 = VllmConfig(compilation_config=CompilationConfig( + cudagraph_mode=CUDAGraphMode.NONE)) + config3 = VllmConfig(compilation_config=CompilationConfig( + cudagraph_mode=CUDAGraphMode.FULL)) + config4 = VllmConfig(model_config=ModelConfig(enforce_eager=True)) + assert config2.compilation_config.cudagraph_mode == CUDAGraphMode.NONE + assert config3.compilation_config.cudagraph_mode == CUDAGraphMode.FULL + assert config4.compilation_config.cudagraph_mode == CUDAGraphMode.NONE monkeypatch.setenv('VLLM_USE_V1', '0') - vllm_config = VllmConfig() - assert not vllm_config.compilation_config.use_cudagraph + # TODO remove when V0 fully removed: + # https://github.com/vllm-project/vllm/issues/18571 + + config = VllmConfig() # cudagraph_mode=NONE by default + assert config.compilation_config.cudagraph_mode == CUDAGraphMode.NONE + + config2 = VllmConfig(compilation_config=CompilationConfig( + cudagraph_mode=CUDAGraphMode.PIECEWISE)) + config3 = VllmConfig(compilation_config=CompilationConfig( + cudagraph_mode=CUDAGraphMode.FULL)) + config4 = VllmConfig(model_config=ModelConfig(enforce_eager=True)) + + assert config2.compilation_config.cudagraph_mode == CUDAGraphMode.PIECEWISE + assert config3.compilation_config.cudagraph_mode == CUDAGraphMode.FULL + assert config4.compilation_config.cudagraph_mode == CUDAGraphMode.NONE def test_custom_op(): diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index 92fc68f8927c..0cb8593f43f8 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -483,8 +483,8 @@ def __post_init__(self): # settings (see the below code). if self.compilation_config.level is None: if envs.VLLM_USE_V1: - if (self.model_config is not None - and not self.model_config.enforce_eager): + if self.model_config is None or \ + not self.model_config.enforce_eager: self.compilation_config.level = CompilationLevel.PIECEWISE else: self.compilation_config.level = \