diff --git a/tests/test_config.py b/tests/test_config.py
index 5c01d652a17a..3669c127e5dd 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -148,6 +148,13 @@ def test_is_default_v2_model_runner_model(model_config, expected):
     assert VllmConfig._is_default_v2_model_runner_model(config) is expected
 
 
+def test_use_v2_model_runner_defaults_to_v1_when_kv_connector_present():
+    config = SimpleNamespace(kv_transfer_config=object())
+    with patch.object(envs, "VLLM_USE_V2_MODEL_RUNNER", None):
+        result = VllmConfig.use_v2_model_runner.fget(config)
+    assert result is False
+
+
 @pytest.mark.skip_global_cleanup
 def test_with_hf_config_populates_missing_architectures_from_causal_lm_mapping(
     monkeypatch,
diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py
index 5d45de6ff33a..5414b9d04fde 100644
--- a/vllm/config/vllm.py
+++ b/vllm/config/vllm.py
@@ -498,6 +498,10 @@ def use_v2_model_runner(self) -> bool:
         if use_v2_model_runner is not None:
             return use_v2_model_runner
 
+        # KVCache layout changes are breaking; use v1 with KV transfer for now (#42846)
+        if self.kv_transfer_config is not None:
+            return False
+
         if not self._is_default_v2_model_runner_model():
             return False
 