diff --git a/tests/models/language/pooling/test_embedding.py b/tests/models/language/pooling/test_embedding.py index e29b4f6e8bec..5ef9f768c574 100644 --- a/tests/models/language/pooling/test_embedding.py +++ b/tests/models/language/pooling/test_embedding.py @@ -1,5 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import os + import pytest from vllm.config import PoolerConfig @@ -33,7 +35,7 @@ def v1(run_with_both_engines): # To avoid this problem, for now we skip v0 since it will be # deprecated anyway. pytest.param("ssmits/Qwen2-7B-Instruct-embed-base", - marks=[pytest.mark.skip_v0]), + marks=[pytest.mark.skip_v0, pytest.mark.cpu_model]), # [Encoder-only] pytest.param("BAAI/bge-base-en-v1.5", marks=[ @@ -58,6 +60,9 @@ def test_models( model, monkeypatch, ) -> None: + if model == "intfloat/e5-mistral-7b-instruct" and current_platform.is_cpu( + ) and os.environ.get("VLLM_USE_V1", "0") == "1": + pytest.skip("CPU V1 doesn't support sliding window") if model == "BAAI/bge-multilingual-gemma2" and current_platform.is_rocm(): # ROCm Triton FA does not currently support sliding window attention diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 7a88e3269a5e..bffc8ba8c907 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1449,6 +1449,13 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool: model_config=model_config) and _warn_or_fallback( current_platform.device_name): return False + + if (current_platform.is_cpu() + and model_config.get_sliding_window() is not None): + _raise_or_fallback(feature_name="sliding window (CPU backend)", + recommend_to_remove=False) + return False + ############################################################# return True