Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion tests/models/language/pooling/test_embedding.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import os

import pytest

from vllm.config import PoolerConfig
Expand Down Expand Up @@ -33,7 +35,7 @@ def v1(run_with_both_engines):
# To avoid this problem, for now we skip v0 since it will be
# deprecated anyway.
pytest.param("ssmits/Qwen2-7B-Instruct-embed-base",
- marks=[pytest.mark.skip_v0]),
+ marks=[pytest.mark.skip_v0, pytest.mark.cpu_model]),
# [Encoder-only]
pytest.param("BAAI/bge-base-en-v1.5",
marks=[
Expand All @@ -58,6 +60,9 @@ def test_models(
model,
monkeypatch,
) -> None:
if model == "intfloat/e5-mistral-7b-instruct" and current_platform.is_cpu(
) and os.environ.get("VLLM_USE_V1", "0") == "1":
pytest.skip("CPU V1 doesn't support sliding window")

if model == "BAAI/bge-multilingual-gemma2" and current_platform.is_rocm():
# ROCm Triton FA does not currently support sliding window attention
Expand Down
7 changes: 7 additions & 0 deletions vllm/engine/arg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1449,6 +1449,13 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
model_config=model_config) and _warn_or_fallback(
current_platform.device_name):
return False

if (current_platform.is_cpu()
and model_config.get_sliding_window() is not None):
_raise_or_fallback(feature_name="sliding window (CPU backend)",
recommend_to_remove=False)
return False

#############################################################

return True
Expand Down