diff --git a/tests/entrypoints/pooling/score/test_online_score_vision.py b/tests/entrypoints/pooling/score/test_online_score_vision.py index 9e9bc3fec881..bd53153c33fc 100644 --- a/tests/entrypoints/pooling/score/test_online_score_vision.py +++ b/tests/entrypoints/pooling/score/test_online_score_vision.py @@ -1,12 +1,15 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import json + import pytest import requests from tests.utils import VLLM_PATH, RemoteOpenAIServer from vllm.entrypoints.pooling.score.protocol import RerankResponse, ScoreResponse from vllm.multimodal.utils import encode_image_url, fetch_image +from vllm.platforms import current_platform MODEL_NAME = "Qwen/Qwen3-VL-Reranker-2B" HF_OVERRIDES = { @@ -15,6 +18,60 @@ "is_original_qwen3_reranker": True, } +ROCM_ATTN_BACKENDS = [ + "ROCM_ATTN", + "ROCM_AITER_FA", + "TRITON_ATTN", + "FLEX_ATTENTION", +] + +ATTN_BACKENDS = ROCM_ATTN_BACKENDS if current_platform.is_rocm() else ["FLASH_ATTN"] + +# Per-backend tolerance with explicit entries; "default" is the fallback +BACKEND_TOL: dict[str, float] = { + "default": 0.05, # 5% tolerance for other backends (e.g. FLASH_ATTN) + # Per-backend tolerances for ROCm attention backends + # See: https://github.com/vllm-project/vllm/issues/35569 + "ROCM_ATTN": 0.09, # gfx950:~8.45%, gfx942:~3.70% + "ROCM_AITER_FA": 0.045, # gfx950:~2.00%, gfx942:~0.80% + "TRITON_ATTN": 0.045, # gfx950:~3.00%, gfx942:~2.20% + "FLEX_ATTENTION": 0.045, # gfx950:~3.25%, gfx942:~1.10% +} + +# ROCm: disable skinny GEMM to avoid non-deterministic results from +# atomic reductions in wvSplitKrc kernel. +# See: https://github.com/vllm-project/vllm/pull/33493#issuecomment-3906083975 +ROCM_ENV_OVERRIDES = ( + {"VLLM_ROCM_USE_SKINNY_GEMM": "0"} if current_platform.is_rocm() else {} +) +# ROCm: disable prefix caching and eliminate batch variance to reduce +# test flakiness. 
+ROCM_EXTRA_ARGS = ( + ["--no-enable-prefix-caching", "--max-num-seqs", "1"] + if current_platform.is_rocm() + else [] +) + + +def get_tol(backend: str) -> float: + return BACKEND_TOL.get(backend, BACKEND_TOL["default"]) + + +def assert_score(actual: float, expected: float, backend: str, label: str): + tol = get_tol(backend) + diff = abs(actual - expected) + rel_diff = diff / abs(expected) if expected != 0 else diff + print( + f"[{backend}] {label}: actual={actual:.6f} expected={expected:.6f} " + f"diff={diff:.6f} rel_diff={rel_diff:.4f} tol={tol}" + ) + assert actual == pytest.approx(expected, rel=tol), ( + f"[{backend}] {label}: score mismatch — " + f"actual={actual:.6f}, expected={expected:.6f}, " + f"rel_diff={rel_diff:.4f}, tol={tol}" + ) + + query = "A cat standing in the snow." document = "This product was excellent and exceeded my expectations." image_url = "https://vllm-public-assets.s3.us-west-2.amazonaws.com/multimodal_asset/cat_snow.jpg" @@ -36,28 +93,37 @@ TEXT_VS_TEXT = 0.10040374100208282 TEXT_VS_IMAGE = 0.7423753142356873 TEXT_VS_TEXT_PLUS_IMAGE = 0.5298863053321838 -TOL = 0.05 -@pytest.fixture(scope="module") -def server(): +@pytest.fixture(scope="module", params=ATTN_BACKENDS) +def server(request): + backend = request.param + print(f"\n=== Starting server with attention backend: {backend} ===") args = [ "--enforce-eager", "--max-model-len", "8192", "--chat-template", str(VLLM_PATH / "examples/pooling/score/template/qwen3_vl_reranker.jinja"), - ] + "--attention-config", + json.dumps({"backend": backend}), + ] + ROCM_EXTRA_ARGS + + env = dict(ROCM_ENV_OVERRIDES) + if backend != "ROCM_AITER_FA": + env["VLLM_ROCM_USE_AITER"] = "0" with RemoteOpenAIServer( - MODEL_NAME, args, override_hf_configs=HF_OVERRIDES + MODEL_NAME, args, override_hf_configs=HF_OVERRIDES, env_dict=env ) as remote_server: - yield remote_server + print(f"=== Server ready with backend: {backend} ===") + yield remote_server, backend -def 
test_score_api_queries_str_documents_str(server: RemoteOpenAIServer): +def test_score_api_queries_str_documents_str(server: tuple[RemoteOpenAIServer, str]): + remote_server, backend = server score_response = requests.post( - server.url_for("score"), + remote_server.url_for("score"), json={ "model": MODEL_NAME, "queries": query, @@ -71,12 +137,15 @@ def test_score_api_queries_str_documents_str(server: RemoteOpenAIServer): assert score.data is not None assert len(score.data) == 1 assert score.usage.prompt_tokens == 81 - assert score.data[0].score == pytest.approx(TEXT_VS_TEXT, rel=TOL) + assert_score(score.data[0].score, TEXT_VS_TEXT, backend, "text_vs_text") -def test_score_api_queries_str_documents_text_content(server: RemoteOpenAIServer): +def test_score_api_queries_str_documents_text_content( + server: tuple[RemoteOpenAIServer, str], +): + remote_server, backend = server score_response = requests.post( - server.url_for("score"), + remote_server.url_for("score"), json={ "model": MODEL_NAME, "queries": query, @@ -90,12 +159,15 @@ def test_score_api_queries_str_documents_text_content(server: RemoteOpenAIServer assert score.data is not None assert len(score.data) == 1 assert score.usage.prompt_tokens == 81 - assert score.data[0].score == pytest.approx(TEXT_VS_TEXT, rel=TOL) + assert_score(score.data[0].score, TEXT_VS_TEXT, backend, "text_vs_text") -def test_score_api_queries_str_documents_image_url_content(server: RemoteOpenAIServer): +def test_score_api_queries_str_documents_image_url_content( + server: tuple[RemoteOpenAIServer, str], +): + remote_server, backend = server score_response = requests.post( - server.url_for("score"), + remote_server.url_for("score"), json={ "model": MODEL_NAME, "queries": query, @@ -109,14 +181,15 @@ def test_score_api_queries_str_documents_image_url_content(server: RemoteOpenAIS assert score.data is not None assert len(score.data) == 1 assert score.usage.prompt_tokens == 98 - assert score.data[0].score == pytest.approx(TEXT_VS_IMAGE, 
rel=TOL) + assert_score(score.data[0].score, TEXT_VS_IMAGE, backend, "text_vs_image") def test_score_api_queries_str_documents_image_base64_content( - server: RemoteOpenAIServer, + server: tuple[RemoteOpenAIServer, str], ): + remote_server, backend = server score_response = requests.post( - server.url_for("score"), + remote_server.url_for("score"), json={ "model": MODEL_NAME, "queries": query, @@ -130,14 +203,15 @@ def test_score_api_queries_str_documents_image_base64_content( assert score.data is not None assert len(score.data) == 1 assert score.usage.prompt_tokens == 98 - assert score.data[0].score == pytest.approx(TEXT_VS_IMAGE, rel=TOL) + assert_score(score.data[0].score, TEXT_VS_IMAGE, backend, "text_vs_image_base64") def test_score_api_queries_str_documents_image_url_plus_text_content( - server: RemoteOpenAIServer, + server: tuple[RemoteOpenAIServer, str], ): + remote_server, backend = server score_response = requests.post( - server.url_for("score"), + remote_server.url_for("score"), json={ "model": MODEL_NAME, "queries": query, @@ -151,12 +225,17 @@ def test_score_api_queries_str_documents_image_url_plus_text_content( assert score.data is not None assert len(score.data) == 1 assert score.usage.prompt_tokens == 108 - assert score.data[0].score == pytest.approx(TEXT_VS_TEXT_PLUS_IMAGE, rel=TOL) + assert_score( + score.data[0].score, TEXT_VS_TEXT_PLUS_IMAGE, backend, "text_vs_text_plus_image" + ) -def test_score_api_queries_str_documents_list(server: RemoteOpenAIServer): +def test_score_api_queries_str_documents_list( + server: tuple[RemoteOpenAIServer, str], +): + remote_server, backend = server score_response = requests.post( - server.url_for("score"), + remote_server.url_for("score"), json={ "model": MODEL_NAME, "queries": query, @@ -175,15 +254,23 @@ def test_score_api_queries_str_documents_list(server: RemoteOpenAIServer): assert score.data is not None assert len(score.data) == 4 assert score.usage.prompt_tokens == 368 - assert score.data[0].score == 
pytest.approx(TEXT_VS_TEXT, rel=TOL) - assert score.data[1].score == pytest.approx(TEXT_VS_TEXT, rel=TOL) - assert score.data[2].score == pytest.approx(TEXT_VS_IMAGE, rel=TOL) - assert score.data[3].score == pytest.approx(TEXT_VS_TEXT_PLUS_IMAGE, rel=TOL) + assert_score(score.data[0].score, TEXT_VS_TEXT, backend, "list[0]_text_vs_text") + assert_score(score.data[1].score, TEXT_VS_TEXT, backend, "list[1]_text_vs_text") + assert_score(score.data[2].score, TEXT_VS_IMAGE, backend, "list[2]_text_vs_image") + assert_score( + score.data[3].score, + TEXT_VS_TEXT_PLUS_IMAGE, + backend, + "list[3]_text_vs_text_plus_image", + ) -def test_rerank_api_queries_str_documents_list(server: RemoteOpenAIServer): +def test_rerank_api_queries_str_documents_list( + server: tuple[RemoteOpenAIServer, str], +): + remote_server, backend = server rerank_response = requests.post( - server.url_for("rerank"), + remote_server.url_for("rerank"), json={ "model": MODEL_NAME, "query": query, @@ -204,17 +291,38 @@ def test_rerank_api_queries_str_documents_list(server: RemoteOpenAIServer): assert len(rerank.results) == 4 rerank.results.sort(key=lambda x: x.index) - assert rerank.results[0].relevance_score == pytest.approx(TEXT_VS_TEXT, rel=TOL) - assert rerank.results[1].relevance_score == pytest.approx(TEXT_VS_TEXT, rel=TOL) - assert rerank.results[2].relevance_score == pytest.approx(TEXT_VS_IMAGE, rel=TOL) - assert rerank.results[3].relevance_score == pytest.approx( - TEXT_VS_TEXT_PLUS_IMAGE, rel=TOL + assert_score( + rerank.results[0].relevance_score, + TEXT_VS_TEXT, + backend, + "rerank[0]_text_vs_text", + ) + assert_score( + rerank.results[1].relevance_score, + TEXT_VS_TEXT, + backend, + "rerank[1]_text_vs_text", + ) + assert_score( + rerank.results[2].relevance_score, + TEXT_VS_IMAGE, + backend, + "rerank[2]_text_vs_image", + ) + assert_score( + rerank.results[3].relevance_score, + TEXT_VS_TEXT_PLUS_IMAGE, + backend, + "rerank[3]_text_vs_text_plus_image", ) -def 
test_score_api_queries_list_documents_list(server: RemoteOpenAIServer): +def test_score_api_queries_list_documents_list( + server: tuple[RemoteOpenAIServer, str], +): + remote_server, backend = server score_response = requests.post( - server.url_for("score"), + remote_server.url_for("score"), json={ "model": MODEL_NAME, "queries": [query] * 4, @@ -233,7 +341,12 @@ def test_score_api_queries_list_documents_list(server: RemoteOpenAIServer): assert score.data is not None assert len(score.data) == 4 assert score.usage.prompt_tokens == 368 - assert score.data[0].score == pytest.approx(TEXT_VS_TEXT, rel=TOL) - assert score.data[1].score == pytest.approx(TEXT_VS_TEXT, rel=TOL) - assert score.data[2].score == pytest.approx(TEXT_VS_IMAGE, rel=TOL) - assert score.data[3].score == pytest.approx(TEXT_VS_TEXT_PLUS_IMAGE, rel=TOL) + assert_score(score.data[0].score, TEXT_VS_TEXT, backend, "paired[0]_text_vs_text") + assert_score(score.data[1].score, TEXT_VS_TEXT, backend, "paired[1]_text_vs_text") + assert_score(score.data[2].score, TEXT_VS_IMAGE, backend, "paired[2]_text_vs_image") + assert_score( + score.data[3].score, + TEXT_VS_TEXT_PLUS_IMAGE, + backend, + "paired[3]_text_vs_text_plus_image", + )