From 1cec9c414fde28afdc9f99ae165e4706ef4ab04f Mon Sep 17 00:00:00 2001 From: Andreas Karatzas Date: Thu, 19 Mar 2026 19:06:37 -0500 Subject: [PATCH 1/3] [ROCm][CI] Guard CudaPlatform/RocmPlatform imports to fix test collection on cross-platform builds Signed-off-by: Andreas Karatzas --- .../attention/test_attention_selector.py | 32 +++++++++++++++++-- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/tests/kernels/attention/test_attention_selector.py b/tests/kernels/attention/test_attention_selector.py index 347205755c68..20865a8c5568 100644 --- a/tests/kernels/attention/test_attention_selector.py +++ b/tests/kernels/attention/test_attention_selector.py @@ -14,8 +14,19 @@ ) from vllm.platforms import current_platform from vllm.platforms.cpu import CpuPlatform -from vllm.platforms.cuda import CudaPlatform -from vllm.platforms.rocm import RocmPlatform + +# CudaPlatform and RocmPlatform import their respective compiled C extensions +# at module level, raising ModuleNotFoundError on incompatible builds. +try: + from vllm.platforms.cuda import CudaPlatform +except (ImportError, ModuleNotFoundError): + CudaPlatform = None + +try: + from vllm.platforms.rocm import RocmPlatform +except (ImportError, ModuleNotFoundError): + RocmPlatform = None + from vllm.v1.attention.backends.registry import AttentionBackendEnum from vllm.v1.attention.selector import _cached_get_attn_backend, get_attn_backend @@ -101,6 +112,8 @@ def test_backend_selection( assert backend.get_name() == "CPU_ATTN" elif device == "hip": + if RocmPlatform is None: + pytest.skip("RocmPlatform not available") with patch("vllm.platforms.current_platform", RocmPlatform()): if use_mla: # ROCm MLA backend logic: @@ -126,6 +139,8 @@ def test_backend_selection( assert backend.get_name() == expected elif device == "cuda": + if CudaPlatform is None: + pytest.skip("CudaPlatform not available") with patch("vllm.platforms.current_platform", CudaPlatform()): capability = torch.cuda.get_device_capability() if use_mla: @@ -214,7 +229,7 @@ def test_backend_selection( assert backend.get_name() == expected -@pytest.mark.parametrize("device", ["cpu", "cuda"]) +@pytest.mark.parametrize("device", ["cpu", "cuda", "hip"]) def test_fp32_fallback(device: str): """Test attention backend selection with fp32.""" # Use default config (no backend specified) @@ -227,10 +242,19 @@ def test_fp32_fallback(device: str): assert backend.get_name() == "CPU_ATTN" elif device == "cuda": + if CudaPlatform is None: + pytest.skip("CudaPlatform not available") with patch("vllm.platforms.current_platform", CudaPlatform()): backend = get_attn_backend(16, torch.float32, None) assert backend.get_name() == "FLEX_ATTENTION" + elif device == "hip": + if RocmPlatform is None: + pytest.skip("RocmPlatform not available") + with patch("vllm.platforms.current_platform", RocmPlatform()): + backend = get_attn_backend(16, torch.float32, None) + assert backend.get_name() == "ROCM_ATTN" + def test_flash_attn(monkeypatch: pytest.MonkeyPatch): """Test FlashAttn validation.""" @@ -367,6 +391,8 @@ def test_per_head_quant_scales_backend_selection( attention_config=attention_config, cache_config=cache_config ) + if CudaPlatform is None: + pytest.skip("CudaPlatform not available") with ( set_current_vllm_config(vllm_config), patch("vllm.platforms.current_platform", CudaPlatform()), From 6d75e07c76695b5193a81ede87caa3e55600c075 Mon Sep 17 00:00:00 2001 From: Andreas Karatzas Date: Fri, 20 Mar 2026 12:32:25 -0500 Subject: [PATCH 2/3] [ROCm][CI] Fix test_fp32_fallback[hip] by enabling use_prefill_decode_attention for ROCM_ATTN Signed-off-by: Andreas Karatzas --- tests/kernels/attention/test_attention_selector.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/kernels/attention/test_attention_selector.py b/tests/kernels/attention/test_attention_selector.py index 20865a8c5568..c926e61d7836 100644 --- a/tests/kernels/attention/test_attention_selector.py +++ b/tests/kernels/attention/test_attention_selector.py @@ -251,7 +251,12 @@ def test_fp32_fallback(device: str): elif device == "hip": if RocmPlatform is None: pytest.skip("RocmPlatform not available") - with patch("vllm.platforms.current_platform", RocmPlatform()): + attention_config = AttentionConfig(use_prefill_decode_attention=True) + hip_vllm_config = VllmConfig(attention_config=attention_config) + with ( + set_current_vllm_config(hip_vllm_config), + patch("vllm.platforms.current_platform", RocmPlatform()), + ): backend = get_attn_backend(16, torch.float32, None) assert backend.get_name() == "ROCM_ATTN" From 95399f1030ea9689aebaa4decce2cdfcc8c92028 Mon Sep 17 00:00:00 2001 From: Andreas Karatzas Date: Fri, 20 Mar 2026 16:35:13 -0500 Subject: [PATCH 3/3] [ROCm][CI] Fix test_fp32_fallback[hip] to expect failure for unsupported head size Signed-off-by: Andreas Karatzas --- tests/kernels/attention/test_attention_selector.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/kernels/attention/test_attention_selector.py b/tests/kernels/attention/test_attention_selector.py index c926e61d7836..3ebf9cc3713a 100644 --- a/tests/kernels/attention/test_attention_selector.py +++ b/tests/kernels/attention/test_attention_selector.py @@ -251,14 +251,15 @@ def test_fp32_fallback(device: str): elif device == "hip": if RocmPlatform is None: pytest.skip("RocmPlatform not available") - attention_config = AttentionConfig(use_prefill_decode_attention=True) - hip_vllm_config = VllmConfig(attention_config=attention_config) + # ROCm backends do not support head_size=16 (minimum is 32). + # No known HuggingFace transformer model uses head_size=16. + # Revisit if a real model with this head size is identified + # and accuracy-tested. with ( - set_current_vllm_config(hip_vllm_config), patch("vllm.platforms.current_platform", RocmPlatform()), + pytest.raises(ValueError, match="No valid attention backend"), ): - backend = get_attn_backend(16, torch.float32, None) - assert backend.get_name() == "ROCM_ATTN" + get_attn_backend(16, torch.float32, None) def test_flash_attn(monkeypatch: pytest.MonkeyPatch):