From 1cec9c414fde28afdc9f99ae165e4706ef4ab04f Mon Sep 17 00:00:00 2001
From: Andreas Karatzas <akaratza@amd.com>
Date: Thu, 19 Mar 2026 19:06:37 -0500
Subject: [PATCH 1/3] [ROCm][CI] Guard CudaPlatform/RocmPlatform imports to fix
 test collection on cross-platform builds

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
---
 .../attention/test_attention_selector.py      | 32 +++++++++++++++++--
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/tests/kernels/attention/test_attention_selector.py b/tests/kernels/attention/test_attention_selector.py
index 347205755c68..20865a8c5568 100644
--- a/tests/kernels/attention/test_attention_selector.py
+++ b/tests/kernels/attention/test_attention_selector.py
@@ -14,8 +14,19 @@
 )
 from vllm.platforms import current_platform
 from vllm.platforms.cpu import CpuPlatform
-from vllm.platforms.cuda import CudaPlatform
-from vllm.platforms.rocm import RocmPlatform
+
+# The CUDA/ROCm platform modules import compiled C extensions at module level
+# and raise ModuleNotFoundError (an ImportError subclass) on incompatible builds.
+try:
+    from vllm.platforms.cuda import CudaPlatform
+except ImportError:
+    CudaPlatform = None
+
+try:
+    from vllm.platforms.rocm import RocmPlatform
+except ImportError:
+    RocmPlatform = None
+
 from vllm.v1.attention.backends.registry import AttentionBackendEnum
 from vllm.v1.attention.selector import _cached_get_attn_backend, get_attn_backend
 
@@ -101,6 +112,8 @@ def test_backend_selection(
             assert backend.get_name() == "CPU_ATTN"
 
         elif device == "hip":
+            if RocmPlatform is None:
+                pytest.skip("RocmPlatform not available")
             with patch("vllm.platforms.current_platform", RocmPlatform()):
                 if use_mla:
                     # ROCm MLA backend logic:
@@ -126,6 +139,8 @@ def test_backend_selection(
                     assert backend.get_name() == expected
 
         elif device == "cuda":
+            if CudaPlatform is None:
+                pytest.skip("CudaPlatform not available")
             with patch("vllm.platforms.current_platform", CudaPlatform()):
                 capability = torch.cuda.get_device_capability()
                 if use_mla:
@@ -214,7 +229,7 @@ def test_backend_selection(
                     assert backend.get_name() == expected
 
 
-@pytest.mark.parametrize("device", ["cpu", "cuda"])
+@pytest.mark.parametrize("device", ["cpu", "cuda", "hip"])
 def test_fp32_fallback(device: str):
     """Test attention backend selection with fp32."""
     # Use default config (no backend specified)
@@ -227,10 +242,19 @@ def test_fp32_fallback(device: str):
             assert backend.get_name() == "CPU_ATTN"
 
         elif device == "cuda":
+            if CudaPlatform is None:
+                pytest.skip("CudaPlatform not available")
             with patch("vllm.platforms.current_platform", CudaPlatform()):
                 backend = get_attn_backend(16, torch.float32, None)
             assert backend.get_name() == "FLEX_ATTENTION"
 
+        elif device == "hip":
+            if RocmPlatform is None:
+                pytest.skip("RocmPlatform not available")
+            with patch("vllm.platforms.current_platform", RocmPlatform()):
+                backend = get_attn_backend(16, torch.float32, None)
+            assert backend.get_name() == "ROCM_ATTN"
+
 
 def test_flash_attn(monkeypatch: pytest.MonkeyPatch):
     """Test FlashAttn validation."""
@@ -367,6 +391,8 @@ def test_per_head_quant_scales_backend_selection(
         attention_config=attention_config, cache_config=cache_config
     )
 
+    if CudaPlatform is None:
+        pytest.skip("CudaPlatform not available")
     with (
         set_current_vllm_config(vllm_config),
         patch("vllm.platforms.current_platform", CudaPlatform()),

From 6d75e07c76695b5193a81ede87caa3e55600c075 Mon Sep 17 00:00:00 2001
From: Andreas Karatzas <akaratza@amd.com>
Date: Fri, 20 Mar 2026 12:32:25 -0500
Subject: [PATCH 2/3] [ROCm][CI] Fix test_fp32_fallback[hip] by enabling
 use_prefill_decode_attention for ROCM_ATTN

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
---
 tests/kernels/attention/test_attention_selector.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tests/kernels/attention/test_attention_selector.py b/tests/kernels/attention/test_attention_selector.py
index 20865a8c5568..c926e61d7836 100644
--- a/tests/kernels/attention/test_attention_selector.py
+++ b/tests/kernels/attention/test_attention_selector.py
@@ -251,7 +251,12 @@ def test_fp32_fallback(device: str):
         elif device == "hip":
             if RocmPlatform is None:
                 pytest.skip("RocmPlatform not available")
-            with patch("vllm.platforms.current_platform", RocmPlatform()):
+            attention_config = AttentionConfig(use_prefill_decode_attention=True)
+            hip_vllm_config = VllmConfig(attention_config=attention_config)
+            with (
+                set_current_vllm_config(hip_vllm_config),
+                patch("vllm.platforms.current_platform", RocmPlatform()),
+            ):
                 backend = get_attn_backend(16, torch.float32, None)
             assert backend.get_name() == "ROCM_ATTN"
 

From 95399f1030ea9689aebaa4decce2cdfcc8c92028 Mon Sep 17 00:00:00 2001
From: Andreas Karatzas <akaratza@amd.com>
Date: Fri, 20 Mar 2026 16:35:13 -0500
Subject: [PATCH 3/3] [ROCm][CI] Fix test_fp32_fallback[hip] to expect failure
 for unsupported head size

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
---
 tests/kernels/attention/test_attention_selector.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/tests/kernels/attention/test_attention_selector.py b/tests/kernels/attention/test_attention_selector.py
index c926e61d7836..3ebf9cc3713a 100644
--- a/tests/kernels/attention/test_attention_selector.py
+++ b/tests/kernels/attention/test_attention_selector.py
@@ -251,14 +251,15 @@ def test_fp32_fallback(device: str):
         elif device == "hip":
             if RocmPlatform is None:
                 pytest.skip("RocmPlatform not available")
-            attention_config = AttentionConfig(use_prefill_decode_attention=True)
-            hip_vllm_config = VllmConfig(attention_config=attention_config)
+            # ROCm backends do not support head_size=16 (minimum is 32).
+            # No known HuggingFace transformer model uses head_size=16.
+            # Revisit if a real model with this head size is identified
+            # and accuracy-tested.
             with (
-                set_current_vllm_config(hip_vllm_config),
                 patch("vllm.platforms.current_platform", RocmPlatform()),
+                pytest.raises(ValueError, match="No valid attention backend"),
             ):
-                backend = get_attn_backend(16, torch.float32, None)
-            assert backend.get_name() == "ROCM_ATTN"
+                get_attn_backend(16, torch.float32, None)
 
 
 def test_flash_attn(monkeypatch: pytest.MonkeyPatch):