From 0dd839418bda2ca997e95d88a2cdf819aeaf1ada Mon Sep 17 00:00:00 2001 From: Gregory Shtrasberg Date: Fri, 20 Feb 2026 22:58:48 +0000 Subject: [PATCH 1/2] Check for sinks requirement for AITER MHA selector Signed-off-by: Gregory Shtrasberg --- vllm/platforms/rocm.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index 2fedd7c6791f..811b5a1dadd1 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -345,7 +345,12 @@ def get_attn_backend_cls( # Priority 2: Check for AITER MHA (Flash Attention) # Only use if explicitly enabled (not just VLLM_ROCM_USE_AITER=1) - if envs.VLLM_ROCM_USE_AITER and envs.VLLM_ROCM_USE_AITER_MHA and on_gfx9(): + if ( + envs.VLLM_ROCM_USE_AITER + and envs.VLLM_ROCM_USE_AITER_MHA + and on_gfx9() + and not attn_selector_config.has_sink + ): logger.info("Using Aiter Flash Attention backend.") return AttentionBackendEnum.ROCM_AITER_FA.get_path() @@ -365,7 +370,8 @@ def get_attn_backend_cls( if ( envs.VLLM_ROCM_USE_AITER and on_gfx9() - and envs.VLLM_ROCM_USE_AITER_MHA is not False + and envs.VLLM_ROCM_USE_AITER_MHA + and not attn_selector_config.has_sink ): logger.info("Using Aiter Flash Attention backend.") return AttentionBackendEnum.ROCM_AITER_FA.get_path() From 1066c4fa1b60ffa919c903ce21972d91cad6c468 Mon Sep 17 00:00:00 2001 From: Gregory Shtrasberg Date: Fri, 20 Feb 2026 23:01:29 +0000 Subject: [PATCH 2/2] Add check for sinks when explicitly selected Signed-off-by: Gregory Shtrasberg --- vllm/platforms/rocm.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index 811b5a1dadd1..25b3c3b0ec07 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -323,6 +323,11 @@ def get_attn_backend_cls( return AttentionBackendEnum.ROCM_ATTN.get_path() if selected_backend == AttentionBackendEnum.ROCM_AITER_FA: + if attn_selector_config.has_sink: + raise ValueError( + f"The selected backend, {selected_backend.name}, " + "does not support sinks." + ) if on_gfx9(): logger.info("Using Aiter Flash Attention backend.") return AttentionBackendEnum.ROCM_AITER_FA.get_path()