From d44b0acc6d0be7e00395562bb3d62d55c12e71d4 Mon Sep 17 00:00:00 2001
From: vllmellm
Date: Mon, 5 Jan 2026 08:48:16 +0000
Subject: [PATCH 1/2] add ROCm attention metadata backend in eagle

Signed-off-by: vllmellm
---
 vllm/v1/spec_decode/eagle.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py
index 66697132b365..64c0d2d2b031 100644
--- a/vllm/v1/spec_decode/eagle.py
+++ b/vllm/v1/spec_decode/eagle.py
@@ -168,7 +168,13 @@ def __init__(
         # Determine allowed attention backends once during initialization.
         self.allowed_attn_types: tuple | None = None
         if current_platform.is_rocm():
-            rocm_types = [TritonAttentionMetadata, FlashAttentionMetadata]
+            from vllm.v1.attention.backends.rocm_attn import RocmAttentionMetadata
+
+            rocm_types = [
+                TritonAttentionMetadata,
+                FlashAttentionMetadata,
+                RocmAttentionMetadata,
+            ]
             # ROCM_AITER_FA is an optional backend
             if find_spec(
                 AttentionBackendEnum.ROCM_AITER_FA.get_path(include_classname=False)

From 8b36f2ea7c6dea1af047e4abfe0d1b93bbbdd828 Mon Sep 17 00:00:00 2001
From: vllmellm
Date: Mon, 5 Jan 2026 09:43:00 +0000
Subject: [PATCH 2/2] remove unused metadata for rocm platform

Signed-off-by: vllmellm
---
 vllm/v1/spec_decode/eagle.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py
index 64c0d2d2b031..7f64fd4baf3a 100644
--- a/vllm/v1/spec_decode/eagle.py
+++ b/vllm/v1/spec_decode/eagle.py
@@ -27,7 +27,6 @@
 from vllm.platforms import current_platform
 from vllm.triton_utils import triton
 from vllm.utils.platform_utils import is_pin_memory_available
-from vllm.v1.attention.backends.flash_attn import FlashAttentionMetadata
 from vllm.v1.attention.backends.tree_attn import (
     TreeAttentionMetadata,
     TreeAttentionMetadataBuilder,
@@ -172,7 +171,6 @@ def __init__(
 
             rocm_types = [
                 TritonAttentionMetadata,
-                FlashAttentionMetadata,
                 RocmAttentionMetadata,
             ]
             # ROCM_AITER_FA is an optional backend