diff --git a/vllm/v1/attention/backends/rocm_attn.py b/vllm/v1/attention/backends/rocm_attn.py index 0b9889c136b4..5a70dc2bae93 100644 --- a/vllm/v1/attention/backends/rocm_attn.py +++ b/vllm/v1/attention/backends/rocm_attn.py @@ -121,6 +121,7 @@ def build( slot_mapping = common_attn_metadata.slot_mapping use_cascade = common_prefix_len > 0 + prefix_scheduler_metadata = None if use_cascade: cu_prefix_query_lens = torch.tensor( @@ -135,7 +136,6 @@ def build( cu_prefix_query_lens = None prefix_kv_lens = None suffix_kv_lens = None - prefix_scheduler_metadata = None attn_metadata = RocmAttentionMetadata( num_actual_tokens=num_actual_tokens,