diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py index 43daf5e75b66..22c6dde754d0 100644 --- a/vllm/platforms/cuda.py +++ b/vllm/platforms/cuda.py @@ -55,15 +55,15 @@ def _get_backend_priorities( return [ AttentionBackendEnum.CUTLASS_MLA, AttentionBackendEnum.FLASHINFER_MLA, - AttentionBackendEnum.FLASHMLA, AttentionBackendEnum.FLASH_ATTN_MLA, + AttentionBackendEnum.FLASHMLA, AttentionBackendEnum.TRITON_MLA, AttentionBackendEnum.FLASHMLA_SPARSE, ] else: return [ - AttentionBackendEnum.FLASHMLA, AttentionBackendEnum.FLASH_ATTN_MLA, + AttentionBackendEnum.FLASHMLA, AttentionBackendEnum.FLASHINFER_MLA, AttentionBackendEnum.TRITON_MLA, AttentionBackendEnum.FLASHMLA_SPARSE,