From bedd022fac29a9a3e12c40cacb1ba97b30a97f99 Mon Sep 17 00:00:00 2001 From: Lucas Wilkinson Date: Wed, 23 Apr 2025 05:03:30 +0000 Subject: [PATCH 1/2] update FlashMLA Signed-off-by: Lucas Wilkinson --- cmake/external_projects/flashmla.cmake | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cmake/external_projects/flashmla.cmake b/cmake/external_projects/flashmla.cmake index 6291475164ba..089a17d6c006 100644 --- a/cmake/external_projects/flashmla.cmake +++ b/cmake/external_projects/flashmla.cmake @@ -19,7 +19,7 @@ else() FetchContent_Declare( flashmla GIT_REPOSITORY https://github.com/vllm-project/FlashMLA.git - GIT_TAG 575f7724b9762f265bbee5889df9c7d630801845 + GIT_TAG 24c6cab4b177294e6fe60c0cc8f6b0e92b7f0a31 GIT_PROGRESS TRUE CONFIGURE_COMMAND "" BUILD_COMMAND "" @@ -37,9 +37,9 @@ cuda_archs_loose_intersection(FLASH_MLA_ARCHS "9.0a" "${CUDA_ARCHS}") if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.3 AND FLASH_MLA_ARCHS) set(FlashMLA_SOURCES ${flashmla_SOURCE_DIR}/csrc/flash_api.cpp - ${flashmla_SOURCE_DIR}/csrc/flash_fwd_mla_bf16_sm90.cu - ${flashmla_SOURCE_DIR}/csrc/flash_fwd_mla_fp16_sm90.cu - ${flashmla_SOURCE_DIR}/csrc/flash_fwd_mla_metadata.cu) + ${flashmla_SOURCE_DIR}/csrc/kernels/splitkv_mla.cu + ${flashmla_SOURCE_DIR}/csrc/kernels/mla_combine.cu + ${flashmla_SOURCE_DIR}/csrc/kernels/get_mla_metadata.cu) set(FlashMLA_INCLUDES ${flashmla_SOURCE_DIR}/csrc/cutlass/include From 626daecd76dfda078a4900ebdaacbd0eda019b7c Mon Sep 17 00:00:00 2001 From: Lucas Wilkinson Date: Wed, 23 Apr 2025 05:19:16 +0000 Subject: [PATCH 2/2] remove no-longer used param Signed-off-by: Lucas Wilkinson --- vllm/attention/ops/flashmla.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vllm/attention/ops/flashmla.py b/vllm/attention/ops/flashmla.py index 18b69a6b3ddf..ccbfca340cae 100644 --- a/vllm/attention/ops/flashmla.py +++ b/vllm/attention/ops/flashmla.py @@ -90,7 +90,6 @@ def flash_mla_with_kvcache( out, softmax_lse = torch.ops._flashmla_C.fwd_kvcache_mla( q, k_cache, - None, head_dim_v, cache_seqlens, block_table,