Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions cmake/external_projects/flashmla.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ else()
FetchContent_Declare(
flashmla
GIT_REPOSITORY https://github.com/vllm-project/FlashMLA.git
GIT_TAG 575f7724b9762f265bbee5889df9c7d630801845
GIT_TAG 24c6cab4b177294e6fe60c0cc8f6b0e92b7f0a31
GIT_PROGRESS TRUE
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
Expand All @@ -37,9 +37,9 @@ cuda_archs_loose_intersection(FLASH_MLA_ARCHS "9.0a" "${CUDA_ARCHS}")
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.3 AND FLASH_MLA_ARCHS)
set(FlashMLA_SOURCES
${flashmla_SOURCE_DIR}/csrc/flash_api.cpp
${flashmla_SOURCE_DIR}/csrc/flash_fwd_mla_bf16_sm90.cu
${flashmla_SOURCE_DIR}/csrc/flash_fwd_mla_fp16_sm90.cu
${flashmla_SOURCE_DIR}/csrc/flash_fwd_mla_metadata.cu)
${flashmla_SOURCE_DIR}/csrc/kernels/splitkv_mla.cu
${flashmla_SOURCE_DIR}/csrc/kernels/mla_combine.cu
${flashmla_SOURCE_DIR}/csrc/kernels/get_mla_metadata.cu)

set(FlashMLA_INCLUDES
${flashmla_SOURCE_DIR}/csrc/cutlass/include
Expand Down
1 change: 0 additions & 1 deletion vllm/attention/ops/flashmla.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ def flash_mla_with_kvcache(
out, softmax_lse = torch.ops._flashmla_C.fwd_kvcache_mla(
q,
k_cache,
None,
head_dim_v,
cache_seqlens,
block_table,
Expand Down