# Reviewer notes (commentary only; everything from `diff --git` onward is the unmodified patch).
#
# Purpose: patch to sgl-kernel/CMakeLists.txt consisting of two hunks.
#
# Hunk 1 (@@ -92,7 +92,7 @@): inside FetchContent_Declare(repo-flash-attention ...),
#   moves the GIT_TAG pin for https://github.com/sgl-project/sgl-attn from
#   f866ec34002250e74c8bbcbcffa0e1ae71300b2d to cc75c5c5979a607ad20a6828635646f9841acf01.
#   GIT_REPOSITORY and GIT_SHALLOW OFF are context lines and stay as they are.
#
# Hunk 2 (@@ -464,21 +464,21 @@, hunk header context: `if (SGL_KERNEL_ENABLE_FA3)`):
#   for each of FA3_BF16_GEN_SRCS, FA3_FP16_GEN_SRCS and FA3_FP8_GEN_SRCS the primary
#   file(GLOB) pattern changes from
#       flash_fwd_hdimall_<dtype>*_sm90.cu
#   to
#       flash_fwd_hdim[0-9]*_<dtype>*_sm90.cu        (<dtype> = bf16 | fp16 | e4m3)
#   The secondary flash_fwd_hdimdiff_<dtype>*_sm90.cu globs and the list(APPEND ...)
#   calls that merge them are untouched context.
#
# Why `[0-9]*`: file(GLOB) takes globbing expressions, not regexes, so `[0-9]` is exactly
#   one digit and `*` is any run of characters. The character right after `hdim` must
#   therefore be a digit, which excludes both the `hdimall` and `hdimdiff` file names.
#
# Rationale: the updated FP16 comment says the per-hdim files are used instead of hdimall
#   "to avoid ptxas crash". The patch gives no reason next to the BF16 and FP8 globs.
#   NOTE(review): presumably the same ptxas issue motivates those two changes - confirm.
#   NOTE(review): the new glob would also match any file whose name has further
#   underscore-separated parts after the first digits (e.g. hdim<N>_<M>_<dtype>) - verify
#   against the instantiations directory at the new GIT_TAG that nothing is compiled twice.
#   NOTE(review): the tag bump and the glob change look coupled (the per-hdim files
#   presumably need to exist at the new pin) - confirm before splitting or reverting either.
#
# Formatting: this patch is stored collapsed onto a single line; the leading whitespace and
#   blank lines of the context lines are not recoverable from it, so it is left as-is.
diff --git a/sgl-kernel/CMakeLists.txt b/sgl-kernel/CMakeLists.txt index 05b9a056c40e..f5ad30872c76 100644 --- a/sgl-kernel/CMakeLists.txt +++ b/sgl-kernel/CMakeLists.txt @@ -92,7 +92,7 @@ FetchContent_Populate(repo-flashinfer) FetchContent_Declare( repo-flash-attention GIT_REPOSITORY https://github.com/sgl-project/sgl-attn - GIT_TAG f866ec34002250e74c8bbcbcffa0e1ae71300b2d + GIT_TAG cc75c5c5979a607ad20a6828635646f9841acf01 GIT_SHALLOW OFF ) FetchContent_Populate(repo-flash-attention) @@ -464,21 +464,21 @@ if (SGL_KERNEL_ENABLE_FA3) endif() file(GLOB FA3_BF16_GEN_SRCS - "${repo-flash-attention_SOURCE_DIR}/hopper/instantiations/flash_fwd_hdimall_bf16*_sm90.cu") + "${repo-flash-attention_SOURCE_DIR}/hopper/instantiations/flash_fwd_hdim[0-9]*_bf16*_sm90.cu") file(GLOB FA3_BF16_GEN_SRCS_ "${repo-flash-attention_SOURCE_DIR}/hopper/instantiations/flash_fwd_hdimdiff_bf16*_sm90.cu") list(APPEND FA3_BF16_GEN_SRCS ${FA3_BF16_GEN_SRCS_}) - # FP16 source files + # FP16 source files - use individual hdim files instead of hdimall to avoid ptxas crash file(GLOB FA3_FP16_GEN_SRCS - "${repo-flash-attention_SOURCE_DIR}/hopper/instantiations/flash_fwd_hdimall_fp16*_sm90.cu") + "${repo-flash-attention_SOURCE_DIR}/hopper/instantiations/flash_fwd_hdim[0-9]*_fp16*_sm90.cu") file(GLOB FA3_FP16_GEN_SRCS_ "${repo-flash-attention_SOURCE_DIR}/hopper/instantiations/flash_fwd_hdimdiff_fp16*_sm90.cu") list(APPEND FA3_FP16_GEN_SRCS ${FA3_FP16_GEN_SRCS_}) # FP8 source files file(GLOB FA3_FP8_GEN_SRCS - "${repo-flash-attention_SOURCE_DIR}/hopper/instantiations/flash_fwd_hdimall_e4m3*_sm90.cu") + "${repo-flash-attention_SOURCE_DIR}/hopper/instantiations/flash_fwd_hdim[0-9]*_e4m3*_sm90.cu") file(GLOB FA3_FP8_GEN_SRCS_ "${repo-flash-attention_SOURCE_DIR}/hopper/instantiations/flash_fwd_hdimdiff_e4m3*_sm90.cu") list(APPEND FA3_FP8_GEN_SRCS ${FA3_FP8_GEN_SRCS_})