From 429d8d2669545368e6cf66f40d9cf345f14a33ba Mon Sep 17 00:00:00 2001 From: Blake Ledden Date: Tue, 24 Mar 2026 14:55:24 -0700 Subject: [PATCH 1/2] feat: add SM120 fmha_v2 kernels to AOT pip wheel builds `gen_trtllm_fmha_v2_sm120_module()` was already callable via JIT (generate_kernels.py dispatches to it at runtime), but was never registered in gen_all_modules() in aot.py. As a result, the AOT build step for pip wheels skipped the fmha_v2 SM120 kernels entirely, so SM120/SM121 devices installing flashinfer from a wheel either fell back to slower paths or lacked support. Add it to the `has_sm120 or has_sm121` section alongside the other SM120 modules (fused MOE, GEMM, FP4 quantization). Contributed by Second Nature Computing (https://joinsecondnature.com) Co-Authored-By: Claude Opus 4.6 --- flashinfer/aot.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/flashinfer/aot.py b/flashinfer/aot.py index d2b23b7726..3d854efe94 100644 --- a/flashinfer/aot.py +++ b/flashinfer/aot.py @@ -44,6 +44,7 @@ gen_single_decode_module, gen_single_prefill_module, gen_trtllm_gen_fmha_module, + gen_trtllm_fmha_v2_sm120_module, ) from .jit.cascade import gen_cascade_module from .jit.cpp_ext import get_cuda_version @@ -529,6 +530,7 @@ def gen_all_modules( jit_specs.append(gen_gemm_sm120_module()) jit_specs.append(gen_gemm_sm120_module_cutlass_fp4()) jit_specs.append(gen_gemm_sm120_module_cutlass_mxfp8()) + jit_specs.append(gen_trtllm_fmha_v2_sm120_module()) if has_sm120f: jit_specs.append(gen_fp4_quantization_sm120f_module()) From 0285211b824621aa1f940a2908b81d09dd433fe9 Mon Sep 17 00:00:00 2001 From: Blake Ledden Date: Tue, 24 Mar 2026 15:05:48 -0700 Subject: [PATCH 2/2] nit: update SM120 comment to include attention kernels Co-Authored-By: Claude Opus 4.6 --- flashinfer/aot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flashinfer/aot.py b/flashinfer/aot.py index 3d854efe94..7afc4e54bb 100644 --- a/flashinfer/aot.py +++ b/flashinfer/aot.py @@ -523,7 +523,7 @@ def gen_all_modules( 
if has_sm121: jit_specs.append(gen_fp4_quantization_sm121_module()) if has_sm120 or has_sm121: - # SM120 and SM121 share the same CUTLASS kernels for fused MOE and GEMM. + # SM120 and SM121 share the same kernels for fused MOE, GEMM, and attention. # The SM120 module generators use supported_major_versions=[12] which # compiles for all SM12x targets. jit_specs.append(gen_cutlass_fused_moe_sm120_module())