From 429d8d2669545368e6cf66f40d9cf345f14a33ba Mon Sep 17 00:00:00 2001
From: Blake Ledden <blake@secondnaturecomputing.com>
Date: Tue, 24 Mar 2026 14:55:24 -0700
Subject: [PATCH 1/2] feat: add SM120 fmha_v2 kernels to AOT pip wheel builds

`gen_trtllm_fmha_v2_sm120_module()` was already callable via JIT
(generate_kernels.py dispatches to it at runtime), but it was never
registered in gen_all_modules() in aot.py. As a result, the AOT build
step skipped the fmha_v2 SM120 kernels entirely, so SM120/SM121
devices installing flashinfer from a pip wheel fell back to slower
paths or lacked support altogether.

Add it to the `has_sm120 or has_sm121` section alongside the other
SM120 modules (fused MOE, GEMM, CUTLASS FP4/MXFP8 GEMM).

Contributed by Second Nature Computing (https://joinsecondnature.com)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 flashinfer/aot.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/flashinfer/aot.py b/flashinfer/aot.py
index d2b23b7726..3d854efe94 100644
--- a/flashinfer/aot.py
+++ b/flashinfer/aot.py
@@ -44,6 +44,7 @@
     gen_single_decode_module,
     gen_single_prefill_module,
     gen_trtllm_gen_fmha_module,
+    gen_trtllm_fmha_v2_sm120_module,
 )
 from .jit.cascade import gen_cascade_module
 from .jit.cpp_ext import get_cuda_version
@@ -529,6 +530,7 @@ def gen_all_modules(
             jit_specs.append(gen_gemm_sm120_module())
             jit_specs.append(gen_gemm_sm120_module_cutlass_fp4())
             jit_specs.append(gen_gemm_sm120_module_cutlass_mxfp8())
+            jit_specs.append(gen_trtllm_fmha_v2_sm120_module())
         if has_sm120f:
             jit_specs.append(gen_fp4_quantization_sm120f_module())
 

From 0285211b824621aa1f940a2908b81d09dd433fe9 Mon Sep 17 00:00:00 2001
From: Blake Ledden <blake@secondnaturecomputing.com>
Date: Tue, 24 Mar 2026 15:05:48 -0700
Subject: [PATCH 2/2] nit: update SM120 comment to include attention kernels

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 flashinfer/aot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flashinfer/aot.py b/flashinfer/aot.py
index 3d854efe94..7afc4e54bb 100644
--- a/flashinfer/aot.py
+++ b/flashinfer/aot.py
@@ -523,7 +523,7 @@ def gen_all_modules(
         if has_sm121:
             jit_specs.append(gen_fp4_quantization_sm121_module())
         if has_sm120 or has_sm121:
-            # SM120 and SM121 share the same CUTLASS kernels for fused MOE and GEMM.
+            # SM120 and SM121 share the same kernels for fused MOE, GEMM, and attention.
             # The SM120 module generators use supported_major_versions=[12] which
             # compiles for all SM12x targets.
             jit_specs.append(gen_cutlass_fused_moe_sm120_module())