diff --git a/cpp/tensorrt_llm/batch_manager/kvCacheTransferManager.cpp b/cpp/tensorrt_llm/batch_manager/kvCacheTransferManager.cpp index fd5758a8368..dd6eb38b4e0 100644 --- a/cpp/tensorrt_llm/batch_manager/kvCacheTransferManager.cpp +++ b/cpp/tensorrt_llm/batch_manager/kvCacheTransferManager.cpp @@ -114,9 +114,13 @@ void KVCacheTransferManager::copyBlock(BlockPtr const& src, BlockPtr const& dst, auto srcPtr = computeBlockPointer(src, pools, poolIdx); auto dstPtr = computeBlockPointer(dst, pools, poolIdx); + // Does it contain block scales? + auto containsBlockScales = pools[poolIdx].containsBlockScales; + // If no partial tokens or if the dataType is not supported for partial copy, copy entire block. + // Note that nvfp4 kv cache SFs use an interleaved layout, so we need to copy the entire block. if (numTokensToCopy <= 0 || srcPtr->getDataType() == nvinfer1::DataType::kINT4 - || srcPtr->getDataType() == nvinfer1::DataType::kFP4) + || srcPtr->getDataType() == nvinfer1::DataType::kFP4 || containsBlockScales) { // For partial copy not implemented with these data types, // just do a full copy. diff --git a/cpp/tensorrt_llm/kernels/fmhaDispatcher.cpp b/cpp/tensorrt_llm/kernels/fmhaDispatcher.cpp index 11b3e1b0ffd..b46564d49a6 100644 --- a/cpp/tensorrt_llm/kernels/fmhaDispatcher.cpp +++ b/cpp/tensorrt_llm/kernels/fmhaDispatcher.cpp @@ -189,10 +189,11 @@ void FmhaDispatcher::run(MHARunnerParams runnerParams) tllmRunnerParams.attentionSinksPtr = runnerParams.attentionSinksPtr; tllmRunnerParams.cumSeqLensQPtr = reinterpret_cast(runnerParams.cuQSeqLenPtr); tllmRunnerParams.cumSeqLensKvPtr = reinterpret_cast(runnerParams.cuKvSeqLenPtr); + // Attention scales device pointers (only fp8 kernels need to load scales from the device memory). 
tllmRunnerParams.outputScalePtr = reinterpret_cast(runnerParams.scaleBmm2Ptr); - // TRTLLM-GEN kernels always use the Log2 scale - tllmRunnerParams.scaleSoftmaxLog2Ptr - = reinterpret_cast(runnerParams.scaleBmm1Ptr + kIdxScaleSoftmaxLog2Ptr); + tllmRunnerParams.scaleSoftmaxLog2Ptr = runnerParams.scaleBmm1Ptr + ? reinterpret_cast(runnerParams.scaleBmm1Ptr + kIdxScaleSoftmaxLog2Ptr) + : nullptr; tllmRunnerParams.kvPageIdxPtr = reinterpret_cast(kvPageIdxPtr); tllmRunnerParams.oSfScalePtr = runnerParams.oSfScalePtr; tllmRunnerParams.oPtr = runnerParams.outputPtr; diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 066477f2bdc..05023fc7409 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a74c90bed8cdfc61d4d30985f0a037b948a845387af20641313e17b1892c830b -size 612398 +oid sha256:331aaf5e84f39f9ce4940fce18d646701f80caf6681d8ba1244934171baf9d03 +size 616196 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index dd8d82a3f9a..c180284f18d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:47b29936b5167f32d44959bdbbdb8943a3b4152c0f42860fbcafbd27fbe930d4 -size 547072 +oid sha256:e93cb23f1ee61233c61091dc880258c59fa006abb5950cc6c8e1a99da2537845 +size 551858 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index b9e8644acba..b7e30e8d602 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:64a5d40ff29adb68f36625bd0f0fba00b347fc0e20c7acf901eea8b97919a1bf -size 601346 +oid sha256:748f8edf49b35d4c0502d3a292f11a53673d224539f8a94e2f9724bf17b8502b +size 605146 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 5af9e3c9d6e..04281100558 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:fab341ce70c852b81be9d942b033a133cd803f2f095f3fad59dfb65e03292929 -size 536022 +oid sha256:e4ba7f26e6cb3e11b76321d6539bf2e3d194908058ae296a5b4c3ecb36fdfdf3 +size 540806 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 0a6fc104282..ce0b43ff0a5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9a96b284f3fd4745012dffb728a2659b4de6e79d8c5c47efa375d7b3859d00bc -size 594912 +oid sha256:799f27d110ed5c5b76d30c39a945d1bf8a28078be2a9dbf35db18a27a8f608dd +size 466054 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 5934f6157f2..c918735f2ea 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0cdbd895f8ce46c6432bf8eb0d6682c247ba3a79ac84f5d3f37bc00ae626ca74 -size 554678 +oid 
sha256:a2e8a5d62a02d3ba248b1a19bfbf9d0cd11674705283022033f5243b98ee72cd +size 432382 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 647349a5b64..049e4c8f46f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7759c5d27b868e46921e41870c6ea9cd2f2bb96b46f0cc5308c6890689296dcd -size 584176 +oid sha256:a21b45df44b576b61dff0cd0e89caeb324971601c3b458cfc72f32640a76eee4 +size 456106 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 2cab97c1286..a82eacf083e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cde6a4e81280191ad7ccbcaeb0ffcc3c0aab9e82061b6c5aa71c95e9cce70a27 -size 550258 +oid sha256:281d59d446597519bb0fdd6fd5b46cc9a69fe5ede08fb6b46426cef8bf5b1327 +size 427984 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index e345c954d97..902283593ba 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d47e4f694dc6ae10a63a00764a5aade6395bcf539d951621e28fb110530dc408 -size 609238 +oid sha256:6fa67e1282156d5bce4db0dcd6a488b0e68abd2281d6351e71ed2d9cc442105f +size 611458 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 5294e9aa99e..0cc98a593fc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cf79a77fe7e0600b403aa282885bf5a905d779783c5c7db4ffa1248219fe3814 -size 543518 +oid sha256:3f322ae84000c0990a01b0eaab20450cbddf8240fed8d69c77396561687559ba +size 550350 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 648e87e04d3..433e374a8e8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4929315026345f84ad9f9c775af67bcda4f985fee6576f4ce00179a65d570361 -size 558558 +oid sha256:8591279735daf4e616f620a0e4168b248d2f6505c1e69269055086f20826713c +size 435916 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8824aac2d7b..286d8bffef4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e3281e908cabc1922569c8cf1969b16d7465fb67774b9da19a177282d3b1b1e9 -size 504382 +oid sha256:0fedf3acc531073c068c75a502b0fcfe0ab72a4c8a74738c1359a40b306c7d71 +size 377152 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 99c3ddedde1..76f308000cc 
100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c4f3d0c8776d8b2b6c40a46d516b9a91e1935d4abd9c6ad0c7152c48fd6665f0 -size 530214 +oid sha256:100eb03cb08b71dc45f37bab8d009daa24aedc37f33a6279e47568dc9f878195 +size 415686 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index acaad919e75..14697fa1e55 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b5d8adb1bc095537205c3ca57e1b6c696793167a1d0ab4e1a969e7447fb0668d -size 476062 +oid sha256:65e4966f977dba8575f0cb5f023b77878cafcfb1294fa35a481759f7285b85de +size 356922 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index ba92f1116ee..053f09e6232 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a77b0dcc2ce7bad1f8a5b28082d398a1af7fef4e5ff0dda56f5274700d2ee753 -size 583862 +oid sha256:d459c6fd38a1dbd1f0487979a9a200943b194b8d7be5d97b3946d7143f977950 +size 455792 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 93dd56f2fcd..5d9194c1f66 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:08065a7499eed28bd6a6677d89054cef67331b00078dc331294a197d4deacbc5 -size 543628 +oid sha256:9a2c8d5224615543b71f1d21b787b308a7628f83765ea6dca8785948c3051433 +size 421330 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 720a7c84f59..3ac02ab2711 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e203cec5159580e8db31ea630e13f11f9fcfde51a11ea924d4ba0f46089f5c52 -size 572336 +oid sha256:cf234f00c09a023d6c41428a610989d2f94a87633a0c85adf5b3594dcb89354f +size 445056 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 21fc670eaab..8bdaa59cefd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:43533e5e38e805fab229356c4756a32ba305565b1af063e0d9b991a5e6e4f589 -size 538418 +oid sha256:3dd66ceec03e32e2fbfacea619139f8937d95249b2520fec1e7bbf939208cbf0 +size 416934 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 92c0e97ccf6..c31381b5fdb 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f8a5faa85746df96eed8c29beb839606ee06f17e5f8bd9fb7361a47f9a28618d -size 598188 +oid sha256:d51517abcced73cf483d80428ba1950995691aff3b94706af5ebf37833fdb7d5 +size 600408 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index ceb7c8281a0..1796a0b7a5f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3222b286a26d40e44323898291fdd9563ef70f7b1238356cfc93a1cf444d0141 -size 532468 +oid sha256:e67116bdcf5ecb2fac46dc06056d7f48798652b7df16eb17d1f6dd620f87362e +size 539300 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index d4f66ed5b36..75ab9b17744 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:94e02c86565c2101de3d6d852743feeb874d449aad145f14c15ec9fc980f3f63 -size 546718 +oid sha256:be3283948cb5ed8da96c899dda4623d381380356d1986a00a9e2d1d04ab12756 +size 424864 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index cb7df04d78d..013e6763f6f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:52001830b63fde6a7c49cd4dbb7892a0811d58a6d4a065206f3676e25aecbcb2 -size 492542 +oid sha256:b8b8de657d2cc17016d83a123362d7dd837a143c1c03c76824e21741e71ad72c +size 366100 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 5c9fc94df89..b78311c1a53 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:44869e834508b4ca5761b94e2dbbc8780126a107b1cb262e95b3bf1651bc5f80 -size 518374 +oid sha256:b1b96a8fdd69872ae0702c8019edd47805246c5ec1267b4916fa8588f6a3643b +size 404636 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f3c3b63639e..a9572405f9d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1ee6f00bf5233113f5710b1d83b270aa93b46a6e13e5d5d4d55accfe73bbd919 -size 464222 +oid sha256:73ac5b7c1c31890097cfcf8dcd623be91ac12189c18855bb0b90100162689ec1 +size 345872 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index a2af4ea71ff..78fe682a688 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:e6bfc93f5701d8028b134d6247cb92a1e5e8f5aa75c0a3734f54bf183291432f -size 609130 +oid sha256:25eef31d38e7dd13fc4c8b5ddad1414c503cda6bbbb486f2849d900ef28fe4b8 +size 486610 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 1250928caae..7b7faa847ec 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:35e46c5e76751c20591d6b82d4f4a93a260c0197db5c8597950e935e9f6e0b83 -size 568970 +oid sha256:d30bcc35d5effbd6be667af3d19bf8b38d021a52a2f4d3e417b2e948e469ed56 +size 452148 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 432d3054625..da58eab3d1d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:8fe642afc2eb4e978d0df06060c78d96440a280bd599f6b6597093c77a3ea3f2 -size 599182 +oid sha256:fba723537418085d113c273b18866dbfb777133913e86d506bbfc2340bc76981 +size 475874 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index c54a9359afa..c845bca0b23 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eee067ed71b28cd4076120249ba9ae0ef1a6ba5004ba41e9e1c413d0e4522346 -size 562970 +oid sha256:17d90eec0a59bdc7e4d40d08f03c3c55247aa6349fc2c1fa27b563dac75e8e0a +size 446962 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 8a91829bed8..59cadae0069 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:872c6b70fc2aa0f4e9a93de8999e6307150fef6027a56db0d1fd52884ed7d84b -size 652785 +oid sha256:caf3c99ba08d92571f2f5995a01b6c89a55b03f1ac6c6d1569286d1d6fae40a7 +size 655005 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 4c794a9dc10..34807221d3c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:055f9458b9c75960585cca236e1addc259e2d5710e719bc07b45411ad68c9a61 -size 586570 +oid sha256:8915c45e30aff50cc46d42ec34e3a0fa8a2401bf39a4f1a899d9c06a09829127 +size 592392 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index b2bd8476805..4d4bdbffcfb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:4b21d6960381caf7e233de9438586eb0dda1927069617cca53b365ac2ee8b10d -size 573490 +oid sha256:0ff83cd863b24e8ce157896497a2d001a3b30ca28037bd64f812748b2d55083a +size 459530 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index ee6fb39bca2..4fccb3348e7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7dbbdd1452224fd97985f7da53f4b24add1fde7b52dc63d37f6ff628ceffe032 -size 509892 +oid sha256:e5bf9b569886ae544bf989033e8917cb833b316523f84c409ad27be513b9d118 +size 389692 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index c3ea5c6e83c..54f951fe338 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:eb9dc85ab3a194144d147decc81c2b10baa456d92420d090d942d44b1fc1edac -size 543592 +oid sha256:d3fc291858b0c0af5b3a0694bfd3aafc0b537aaff3540a28aab7484d39dc72b5 +size 435356 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 1ceb63109d5..35f6573fe1b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:61e9c147e2d38a1f4520dc7c151430b1e1dee51fed79a1c960bdc42237f8c618 -size 480784 +oid sha256:c66792149402a125e0720afd5367e371346f80a87740f1e1f9e280ba48935124 +size 368674 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index fe1684eec68..a498eae88c6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:d529b9ced371f42a5fede362e3599756a10a5a5b1ca07c138d0e8da9f6b00679 -size 598078 +oid sha256:5c503e3e00848c097001c73b80f98efdb7341970ccf79a258a15e95d93192a12 +size 475560 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index e2ea02cb23b..855ad055ba6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:653d3f564002c8361837ab8c35a87c2286221d460ab39d26892a790c354fd47f -size 557920 +oid sha256:c883a1e96d3601bdabba5a536e32a64492de256f9a78a3e5473b5a3973318e7d +size 441098 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c252abf5fc8..c99d92f9be5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:3daa32067f37c950da9903b8343208144a33e5b6f10957dc16896a1034928602 -size 588132 +oid sha256:6a47c590c342a1dc967f6a8cc46fa88087f43db885d8cf0b5c3523a20792271f +size 464824 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 7146a6b71ae..3bc659992c4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dd49b254d061f8fd0681672aed0cdf86492748334045606233e6901e6c556602 -size 551130 +oid sha256:4dd8a6839f0c69a14b7c96bf65903fd19dabadd8e4ceb1b7a61bd79b326de697 +size 435912 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index ff7ec5da881..d21acc038b0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:7edf765eb20e59bcb4ee5faca15dabfcb1c1e6efdaf668fc990857facc49f729 -size 640945 +oid sha256:b66b9f3c286a06aad56e755d623309acd2b2f041bf65744d2b1e0044c1610a53 +size 643955 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index cfee2753ecb..a4af6707cbf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:145fec28af43402473a5627176995c2ba2f76633a8baff2aa4d7dab3c19b45da -size 575520 +oid sha256:9756e332b460658e92f6fe698cc7e63c9a06ceaeef0e012d8a26161bcf4d2737 +size 581342 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index e2c40aa3ff6..506d38acc65 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:1f49910b57c4f5a615fc982f9bc7b7069aa188cecc717626f6182c40b782b430 -size 561650 +oid sha256:f5c5c4d3386e2b9963e815ee773f3f58677291252c2f0038b961d610d8644141 +size 446902 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index ebf805d80bb..026901ed285 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a1c62fabe435e9393064a990252857cec3974ed587d365c47a8876a89fbd968d -size 498842 +oid sha256:8f56fcd19a0645041db6c7ddf5d2b72274b3409bf2890e63594a6df18c951dcd +size 378640 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 381696d7afa..08c69a1fc45 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:a0b48e878e0d75c633ca8fe152c19f7892a184c856bddb9e2210312c938f8459 -size 531752 +oid sha256:0d9d472bfb717342744c8c2d20e233d8ca1e8bcd1dad273a5dfb67cc5fb5a6aa +size 422726 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index b48d7a7ea4c..93f1fc3551f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a8b1ffe2f0e7d4e4a23df227992317f61f383c15e0e3cc658088bf33bd4c4957 -size 469732 +oid sha256:5b3f3b97bdbc378efed10abfca45057fc90ae7cb474f548b9a2d673d6c7d46f3 +size 357622 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 00000000000..ebf10fb7bd1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9db70bd58f0ececebe99af2a3e5b0fd2210a61dc0e132014b29b4b82226a140 +size 643601 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 00000000000..fb1bf1f3356 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8fa00342e3e314c30d5ac034911a9ead9d2fed9944669d7e76948ff81e55136 +size 558122 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 00000000000..19b4b4ebf9a --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b7926a57348901fbc4ae9a491e32e6c9618ab093413ce2de5130ceba8debc6d +size 622289 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 00000000000..cf3b47056ca --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e000110811695118a329708768b489af3d7d0fae31eaf177484b19a83fc8a0e +size 536810 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..f86c608a7e2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:468afb05624f351bc10082cbebf4bf5062e3677cea4b33261fd2c676da4e5403 +size 577350 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..284940e9076 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e12fce20eb2f118eec01b99a83b7df1f7f64084ea4597558c00e37dc7eef5338 +size 548414 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..c9b75a03825 --- /dev/null +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c77ef0846f3259e498c84eab2ceee9d9046090ea2fde08eaaa84098f9ed6bf0 +size 564246 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..6d7d9319fd3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:232488b1d9400a6b762f1bd0e2410e4b5af32a9a246d7cd25549bb1277d81e89 +size 540070 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 00000000000..8d065652c79 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d15bfb99f4c5c25802b3fd770d192ac901c92488bc720114696e67cd030ab17f +size 644685 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 00000000000..3d0858afb9f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be09aac430718bb6bc7b2f781940b2eaf3ffcf82e38f74069edb9219ad481570 +size 591396 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..73740505866 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d798a006359f1f26c859f4f0ec21610ed920a9219ee90e46f4d970c8ded378f +size 540896 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..72532d793b9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faf27c3b14559d4faab6bd9966398575418d188528472671484f80757d02b4c6 +size 483712 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..c0797647955 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d3c65e042961993f48528bf6889c14930edc96711d3a8c10e7a141e59fdad21 +size 519090 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..8427338b619 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f773f14e5eb64282f0cb20def7afca642c44b8f3d88d7de64a81ce97ebbc730e +size 462694 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..745f21d7924 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:b6eb83939ffe064c5754f001c39a7438ef3fc26b0361e89d0dd67e9f99e8b942 +size 556826 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..c1e366c3f7b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:258447d9fea24b06db34ccda1786ea522e7f6de28929ff6a3e1d528fc7ce23cf +size 527890 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..2f4ed9ad1c3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7f5507c10fb937110d5b53136a2da733ac3dae4453d206d33c3b3ba4c53a2f5 +size 542934 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..3ac1d9d91e7 --- /dev/null +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d1ebccdf1fbb88bef38b3ec3ae7011d5720460dc26f9579078160b0d50b32f6 +size 518758 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 00000000000..14fd889e92b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22f8b36db9bee30c84d485928c7868224ea6b3cfbde22c41df16e21c033090b0 +size 623373 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 00000000000..2a015654574 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee1938fbd4ab4bffb0481fb2ca6c275618fcca3bcc111223eaa1ee08bd80b889 +size 570084 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 
100644 index 00000000000..1caa733da3d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7fd2571726fd4cb244aecc37947527269dea08c117b76fa557f84f8e98e31e6 +size 519584 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..897992d079d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad29a024c26c97cfca9c5fd36711d75a1b2e2ee003b6d6346f6b24011b94d026 +size 462400 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..bf6869648a3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be323d5d9b2ccd7f4dbb76ee5a382e139964300bbdd43303807bfc823791e631 +size 497778 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..0e5d8b8e498 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cea6e8f1bec6a98de7ca2222e31812508ec738efdbc393996e78f4e9f60c57b +size 441382 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..0b73adfa2e2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b68914d54cd117324f3fbeb96e72aaddab0c493905a6723c1a09bf59a0e4a17 +size 597906 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..9e888180eb1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:80c1477577eae3cbd18de1b93d924ff2389788b7a8a38b61479951939c21c52d +size 568180 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..dabfa886e1b --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef887adc13a228174a68d2b216ed5af26d2a0f02a815b040d22f885a750ef9cb +size 584802 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..cf9c83cff42 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:079be02277fe6e79eed76ae5de3db2b824633e01ef4c92000cc2ce2b3c2ecb10 +size 559048 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 
00000000000..daa3c29f2bc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e5eafb627cc20d2478f1ecfbf9281bed9557296198150c46c636138819df402 +size 680831 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 00000000000..7da50d2bbb8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71581792b78acf65fbac95c4fbfa91dcb56c3722245fcac43723d81eb6f05f40 +size 596142 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..5232eadb74c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcf550b19440f807eaedfe3d2f5612843c15aa53efeb698e02c079e956685035 +size 564512 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..5317b261483 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daf06a7d096b36abf96c3709d7e27c5dbcaef0fdef55bc2c687bda58037f2360 +size 491516 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..c1f215fc2d6 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8892c0099e53f8788c95e413d00fc0151fdd6399e5b3422ad798983b6831271 +size 538758 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..eedf7c300a7 --- /dev/null +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79d6aa742973be1f54491256d3b1d328c4700e958e61bc48ec3f9c86beb99503 +size 469708 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..d664083b253 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:024d03446d8b060ec5cb5743fe25b9564488a06d9a169fc50d3a540ef3a68c45 +size 577384 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..fba4a9160a8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e0aa7f3c05928c187d44ae19258969c6305b3d9727aecee50df43fda9ba64c2 +size 546868 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..fb980b7744f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec7cf2baf07aba0d19c90cb99205d59b25ad9eccbdde83f4fcd912c992c8ddd3 +size 563490 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..0b4667d36a2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4a92a9f1363fcf04df4663cd5e1da2882531d3a7d604fde2ef49fb4c7ae64f9 +size 538526 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 00000000000..acbfc2dbc86 --- /dev/null +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2abebdb601aec5de59597c60734337231893988e5321c8bbbda1fd98c16e7c4c +size 659519 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 00000000000..313d8e78bc7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26ae99eb517b8e032f75f35bce06348c528f75797e6b7a72e3089239e307ac48 +size 574830 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..714916c2186 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7890a9b7b9f9be2ec568fd9ab329716b883a4980a52ac4d8c7f0cf0c7c701351 +size 541622 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..83c60684232 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96b61bac1e9df321b4589dcbaa91dbc9452d1edc37eb0191f051092d12580e71 +size 470204 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..1d791f576da --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21847047d1bb26303f3ff65bdcefc0bc293022aa7f33f0ef225669e382969ee2 +size 515868 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..47909c4ed47 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:7951d661d2091fbf0edd90d4f69f6e9bb188f250763e0d101bd3bdeb7da3836c +size 448396 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index cf6369b4ca2..abb0dd5d027 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:06836f7c78db1eea5c59ab3d5f26fb04278703f38b07c4de68a9900857941c73 -size 541034 +oid sha256:f607c61775edfcf3f47b485f63257fbce88fe142f7ec181c6120cae1020ebcea +size 546412 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index b07231bc3ab..50f6cd1fc0d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2bba770f47310f7ae28b19e3e4a8cb99d69c9b60ea32f57115edc74478f3e4cc -size 482000 +oid sha256:4550c99b127d6e5cce91a0aada71698c1d53d22c9369ca869fe78ea8a9db5c06 +size 489004 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 1112f3b527d..0b844befc0c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9ca2967835f15d52449a744deaf2235bde70a85dbebe0ab198f318bdd1dd6a53 -size 529984 +oid sha256:104f983b6c784ae4dc350002ba2041a5a611f35b894e9559908237e27bf5485d +size 535362 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 2cc728c80d1..94b2efa9d63 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3cb5cd33ad61e4d88e669f25cd02fac75eb1f84d6f8e48f209566fb4019c5fd7 -size 470948 +oid sha256:ec11f44c73f8a0c30e17c4f38e0ca83ca2caf35930a01916c0a37dd5f4bb2866 +size 477954 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 7d909008be8..321f573a587 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aa464c54634e0f6c81b6ac2372504be315e8d6d6aaabffdb677bbffe9b194ded -size 511340 +oid sha256:5d2dd9c8691ac39d863ec6a03949701726e37a020d7d309953608e91b219c57c +size 440226 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 75768308098..848134aa86b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7ec830b43a202c44903d512e5a0af4cabb6b67b8c662cddc30de60c647b8c0a6 -size 486966 +oid sha256:18e44860c35a0e70ab5d251842be62db55f6c0ae1a0d1d56a4d1242a3e589ffe +size 422118 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 635c3023870..1f23599fd05 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4249198469f15a9885d24793b0ce39866e9b86c5e1a341aff0b1deb78575af94 -size 513332 +oid sha256:8b92cf1407bf7d4a412a33261e752095afcd73db7ebec181db8eb870742470cc +size 434990 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 443a5967758..56ba3cb57ea 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:32c2a38eb147ef88bc7d5e12154c635ad60ef3ed83dcae55074e5ede264ff942 -size 488958 +oid sha256:11627a586e9192c7b98d6cb5d7e5d0e0523466e93727cdc44b98323f0919d100 +size 416932 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index af4b0788363..632ef8d2a9e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3b85d77d65a25121c1997fcb9d683c80da4fdf4a6b2a4fedd5b5128a321279fc -size 536296 +oid sha256:48d2d8411beef49b0505b1ad3d9abf8cc20735d581d9c77255df1d61646d5da4 +size 542464 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index d249c49907e..f36746d3f02 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:28caf8775f66c44d142d63cfd682d11d131624e91c79c9dc12ccb23ec67b1374 -size 480518 +oid sha256:ccb3c33ffa5228d40a13c5e7dff78af575091d80797cad71ee3e8a2e44e5ca8c +size 488510 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 36d1e69ecdf..c18312e668c 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6d6abb9bbfc74bc077a46c1d9d60ae0da3feec796a90d63dcd862a8744b85844 -size 498764 +oid sha256:aa9d59793942531b1625395db781c0f4acba9cf3126a6cc51009c94e667249e0 +size 424270 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 52c3e00dc17..b943099b738 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8fc2bcb9bb1570150bb51a1de8efb2c63035c0ea67cd52ecd8bbf584c965ede5 -size 444564 +oid sha256:07cb79b0f19b0daeead6b66ff4a7d7a9a0afe3c8792f592d20ff7ecb782a289d +size 367086 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index f3575bbdb11..fa947286103 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:938215e189778dabaf79693b6d5ed43aa138d6300b1c1249f14bca461e6d86d8 -size 458432 +oid sha256:e3b20d866a20e7d520b0bb2f94e64558bab22fb20e624d0a6a130e933ec53d29 +size 403844 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index b005201bf25..c5190b994ec 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:817c172c740029d2c07ee6394aec0d8885a7a8037e1e4965c4a7b4b4b8628cc8 -size 417008 +oid sha256:58773456ab84b701a85f0d6b8cf124399caa13e8d8999eb0ea33cb5c1a862165 +size 347448 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 209666c80f5..c85dc5aa134 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ 
-1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a857d94199fed928ec3d3048d95007216cfd1eb559afedb44b0a8324b6edc3cb -size 501078 +oid sha256:bd834a9233128b8aa2748ddcf5aa1b5d40d5c0dfdb8d36c7189b5a9ddb4f6a74 +size 429964 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6e3086a91b9..e8156ce60dc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:25ba9ad69089440a3262ebb0eabd33e90159a95117bf73dbcd218e8b6b5407c1 -size 476706 +oid sha256:d83f7c1d260828c770337d2d82a5737cb036e5245b35de3bd72775b51a8e646e +size 411068 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 38be318c908..0a98b22ac14 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:322662eae56191f968b18c48e435fdca29a89cb82964d00151930d608a6e9705 -size 501492 +oid sha256:dd5c82c6ace95f20e192889242ee5fd97e5110c30403cbf9eb565a3fdfcfa9e1 +size 423940 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index d0abab5f5a8..e0a947a7ff6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6f80f4352311cf7f9c21daf5891084092accd75468f464ea622dde04b7cf5e37 -size 477118 +oid sha256:15f1f68f21b5c4864304e85785d297519278d025de9ab4d16537ea674f82788e +size 405882 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index d4604a8513e..7ca6dbdff25 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a5c6c956aae0f5c33b226cff9fc6726185bc58146480bef492f5b1c104f24a82 -size 525246 +oid sha256:1128a62a4d31b6d238c4fa736da158556e57ca7a58435207279beebe6cb292a1 +size 
530624 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 6f99e500443..30585b4a911 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:903dd8220f82823d394cf30bf38b0b1523bc609ffd4370d83ef5cdd73867ee26 -size 469466 +oid sha256:09142340b328fcb68317406a0e9481c3190b64ac4892de355c91643d898fddd6 +size 477458 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 74e16e6547c..38870381965 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ea08615a43732056d1b6fd4f99d93733d920b5cf4316d372c0d76f4f848e0014 -size 486924 +oid sha256:b6bdd27c0d5e288916d0cb0ae9c0d0499221cb7656c361279cf4b012e535e5cd +size 413220 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f5690018673..2d6fc07a2de 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1f2abfcad4ab2dd5f2dd05c0fdf71f5b462e843a145c481ec9de787ba43bb32b -size 433512 +oid sha256:6464a76a1341af0760acd2cb7cb9cb8f292d16c608ce983e0c12bded5f3e74d4 +size 356034 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index d45c290d3d7..99f0f274dbe 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:72570f056902787ea84725d6eba830eac438a56ade658e6c97e5128e2f03ea49 -size 447382 +oid sha256:e9b7f34e656d005533a860401ea93b5b6a7187243ae307843e212b5f38bb5698 +size 392004 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6e3cea43d61..e8ca300c33c 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:88ad86b65b9cd188283ea3237503b76103a658d7e0b1c41878f1c067b69c414b -size 405958 +oid sha256:7e21c44f85c8d8bf0f528dfc735151ab632337ac21766ab7296ebfebc43de68c +size 336398 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 73e608a141c..d5ef74de6d4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:92928e8fdd3ff1da24e5b6b931146295a05fa010bff50ad271bd6525f9118813 -size 527160 +oid sha256:0d7ae099f2a7cf348874977fa3c19bc7b8c60c5ca632846d581ed68c3f019d07 +size 460782 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index b57e2166be1..6277545765b 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bac51dcf684f3e2139c70bfb7c5f74611a3aaaaa5ba20b4b3d39eab230ef40ea -size 501998 +oid sha256:887e0cf2e483e6f877377095038434bbea75a30a125fbbbbb6bae880abad7813 +size 441096 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 64259d8b202..68e3248ebf0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4f3f0529a1531955a54671bcbf6aab4a85e378e1ab3fabfb00e1a12e83e04ef7 -size 528362 +oid sha256:051899c7a14da745a2b534e484f5d95529a4609bbb24ee988c09f4b9c85a2b75 +size 454758 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 
ff99856269d..d662b287975 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c903cbe847179ca25dd0ad9b1a7cf1b5e30a749d63c73a20735741f6f24a5f15 -size 503200 +oid sha256:d5e1d3c386e5ffb1657e9e383022d7085ac256e3dd1ac462b577e26b8da09f38 +size 435910 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 64985d0fc46..0953e009188 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:62cc69a467567c22d2d9ffe22ecb416e68d391e8136ed521e9f8df3bfab6e97e -size 580632 +oid sha256:f3bb7ec44cf77174a61ecdf88b4217677e839f0c7639dbfb4ddac65d701414c1 +size 586010 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index d7ac0f6dac5..1e7570d5f37 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cfde19665a429da87763a778c68fd9f916acf76dd1ba7cdf03701bac27008a67 -size 519920 +oid sha256:c9ae0c184a2f6aadc212bed9c55de5e08aaa4ef6fe7266506aac33026d1bbf69 +size 528084 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index edafa668f87..d1d4f805aaf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f410d7c9c6adc61f74e383775562710cb184e743121049757237579c24755de4 -size 513722 +oid sha256:d48f8a923fb2792bd6a76f777a58d89e15d8f8cee3f2b8a76382ac0ce060de34 +size 447886 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index d4c611ef2de..2f3d2ca1b7b 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a4916e5d2061bad8d9b7983e780bc08a4d42be2b8b60832be4f9c2517831d946 -size 447680 +oid sha256:c344a297ed93cefef2b9c383eb09d79e26b70345b9784f07758ad322996b4c12 +size 378046 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index b30b6adee7f..eef777c1f3b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:46e30d0ae43ac6275d4f16157e1f2a032ed0ef98a784100802a576daa7bc72d0 -size 483824 +oid sha256:a2d02461d3685f23a33ba9e8f7ee0ea56e7a9522b43ca7dd3eab229f39531a99 +size 424302 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f31bbb63032..e10cb828141 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9f51d137ad16fe51d3b96286b893eb77a2aac3e64a884cf161fb0398c1a063f7 -size 420126 +oid sha256:0340900f165600e00a29b2a50cf0929369c5a730d122251816859590e9989b78 +size 356832 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9217cc481d4..57f517e6a72 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:40ca2244b170e7ada9d6d6d5b5599850af86521d7a8a87596951dc1d3f3aa3dc -size 516110 +oid sha256:19bacc7e90c7545799584fc518a5000358a777cdda48f2df2b3b2ad86e19fe1a +size 449732 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 0373617ab6a..a411d65e9ff 
100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5c68a3a2231b02e834d5d71611756972e465631ec6982ef0a85055cf6fcfa141 -size 491736 +oid sha256:fa3d22c3085847a1df031379177b9a456515c590f2854c95ba22c76aea549158 +size 430046 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 968234f2a18..d630867b223 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c8d8ccc12d6c7b1ea617f71cf8b2064be2bbf2b599e5b7a90ad4d9aa922e6de8 -size 517312 +oid sha256:20602da1e43c2c1f44566c0de1854322fffb1fdc51c08223c3b5948d4062f94a +size 443706 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 
ee0259fd31e..9e7cb255371 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b75acd8f49330ce953fdb7098e4ffd360b9dc733bd628e6f801726345ebb6c00 -size 492150 +oid sha256:36bd8373c35e11bcf8596429257a3d32756d003620d5e381d0982d56a08ff449 +size 425648 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 8a35129fb5c..d73b8d4c5ba 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d838b0512319c8fa319450204d25136434c485be287adc946e5ccd10bc5d2845 -size 569582 +oid sha256:460f55b9356a49ddef88cccba00405c65e7f9da766597551aecd281061d1487c +size 574958 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 4f04ac78129..4cb68a96bb4 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8642f99cb3b7711dba256222172e40c1a754e07e4f4ecf90186aebec3f5b5d13 -size 508868 +oid sha256:b642a5560e0b5fb0e1f855e017a7732b45d3e5ec160a71c75f45426d9eaf51b8 +size 517034 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index e82653d8526..ed3599bf4d0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:276b40428b4526b3cb9d67569bc7b3084ea0c605b0ba31c78721371f09f251e2 -size 501882 +oid sha256:cc67e2c11a2d3a5bec8c3a8b75f23222943215db1182b38b582b000fca4abcbe +size 435256 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 5ee89796340..1835b40bb9e 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c9fad57ec5c64a443aa3759f1f74959816423394b6d4bcbcaef1f03b88ccce15 -size 436630 +oid sha256:7700dda59dc2d1229e0a465122cf49baaaa1dbea6c8fb4c6f52c6f959adad71e +size 366996 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 876b86a9f41..bb3e084f294 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ee2d9c836fddbef1d756fa120a80302ecf53a9e7129778c3e971168c5052c76d -size 471984 +oid sha256:f23b7ee254c170c8099bddb777fffc143a1b51d6402b085e3a9c3d0759dd02b7 +size 410884 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f003e321a70..ea47c27c173 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1dda997feb633d61d92543384520a0bf7cf792638b510eb3b2b3b0d18dafa909 -size 409076 +oid sha256:c3d44a032f36ca85ff0c86daeb474e59c996da40e85bd9611f8ef606c8f1a8c7 +size 345780 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index bfa6a73bd56..855cd6cc04d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:857bf07b827fd7de04c8701c9e5faf19adaa0941c96d1b2aa05f37c5b4427434 -size 723701 +oid sha256:804110850cdcec65467364ccc6eb4451676d49b4ef90edbf31c3b8d1965682bc +size 711367 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 68be6b86b57..81382becf2a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7a1080c0634b3dea52a1fa23c5deb48e9f6c8963ab44d086993e6d34ebecebeb -size 632277 +oid sha256:55c1452ad7a0b7e52b4b1fab46362acda2541fb9ab7815bddc3cf023a4e016d8 +size 623397 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 8e6f8fb416b..96f424ca889 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:93476deaa5e37e11b2686a167aff45193b8141c3d43374b6c790490139ea69b2 -size 720985 +oid sha256:6b6a44dc167aad9761dbaf8bd40c5d8ee559a49e5434dd1be4b118f149ef94cd +size 708947 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index a58697b55f5..025b135d3ab 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:f34579590d2eed3afd09761ec9fe6d18123c6dd57acfdf99b8e88af090a10d3b -size 633115 +oid sha256:e4a5269de77c048079a5f1f0db202a0fa9563c632fc25fa54f6cd3e29dbf7171 +size 624283 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index de1371c64ce..38feeaa200b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4abd9d66148894fd77fff0dcc14541d326d34462fe1eb0b4fe801f48b7033883 -size 791565 +oid sha256:cee8e891d78a0af738a5cab2dcc2940000be44a65151f2a5b60b4cb55b014b5c +size 777605 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index e4200736b97..fdaabb5f147 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:3c18c82c50d44955565fe97f3d134bd96ff2fb96a73faab8784b4f2001eb9058 -size 699897 +oid sha256:31978f2381f7d64d899e02b189fe8038577bb42976223cfbe19d9956465962b2 +size 691805 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index cd2884b7c51..3495477d139 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9d0068902aa82930bcd51ddac7ead28dafe70f9bc81f14cb1ea2bf070a4b6533 -size 823059 +oid sha256:f0d3a72762fd3d2475bdb000fabb11cf0183e7b5915e5209c4b77ad78715e7e2 +size 811515 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 53e52015029..63935c18a29 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e8c38af34a2707f7a20c6d10eaf974b89bb3119ca60ed29c3d3234075bb9f19a -size 727147 +oid sha256:c19363eb32d2400db250d4dcadd1639632e8eabd8fed35bf816e6daaabf4cd46 +size 714815 diff 
--git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 760c2519012..9502158bff9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:40b5af935feec4ca89750a22419f20c5970094ca8fb88917a98ba08629a68fb3 -size 809543 +oid sha256:d4e11f8083004eb096f3f4c41600c32c00a51b7c13e940ef46433e13e34b9f05 +size 797999 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 705d23dd3a9..83614a741ba 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:afa28f8726477462225862ab416d7df81dd519adc605d30465b2e66a25e382d9 -size 712841 +oid sha256:54938d4c1993c197ebe62d1d2001c95758cd3f23195d053e82c3812a603a219a +size 701297 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 1a856a406ca..267ee238074 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0c779a1a1f91019c9f62baade367fa562382995a85bcc56c2b2aab58572629a8 -size 819851 +oid sha256:6afc1cda7605e0e29c90c74993cba32801b0de94353e4235af294c99ebf5b306 +size 806827 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index d72694a36bd..84a27ac27c4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:72c670f8919ce9e89a4cb3b342a2be81de2ddb4462a2db0a4b6adce22f92711e -size 727541 +oid sha256:8b53cf6a19d213b69c2759386ba4c2ffe6f6874b7118090a7f277ed53584bd6a +size 715503 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index c58c3111be6..47b725d1b64 100644 
--- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3e04f3c5c182df54c21eb834406bc6e69cb7ba5931e240663b2b2db5331b453e -size 805545 +oid sha256:e2eba1a0192aaea67c44510b35e5ffe5730caa82a2f9b78ff156ac1b83c5f7ba +size 792619 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index ef1b060af67..56b458c6734 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f860b5aead51f83794fc8111179662422aca0e5fc403d8bf07abc6fac516542d -size 713233 +oid sha256:8589f0ae1daba58eac6b6431e8d72424298d5622c3bed9430246efa64fa95a1b +size 701197 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 3c1e66c96a5..8d030aaf0b9 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7cb9c7cfe6b1ef62cc4aad134ca29b8c3c094607a928c472305c1542d19eaaa1 -size 893195 +oid sha256:ab47c4b41dcdb3f52ee2eab4bc68bf2828e18f1e5afe946aac96e8fca08087fe +size 881749 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index e44e21f2e9c..678cf4ca213 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:576e44765389ff8b4a1657b01315a956f71008f526d59ba36fccce7f7d59d742 -size 796049 +oid sha256:2efca32cedc22eea1bd558a85cf02921f5c1b03ddc87d7f64a7554aa542a98f6 +size 784999 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 49cf5b02966..5c9f8f70b29 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1d1adc2603f4b7ab1b42135f81ebb2c088272a95ba94967d3cdbdeed9d2850ab -size 879677 +oid sha256:76e8db79ce9027897c95c6ab8d66e8efdab01d7b5a3acfddfad38653a89b5b1b +size 868281 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index a6d8a60c570..7c6313d2052 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f97e17f06746b036e2737a8883245b6e5c29283ef7df4893fb0205c143d2a884 -size 781743 +oid sha256:593849bb4e1ad8ae8d6ff74f56e7e24a586c227b2d49b358c40452cd7141ed61 +size 770741 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 35630186ed7..c098f57b8b1 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:667e0eea66d493f7da246073791921a87a7e7c74d7188a2601de98340e30a4c7 -size 661737 +oid sha256:6af6026d1b6978809196f92d94e89443852e95560c9b9317935b53fcaaeda3ad +size 645705 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 60c6bf9897f..79d90547424 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:66458aa541fdd5530336248fbd62f7979a60c271b734f2fad8cf6b24b5a2739a -size 564444 +oid sha256:97aaf3840d2a5e87140160465479d9791cb968633fb79031e0aaa0e53ed7b2d5 +size 547768 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index dbe610589fb..17b436cd316 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:42016cc64a7f1831f86ad799dfdceb36ded14c4993f5b7dc1c7d6859af3155c8 -size 660157 +oid sha256:075282fc574b4b9fe9cda91dfd168f4ad966e4e69a957e1097fa406fd5c6e300 +size 643187 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index a5a5e335349..9470cd2541a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:58099c7dca9f71bcd6617d10b53e178ef3cbaade2ae9f479c85ac57d350acfbf -size 579784 +oid sha256:7389159398ee1fb73662efd775c3784b99e4b2def7558a18e6b60fd5166a9a65 +size 561580 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 7b79c977fec..912d850076c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:074473fc6a0ed2332b151aa480c0b327997f656fb48f73d5781ce3a5bec5d0b4 -size 728913 +oid sha256:b14b5733f0fa49c32a1d0e050c296a027cf61f7cfdcc32fe28fee5e19d607d22 +size 712879 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index bf1f0d79c5c..13ffa915570 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3da84a48bc30bd28ad9245daade56db7e9ca46bad9b0b3153f32da913f5c2f87 -size 631915 +oid sha256:fe5ca895ddbe2658d739287bb747fba36154931af230da966ac55092c20dd48d +size 616028 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index eb6f0ff62c7..7dee5a65167 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dad7c7d2f73445c1e64d3eee2b230e4ea029f72cb3c15d5ff5f705e004fb0c52 -size 779745 +oid sha256:48383c75a59ff494337eba35f85a0ee5499245b2ad809d61b3db2ee17b942002 +size 763859 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 203673b99bd..396456bb2f8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d31e9fef063fa5d1c6c4fc2d2cf9b3ffd647e618690f7887aa183c59b1f9e39b -size 674607 +oid sha256:779eed8da4ef730609b37b2c5f7ae7e80d036717d9f9ff7578764f1691810f03 +size 657143 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 05bb3ca250b..2a58c21ea67 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:ba71a165c7d43658c26752a5eaf5dfe4fc36ce2a03abeb570f48a095a769bd5c -size 751971 +oid sha256:6efeb679d7e776cb6509ed9f969c575a06757b8f590d901f0a0097d26d1b31f2 +size 736035 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 7b74f597608..3c25bb05567 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:08c7d056a97d577972eae0bca7b76ece99fb2c4f87c4d39cc96ef4bfd994bf2a -size 646783 +oid sha256:0e516892fd8d94a877d7e522a1245e3d886c80a2663485bee58acf5231ccfbe0 +size 630109 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index c32a747e48f..55b2e34b391 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:850e1fcacc971868ba94867cd00d03fb82499595f2f1257580054d39a684ca8c -size 774859 +oid 
sha256:d6454f7830809b5a90b95cb721e7380b6d34eadf362c1aecc86c94a27a3f28e2 +size 757691 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 6b0a7198c60..266d51e4882 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:65cd94673f5053d5e525dcb4b79ff8071c460202d37f5e006f9eeca659f1e974 -size 689899 +oid sha256:854916a043f4dbd2c1943cddb7e337d2d7dc15f9a572958df1390695990c9eb1 +size 672237 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 34740606293..67833f9e8a1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5d28dfff144dee8d23b28ba9b0e7ebdb5364c803eb3968cfa269b334fe8028a9 -size 747085 +oid sha256:144179855a572f6c80d98250f1fe57eaba6a77f337d1c09a80322e3616356459 +size 729917 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index ab7f870bf3d..25d6f861863 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:07e15c5dcd564c2627fb17eee65a692875bb66e948a0733813afb49347b63140 -size 662865 +oid sha256:60f5fae056d9bde564424995b73351471458086975d5d7ab1b81d17555717aec +size 644463 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index ee5b79951a7..75b60f0e153 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0fcb9ac49c4bdae3db5e1967e692d770145f70d24a2012bccf7f109c7ac9b476 -size 844947 +oid sha256:c58f0ad79c3554d598ffb065d93843fdded072589fc19cd2c49289f441a9b7bd +size 827039 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index ad6a85afc18..7c24dda3535 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f09ed47ec94b6860becf1e3b3d1d5f203c2d588bb7715ad54fd6bdeb6b5db712 -size 740993 +oid sha256:10971246447c2424b91ea2e1c1cb52280773e178af1f6fdc8ca97069165b581e +size 724515 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 3f181f62a0c..18f2194101f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fb6fbe4b202bf9813401f14c93c1d2cc3644b5ae4506ec07af5e8c8a61fd6edf -size 817123 +oid sha256:884048501e9322995404cf6f1527b4f6da4d72418678ba012a4c91ed3d9db068 +size 799263 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index d7becd23373..bb9e96c4dbc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b6cf7bd7a095ec936d1bddbc7f58e458444d6380f5f843658e416d9bf34445b0 -size 713219 +oid sha256:369aed24cf15e036abec3b6aadc27de5e0dbc4a339ce7a1132e2fbaa04416063 +size 697531 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index f8eadccfd27..036f2604c8a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c39257dc273946be07032a9a8f39b1a4d1d23fa2d34d7d4b8c8c2eaf40ad6766 -size 635935 +oid sha256:a682cc9596b42b629a004545afaa2edbc5e0ed1987c0ecea1bc1131ee93570ed +size 646837 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index be0d2acd31e..31c9b163616 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dd246a42ab180df0196d3be1e1ef2edcddd85c6632eded3c36ba888a66c9deaf -size 547126 +oid sha256:1917100a32cd74153d9cc857eef03af4317df62611dff17618064ef204a0e3e7 +size 563110 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index a15d826aa58..ef4a9a5f2a4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f45527b6eefc61d09eaa99fa2d8dcd12c9d303456f193c94d864d84447dbee6f -size 633515 +oid sha256:de56d155416ca0a8cc32853dbdf3e6bdc67478bebe3ddfbdc77498aab9ab15c3 +size 644467 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index d6d5808a829..49c6f121e48 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f96315dbc83da007a2ab8d0e72ca0a39fd913e35b5782f8050cfa178a247cbac -size 546038 +oid sha256:63a3055203eee0d2630f256b6f978d78a567e091e5feb30a9c0acf35d514d006 +size 562812 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 0e172c382ba..f4686f01a0f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:98655d361bb3dbc62888f9295acc799b85759388d0491cb904e07d86906a8f8b -size 702961 +oid sha256:afb8a310b54db9b803c7cd14277f37b0b7511e0179d7ed36f08f97f906c83ab0 +size 713863 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 19df1259158..c7aca016150 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b5a0971a986459272ba2d15d436aa7784921b3a5270825be463b9a54e9b2a5c1 -size 615238 +oid sha256:c651df56d13922e07734298331442f2cd8d4fa23b371c0239389b287d08dd7d6 +size 632011 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 5840723ad3e..10cc8a22df8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d815fcebd2812041bfe6c27886a9790564e8c43ebb1558a8e7b797deb15e4f82 -size 726463 +oid sha256:2740d5517d60d7de1dfee9e35fd075fac962cb080d0fa06df205e929eca0eca5 +size 737465 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index d5d48a45a15..e1be9946f7c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:79faf92e53a04b250be660ae031c6734ef3ee92e5c764f9a29416dd2480cebb8 -size 626309 +oid sha256:3abe40c86b6af5f3484c9396a79e5dd5b002f9410fb84b340a30535996c6c922 +size 643081 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 67a9ef88d11..2eed5cb71f8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7d8184673ca86917f0161e7d9a18e58c37254c7bfc93bc51f12499a249735cc2 -size 719409 +oid sha256:0f4ff88fe6615d1f6f1c388e23364a45ea33b004c64b317a146d61043668b262 +size 730311 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index e5aa014d74f..b1223679961 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2e6d8cbb03a9ba5faec8d3f00c50aa11c2dcf43f46ac8bccd38a0040a5917342 -size 619205 +oid sha256:36f5a18dbe9dc32f28af7558805b0e1d3f989eed4c86e735758830b37d0ee75b +size 635929 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 487c52be8a7..637c744ee77 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4dba3c4ac676a170f37f8b39ef3c538cbb9ef1297fede571ceab012ac00337d1 -size 719751 +oid sha256:e9218344b6dc1f1cd50917b63409317079fb7521f4a42583ea4ccb5d4cc74738 +size 726955 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 99b6a396893..3c6fc7809af 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b9c9b37ab25b4947e78b76038874b2eda86d658da31eadf6f7c58e020701ad2 -size 628427 +oid sha256:c4ae46ed1731bd43d10ac90b90673d2bb099d60bc79fd42468d7968dfaaeb875 +size 642883 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 3ac88c8cb70..783d06d995b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a13b2936f0233b1f85a060279ac07736a486723e940ccda893c270c07d50365f -size 712499 +oid sha256:36748f31ebdf23ccbb5885f6a87d8b77a24a1f14cd24aa417315a04fd4d59093 +size 719899 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index f4642dcce77..bb0e7ec9256 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:e432b42ddac5c29b816d6cba39df8297e4a64ce3b3f7ca9bb873529a669e00c7 -size 621275 +oid sha256:2c8510edde0cfada8d26666c1f0eff9767528106043d65d4164b71df81e60d3e +size 635779 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 5bd536c8525..adbe5e2b071 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:28b80a69c3337a9374cdfe675decec5f1c1570968a1acafd6703183e5bede6d3 -size 795463 +oid sha256:9b30f96eee241c4ae70e41c34a999a50e3667efb27d23816f2bbc31dba4edf3a +size 806465 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 712eb808af4..1ce8cf1fbe8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:e68d537e9ab778543e74238ee09328bda095c18aa2053028a4b8e933cd13df78 -size 697331 +oid sha256:f6f4d29abebffb8c8988173ceb174669cd875f09094bd3dbd702748007008e9d +size 713611 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 1322a2c08eb..9cb91d97674 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9d3cab529b8adaf0fbedc8bcb1fe0d1682057a99882d4adbcef9347316d4a08a -size 788409 +oid sha256:139494e09ecbb1e09b53984d363c9a5eb3a785ca42b76e1cfc8f21336b6b37da +size 799311 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index faf7a61c479..a32698f1f57 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:598d6ece3ba194a49cf5039192a80d4d4bd7bfcb0f41d7db59c9abca5e460314 -size 690177 +oid sha256:d3f2a0ac5dfeccedef4d35b67cd6b4b0c81876fe324d213f8826d28c03a236dc +size 706457 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index cbe1e3fba9e..152e2a100f2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5122170875ecca9d406fa3825653fd5c10bbd44f85de68b24b8d4bbf4ce0c91c -size 755633 +oid sha256:a467d8291efa037a5ee3db3e70058174df4f941b20e8f35d0950f31a30e147e2 +size 721001 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index c112526672e..d4e4139ca9e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:03cccd2443c0ad4cf1282f13e91b6cb0d6b8cf5e45bddf030180f75fca637222 -size 663223 +oid 
sha256:9d49b52cfc3b40bd6837d291514d7c574401b191efc06e23d0d89ca58fa3f4d0 +size 630811 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 1e1ae8461e6..297df0549c6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fd3f862d5abd374f11cf2f0dc0f9e2da7956f4b5e3e478bfa8f57a2fbd5b75ec -size 753805 +oid sha256:6d55a8c572f892ef85cd611f015dd978d0e6ff5b1755477b6ccc5b71a9f767e8 +size 719519 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 08be5a0fa75..41de2eea3b3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:56af75abfc68fdcdebb793c182dc1f27a2f2c02c19aa69dacf0d1405dc89edb9 -size 669537 +oid sha256:2d08d25174c0c75fe700eb7f2fb8eaa0c2306d4ce0a7fb4d834be5a5eb938572 +size 633029 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 76603e74929..c0a648493b1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b372902960e53986085cb7de84b596cd5a832f4658de2408fff3146a68555168 -size 840291 +oid sha256:2063cfb645381619514aa58aa5a98e10695afe78667f8cf04e56246b147cc046 +size 829487 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index bc9453c37d0..35e911033b7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09556e17635abd2255bcd3f593815f77e539a842b44a00961222c99f3ac8ea53 -size 769045 +oid sha256:179a0a53d23351ea72d93c84c32be30079b18b458af2c60d252bdf646a988b5d +size 734019 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index a8ddbb3e15b..1b678d52b70 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0f0a598b7443e9bcf9cc7cd0f8bca8887ede39f88392d530c1233d421d030a06 -size 820459 +oid sha256:26f0c96f5f10828f583451acb13ee3d32dcd195275a1603572fe538c83f7106f +size 806449 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 1b544c2b68d..105c722d7cf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f3dffc8f2a084c6cd87cd1658bc20ad0faacd26e9fa725a9feb669345c0a4e61 -size 748375 +oid sha256:f340ab15bbb03f8ea2b5ec02d65bf85df97b407840cbeeb7beabe2503f319226 +size 709599 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index c0183fbca29..fbba936900b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e72a997c07f21fa241707b2027d7d552de8e2bbf72ff375f093eb5e1048cd87d -size 863623 +oid sha256:0ac1b023069d6f789da8697f05e1d286a142b337d59e9418e34c3de89c75c666 +size 827463 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 3371c686760..80c87eba419 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:17e0ec2345d2b9991f3f3f21b62b2d55c6fe810a2161ab6b2b5d49250e7e465d -size 787001 +oid sha256:b245ed24864772e6657c1eced824f20dd45fcc4774bd75f207efaa7baeac082c +size 752961 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 4b033c63579..484fdcff569 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bb7b6cd54dbf13cf51ee60b5786b6e6cbe50d7808de444aa14949ee6bb1b00aa -size 843891 +oid sha256:703102b03f63def53d2957246d6bd55b65a715d5b0be349a26dc861542909f0f +size 806891 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index b7bfadaaa38..e36388fa8c9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:11b39c0e719b71e7560de7cb1e07d41c2e68419fcd560a3be2fc6c630a3cd3ea -size 759917 +oid sha256:59bfcce86ad4da128485a4a87aee0c820a6b0333188e0e6b8e9cdd5406034bb0 +size 721437 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 565c316aa49..ab3c3c4933a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e61fb6ee84e126e5ef68e1e82e98b88f94b600499fe60876eb30f0dbdef045f4 -size 756427 +oid sha256:bb6d6b093d7c99cfb2ec7e8f0dc9fd769202842818d147e8554c818d1586cb92 +size 721795 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index f67fa9bfd6e..01c609be74d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6a9d8a1475858b6fd5b857e5fe286075e305185ba0127357949849e54b5a6be5 -size 664017 +oid sha256:70b892aae43e722b43f8658f185ae28fe0b4de56721512c1cbd37d48c4a9dc73 +size 631605 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index c93749411e5..f4ec64b76c0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:915b9757bc1fae5ac02aaa79ee939c6bf621c6b9598781ed4ba9c822bf1cdc9f -size 754599 +oid sha256:d49fdb2ee49e3555fa56db74e5837ee22f3752f5301eb395fefa6df2f4138a24 +size 719523 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 5f32d581da0..c3c892d61d8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6066e1d4b6cdd5299cbcd50f9352d87cff3f61293b0825c2ffbdf84af81ee437 -size 670329 +oid sha256:31ff4082e37e28321a4858789e601b997be42eca11f7db296642495d0f2ea5c3 +size 633823 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 2e6975efd38..03adca552a5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:98bda9b3bd09f15f4b2177219f6619ac00fdbc8587f5c736f5d1425c0b5af992 -size 684423 +oid sha256:7a9fdf6380c357e04d0183da22d592cd0da9639a411c8939a710cbc71277d75e +size 655119 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 34840183310..e776f566b6c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:738b400374a8f0e5c6cc777d8db20e094523805e9e5e32839dc5f869cc03c32c -size 599116 +oid sha256:234a917faab18591aadee127e228400b1490afa7c0f9124f92d72b22e8660467 +size 571688 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index a3d3a40c1ee..878db8d5d0a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d77c7e4a8bc8058ea52dd0e8a345ac19a834c49a483dd52f715daf7ffa3c8644 -size 681559 +oid sha256:5881e330211deb3ab1a336e73cee6c8ab811a8a7982fc3b06e3a00c117bc4013 +size 652255 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 933f164bb99..85376ffd4cb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:35b379ca5ad50bac2b823c4de2206dc53e0f9af50fc033dc3e60c87b7cc54603 -size 599460 +oid sha256:8b465eb29282ac4cdb3d8aba74fd52fbea58b11f107beb18502eca9913e346aa +size 572030 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 9cc6033ef50..dbfc8d8dfe4 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bb60375c80a1cbfa6e0d2dee27959dc14fd222630ed61ba4cb7e8ca66f352ae4 -size 750661 +oid sha256:6b2bf50aa52da95f679656b3534e779bd91f8a299d856a666b91c0f1a3b93465 +size 721357 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 5161cbacd7e..06b1649556a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6ff9507c0e8fb5f52440272c352fdabc8898f0068d26a0c5f14fcd2ad663aef3 -size 665749 +oid sha256:8b553d782dc40e13776e5d0a4f3f43d9416957d51f4b028ffec96e7a6ec7bb17 +size 638271 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 1ce70ba4445..6d86b1e25c5 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f6d40f72c6da98b1bfdf597a8121ae3fffb5481584a9097cfffe59d76a59cb2b -size 771745 +oid sha256:814f78bb4cdcf2dfe08c8de48333b7b8c1a5b34de770d8aab4858916d0330229 +size 742441 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index b0094c3ed32..3d9a95370ac 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2774127b3987aca27654ca14a3be1aafe3db6f4a5980184c61e5fcb481949886 -size 679139 +oid sha256:d81d017b5bc319c1e68d68d394affbaa3bb231909578f8ddd4ad9306b52471ee +size 651659 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 9d48b9afb17..b62d5d8531b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2e72d525da51dbdfc20498dc0a8910cdc6e1968ff70bdea761f45f4b4104193e -size 763901 +oid sha256:927b0c8718a33210b6650b207e0499b74c0fb7659131a045ee2dd5cdfd21cbe4 +size 735287 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 500dcdfb7b8..2b16d72de03 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1f4f1fe80acffd88f0e3a1638c6c835f8269bae8a894486c8b42060da8a15707 -size 671985 +oid sha256:0cd3279d7e978089e6d336d90dd2c37225546da47c108b510d1974e21ade4dd7 +size 644507 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 240cea5508e..7b9f168bfcd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:c7ea7ca6915b8ca095e6b33b14559fa821d7a7577872ac363c2d37eab24b0d38 -size 767155 +oid sha256:4958fcf6272de8419d500cebb478d7c2876dd08f8f10fbd985b3bfbc5b53f114 +size 737753 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 92e17f50a53..0bb15ccc576 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:61a1733600165c627e9dd5b58c3779c8e02acd95e7230f1cb2fda7a91b2d69a5 -size 679581 +oid sha256:8137e6dbd65946b648b6f9d99276cccdadc58a3bf220f027b7faa8e8f7d91574 +size 652151 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index bd038db9c7b..9bef4dc6bc8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aa86d82557696a0c16efcae2a7198effebbb245ddfcdc525448392f5a76d47f1 -size 760001 +oid sha256:92ba77b51ded52d8d13bc387e85653ef547e940e36ab7da3e84f7012de351c30 +size 730697 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 993a2db7fe3..b66fbe59ca4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ebba73a5e120514acf32ca4f598e00012353c92ed1a63fca821a82fe75bb2872 -size 672427 +oid sha256:8f5f5fcc0204ef8ab01e09dd164da38e7ca6eec2e5de5ce237af1ed5bf02af18 +size 644997 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index b02acbf288b..70a6ec14d70 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:91214377349eba4cc308919d6855c3ff41938ad6eac631957cd123816f8d0ea4 -size 841189 +oid sha256:3b4bc7161f0153108c190f4fd856bda389a2bea2c3437c63cb6ec581c09e0520 +size 811885 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 36b48fb4d96..b0fc6a5c2cc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b58759c89fd7bc732a386a7a7f3f4144c74d02ec8cba27db544dada454556b26 -size 747743 +oid sha256:fbaab2a72ad9c9ce87d9d16f9c3d6d0248841d4676875ab4bd640e079edfffe9 +size 721055 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index dca9beb49c6..e2385cca8ca 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d98fae10c1d044428433fdc6ceb2b9b12c7f2a17325f04488041063ffec41329 -size 834135 +oid sha256:048fcfda830f06af11fe17a9a5dcc6bad60f5f2169cc4e20709f8fc088bb2ae6 +size 804731 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index efe64c91551..4a447e4f09e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bcca2e3a7dd48b75b4d44f14ad2595559b674ff996036f9864236b23fded81c6 -size 740591 +oid sha256:35c2d6a1e65e4c1c373d0c621d4a5244e2749f24930964adc8ad6e2efd242d8c +size 713901 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index f06eb757e2d..e170dca9710 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4272b508a0abb081aa5346787a914d31b085176c44779a9ac9243d8af8c17df7 -size 649495 +oid sha256:9c6956d1f5bb9c3f093de9973296b78d45101465e58e1df8ea3de189397208a9 +size 619945 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index f7d93ab4250..c18242e7544 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:91b46b9071647a9ae72432b8562ec95c9de578c59cc9b4572b14dc0a3ffd32a1 -size 562758 +oid sha256:6361cf4462a0c2e43693da48b7875ae00849c05cf3178d1b2a4ec63caef30d79 +size 532616 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 3256522354b..d070aec4474 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:565c03582d6b1a35381e4d7118c7854dab504f763a631ed4713f6787e98f7a7a -size 650085 +oid sha256:c0b9a3deb3e4fefbf7ddf6cfd0b47cf76fcc9a9706c1c72bcaa8717297fc41f0 +size 619695 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 633c015b48a..31b6cb077f3 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c094cd3eff82e2354ce8de3db725d47e00a452a91d1e6f80b57ee0358c46d6ca -size 564186 +oid sha256:e0880dacb28ea94b70e5a35c3e2397ca01e07a65610ace3e77e080267ec7bc74 +size 535770 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 483ffd16bc3..c3e9c214dba 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:417a85b3ee5d8fa48957e373e4e16a923cf098f430715bf022fbdc223200094f -size 716769 +oid sha256:8e256aa049ae257fdeac14c8ecfaa40453559b91870ce624f3f67d809c90c67c +size 686429 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index a49348a5488..285d77712eb 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c36fa0b4427eddefe45bb373b4e2d4900be7f594cee4dab406866d5aeda0b578 -size 629145 +oid sha256:b1d520ee991a1e533c8c9101ec60c7a63350e0730abed130d76746f3d42aff5b +size 598212 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index f73e4252278..2f8b6d575af 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8885340c4cc33599b12e08c7394058d0fb4f6c017d8c872ae88689d94f77ddd3 -size 740911 +oid sha256:80d62b0b28afc0daac2e2b3912aada65ed45a3bcabfa984ef79d0b8684968666 +size 708203 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index d350cfe0bb9..6407b65ce94 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4ef087a64a2174ac65856b6b6b4e621e11bcc3a676fedb6f40edaf57beddd637 -size 652203 +oid sha256:49a2abdda8662956362db948903bf4b3fb033aef0b0c84f11e3aa05e3c1586c2 +size 621271 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 612fe5de2f7..d4485018285 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5e35eb50f080bca92b511b329dd049db9b74c3b416826c718eb8055bbef1fcac -size 727395 +oid sha256:37327e358d6a8da2af6d9c9f650157e8094a3e784ca636be869a99cceab5b3f3 +size 694735 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 93ce48156c6..32b3eb166d2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:d5b895a617db85d671ce1fb676a7fd0926432bec63c94f2f0284061b70270b5a -size 638735 +oid sha256:3185ae47b6b764a1ac0385d810de7a4f71a0f1c2731f8fafc2f26a82bfa3bfe0 +size 607752 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 663cd204832..850e3d4f191 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f0d1544f6af44cf8e2f590db42cbbde19e921055f37ae01b12f37b65e402f733 -size 743081 +oid sha256:dacc2206d01800511c214f52690a75d4b7548ae38beb9f0906ae1447f0a17b11 +size 712691 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index a15e35b32b2..7cc09d919de 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c1e8bffa9aaa9556765a7ac73d36fe405f18568040395ed6fcd4980301b948af -size 654371 +oid sha256:7a316eb21a858feeb64b795f7df342d683b8895d220066f0403cfac3d5cb076b +size 624969 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 6ad4d0692d6..1a5c2ae058b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:230970ca2f0d36857a9cdf133e4e19e44869608474e328bfb289334cddc1e198 -size 729563 +oid sha256:7c97e168b7acac9ba7474217703c52089e8a3ab1bd4069e0c96b93812d079d94 +size 699223 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index b21fe4f4e6a..8b7a36ca976 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a40c434218c0aee7d979715b33407998a2344329045ff7195439fbe50b5ca567 -size 640853 +oid sha256:9a1906bfa3da0d022ceaec4b81f31077d092d91f79f1a9b0dd1c455a1cba8930 +size 612288 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 2217dcbd381..5befeae546d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d3c1bd9d756e7d22a397c1f944d8a66e930d309a990f50dce139d1889a9a9a78 -size 807199 +oid sha256:9d38da4d6b33787f12ff9835b259c543dbf294b261f51ea03dee3c83b92e9dfa +size 774639 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 5f07d46d6d4..9326ab6f72a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dc1e43e32a77793e60707736f85ecfdacb6e8cd76c4ab73a341f5c610a66dbda -size 718933 +oid sha256:91b2c09f1be9464d5431cd77dfc01e4858b4ec00bb61e6062a52e109de53f823 +size 689629 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 1dafdaffb2e..7c32a51785c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dcf2d8f34aa38864bfd5046fab3156c2cdf38d572a82e7c6e7a53129c118bfdf -size 793731 +oid sha256:ff519f4ab168a1a27355ee805285b186b847e99f411d10939983a9fc397b8026 +size 761861 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 90254ad7db5..9e266735e10 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ad9da5033111d732a8c2d68876fd0104c0d2033e0b3cc70066dc6afc68ca56e2 -size 705415 +oid sha256:23e64d81981a0138139bc91ec3033213d14e65de9d766c915855a453ecb2777c +size 676161 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index fd2afaae9ea..6730876b8da 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4bc003b6f85731f36d1cb58b84c41f69d77ad6df9e00dd26352da32d9bc42c06 -size 618067 +oid sha256:ca8efd3ac65b04fb591491ff6f3fe03077c1caf04926f4fbe115d43154660f22 +size 616882 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index c08023a6ad7..345529480be 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:37594c0a11815fdaf7ca45e97ead3653219dd360e90586a69c3f533093bd56e9 -size 534982 +oid sha256:1a54dd933925ae9c4a3d7c384343176ea79b0d40315a14f272a9c90656400231 +size 533798 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 5e025bc2a2c..96eecf11477 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:89243f51870fe965a6b8f336a26d1b3366425d16fcb3a9be720a0db07115ddba -size 614464 +oid sha256:96213342468a411fc31bba4f9d7ea2fc9c57d945327185989bd59bccd1d15305 +size 614068 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index a4d0a050193..4892f4d0294 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4bbc0df03dcf0417cff9642af9d5be530a28f60b421f4b243c4d6d702ffa057a -size 534634 +oid sha256:8dddba5457a474607c1250ce61344495a2f1e9130212e59246fb8f90e9a421ce +size 533450 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index c3f86453390..57032053e16 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5e8cf801c1ab8601b589754d44cc0b2b4b1bc78157cffc37fef81143eecdbdca -size 684305 +oid sha256:857094917d13cc70ce651a9656a354e69152500818082247bbeb4b96a03dc361 +size 683121 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index ca52de1df80..42d8fcab5e0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6628a1587d7ebc8ed556a58e5349c43bdc3685e44713177b8c2b0f47c7bc701f -size 601564 +oid sha256:a2744c87c1abd0edc4d1c118861b0847592d453a96e08abbaf3cf00875707f50 +size 600380 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 7d2cee63989..0b3e3668c73 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:028ab0cafa49f7f2837383e9dc458dbf4a6e4b5a7ea69c6a1b2a447888679af2 -size 703317 +oid sha256:b9b5652e4c68ad8ce6457601565519ee863c2b61eff8bc7bf6d25b58bc73dc86 +size 702923 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 0c692f47701..f7500b2e2a6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ad4bfa0291fb5e9ee96ee6ac6ca81a56a80a48ebef78962589bf1f78a62dd62e -size 615692 +oid sha256:a5b92320d9e0ea9a1241665497da6cf79fbef67aac8fe905fd7ac9a599b7d534 +size 614508 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 0c7f444c9ce..c5647bfa22e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:40d51e3814b9b313259766c472a84aac2a8a310526b47450cc95609e0ca0fec2 -size 696263 +oid sha256:b66c4ffd1d60c40eab70b8d6c87915b3340fa249dce10566b10f9c472e53a040 +size 695769 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 927c4e0d609..47ceb2a0770 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d4fe82f65afe53244a3c7a210277063989a05e92e16f8efe4ce0e995dac1e5c5 -size 608540 +oid sha256:cc4149e3ec600b460fded43738ab0f814e85673f780dfd290f11d308d9bb78d7 +size 607356 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index bf7e686b3f2..96133f25289 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c11d7aa09d1c0d298790174e5c0817a1ff5eb6fd2cec99e92d1e9d8f6e8ff812 -size 698925 +oid sha256:e1d1d7375ee509792df7b70ac8a6ad631b3a5d0eb531b6beff2dcde57221cdbe +size 698431 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index a378fb40b67..f55ada794a0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a42499d6d95569acaca87bb2a879e02b11b9ba1772714b512121beccbe0f9eb6 -size 615444 +oid sha256:1d031beaba6087fe86c393fbef99570b3b6852864778ecf8daa1e5eee6e54d11 +size 615050 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 8d639edb5b4..df32117047b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a982cb6a43edfd5ae33e6275c2c24adf5f8e9923fd14ec3713516bc3e486db0b -size 691771 +oid sha256:e7503f145bb38e3fb551531ae302f2223a42e14e63219ca6b08661f7388ce3cc +size 691377 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index d73567d57ab..0786207ff02 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e69a1203320e8c05f17c7d4fc9c25dbef65264a2f8db2dc07bd14e887b61f12e -size 608290 +oid sha256:d9bb9fdd3b71a56219ecca65c9ada61ae6143c83c87cd10813bee892a7017cde +size 607896 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index d7baf9989cc..43436a22e14 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6cc5e8121331419135483b6747e9a9773c9914d063c334192926013dbe60bbb5 -size 772861 +oid sha256:b8f3441f6a95314354cea9e85035a156515716bc5e4528f90146b3ac9e1860ad +size 772465 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index aa41828a673..803f4cbe46e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8fbb90c56bdc62548b1d97be1cf7f8e587a8e2157ebbbcea338466ba02b1c6c1 -size 682425 +oid sha256:1a32d30609e1d43ec58a07dd41a8c6a1050b62833be809ba4fa277f06900b119 +size 681981 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 0bae3db36eb..a687dc4f53b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c9ba03f3b75854146b56ca8333ff1dcc7b6b0e1aad3829ba04126177f8bd74e1 -size 765805 +oid sha256:02afb01edae404a2a696fcea55e0319c37986809897b9837ffe941465b41a7c8 +size 765313 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index ceacb24cd21..3f0eb0b42ae 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7dd1910ce348789aeaf178d2c3a20620b09583b48f25644bf09a6f90481054f5 -size 675271 +oid sha256:0e0b0f2115262d25827a426d0d82d5c378a476373138b5a4ff0a0a11abbf8491 +size 674877 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 09b4efd7eb5..e4706c49e52 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:41267752202bd81f3c3d1051fdaa3e8c7a9a66a395689965c559e8b6e6d6aabd -size 710239 +oid sha256:5640248d6f9200e91911bd85eeb7eb94a99e32ce255b485f3b2cfd5b3ff2f3dc +size 654739 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 5170cfcfaee..1c35cb28fa3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5ae176b61a49413e400511ec75e70e8b402d4f9883d63e1940336e7bc06fce13 -size 620295 +oid sha256:1220ac3b6720c5e7a9428035969808e60ecc45c9e74155f352622c06148b98dd +size 566768 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index a61f1ea8884..b790b6a335f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cf0221e7d203868e7744420703346ecd06487d2adc51007d260da70d9e7e0c50 -size 707277 +oid sha256:301a1077eb455dd3cd98ac595a3b74d518dffc2955b7eea64da8e82984f563bf +size 652565 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 
c7ea842515b..7838474b168 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:20c564cfc81557f151571142a7836ef16e303b9ef276ebc23669591d2d65011c -size 626361 +oid sha256:6749fea20ae931daa1bf13c2356024abbb36a50a202beeff595e289d01c4efe8 +size 572884 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 10040c3e0b6..482a7abd473 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cd2d1f0f2cc3c1cc0e11ae42c218427db7cd7b403e5633cdf6a0dd8e7adea3fc -size 799139 +oid sha256:5dd1f4f015083488e96cc5b487e21155240dc4ef68ef99def6c5e3c26494c295 +size 743639 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 435a2d70245..3800db215de 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0f2d84102c2bad44da00bc6a2ca90866488735d4adcbd06ef564616e1521ae37 -size 705249 +oid sha256:dfa52a5c8f9995ea38f18a37d78d015ec0a5d6828ba2cd5afdb28eba42abe3c5 +size 651723 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 7c9d9be8c8d..0e1cad7e191 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ca2af8fde31e25fa12b51bf4b806ee3b8edf4a7b3dc2173c26ffcb7324864a30 -size 788829 +oid sha256:633d2c1b0d0cf29dc48d693a9084215289b0ad2cd2c5ef6910f90864569f90aa +size 733329 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 85a9ad70bda..659ecb01595 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:96bb4828fa31f43c49268c38e14e597d6f91ea91c852f640d823195517b5d135 -size 694939 +oid sha256:77d4dceef3b336f10c7e234c104ddb907ef75250a38309f5646876569eda0541 +size 641413 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 183949d5d0b..3bfdb888a3d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ba419b3aaa003c9ac69cdedd27b01dc5896fd7c6719ec4071b3d3f30be2d9d5a -size 795339 +oid sha256:f0d06ea9e29125be9141ec6a55d9b840d205a2ac93654a7e763c4cab760a112f +size 741121 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index b73aebc76a4..13a12f21679 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 
@@ version https://git-lfs.github.com/spec/v1 -oid sha256:f0942a11f5c23dec5ed922e576cfdde5faec9a100a6369cf8030957e90f43c7d -size 716545 +oid sha256:1a6e53b2950258860b089058079d4f2867b4a7200a6ded33c067ad033c96123e +size 664597 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index cb6f5f47ff9..59ecf029b74 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c205f37794c0a9e42a57f1874998a97aedfb91f040c9443b9b964d6f4d87c592 -size 785817 +oid sha256:d720372cfab531ba564a7e9d941c3e81ac3dcd13441c00f14f68ac811eeb9cc5 +size 730317 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index fa6a6c9f586..21d53b23dfc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f6743e5840dfc4952f8f33d1e05489e091dfa82746e26c8c220179dca22fe371 -size 703373 +oid 
sha256:350874c29df1250b13a55f40e32e4dad93218cbbd27718252571839eb113949f +size 650685 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index c2f61b6cf89..59e4a4a35a2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7a148ae4e579a0445603092d1b54407fa91c84847c407d67b77edbd08c8a2274 -size 710243 +oid sha256:e0d3c49ee07457080cadf7be2b1212cba38bd13c813db188cc0f2f39d850a2e1 +size 654743 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 82a28a2603c..00c3b3051dd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:80eba31f875d96927af40db9cb1b1574f7a6f67c08d169deadab1aeb429947eb -size 621089 +oid sha256:ff956860cddd39ef263585a268f7bfdc93af4e8ac13b25af1cd7811520909962 +size 567562 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index ce204b6b430..308c4e240a1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f27a2ee95128664e13e8d20f9071665e09c02108ca00f92d2c35cbfd210663f1 -size 708069 +oid sha256:4c583a374837aecff238f723389639b020873be547eedc774c78f7aea018da87 +size 653359 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 7d56031da89..efac72939c6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4d93d4e7e5628b11dfd11510edb79c5f28e61c08f066d344882fa561084100f8 -size 627155 +oid sha256:2b26c94265ffdbff76fee049d75b6b9723a36c2d0a11e815fc7620bfa8e9cb95 +size 573676 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index bac8cd7df37..eca8250a528 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:41e3e6f5f173b994b67a3ae6f9e89b13af32e094978647285d9f025ee8729224 -size 756245 +oid sha256:036f247d2d6408f757106d865f5e13c5155081fd22f898ba795bf10111dc0628 +size 673907 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 3780068b0f1..ff1ea1531a3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1f106084685c42d4122f0ce795a58c2eade8004c024f952470582e0bcf64e7a1 -size 671729 +oid sha256:9e41ee106b39452ef6c863baa32ff7a19bee9e2e969573bcfffccb9896ef14f7 +size 575132 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 6d58fa9555d..f3481231a08 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:46468b5433f4f08a987394847ff8313966f98c9ffb94e54f39a961e023a6dbf2 -size 754121 +oid sha256:8761e076c9d4637a49247e13245e787e5695c9acc6ed05920cd31302e185b4b5 +size 668083 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 42ab3b8dec8..5778a4b7da8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f1be901cd18abad5fca920137f540d75c50e108927044c5bbd35e2ac43dba4b2 -size 670493 +oid sha256:df7c00fbb35090f468b3ceecf1132ed050bd8006d70a3eb7557e183484f7766c +size 575476 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 44bbe2bcf6b..3e489a50343 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dbadfa995a37ceaf82ae93e753dd01ed8985e1c97e007ba5660092daac76f638 -size 822481 +oid sha256:7df13fffa86db3c05a89bb9d85e284b956c639255366f4774c0fa7ab15fede1f +size 740145 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 637166b565c..2b58bcf31e5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:83bf29a1e9280acd72aa3fdb6c6e3bcb9d222120ab1a4ba88f5e79703ac72366 -size 738311 +oid sha256:c1eb95d5386f8f070482f50a9d498367f953697fd071a5a113a8ddcb03761730 +size 641717 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index dc9fe6aa80b..b30d868f85a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:af0425d356d202863bcd16385e1eba091eed483a20e8090530e74d77b6e86bc8 -size 843567 +oid sha256:723011679b1d616fdce373f43969cd031283229a837fc1b11f3edacc96eb363e +size 761179 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 9dda77e7ebb..c1fdfba1aaa 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2e564de57cf035a03c772a9f15293f49d49339576029046c8fec9a2af00ecb73 -size 751799 +oid sha256:b39139c8da4afa90299e8aaceb4e0f3d9accf926776cd5781fe5b6729e252d11 +size 655895 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 4a1058631ed..24f12400adb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2f12daf1e3eaa3c43dc6ceae0d166d632abfdc3b6596f7d2649732413234dc33 -size 836511 +oid sha256:c7bad6aa0c30bbfa8df8d9639308173c8aad7b9bf5145dd1a87834b10ddd546d +size 754027 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 190b80c0de1..c133500d511 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7db83b8629003f7b7704fc990a140637a82c13d117f758e8f7c391197366e35f -size 744695 +oid sha256:d2a4daffca34561631ac1fdde277c64b5261008adda34db3fd270f505bec2d86 +size 648741 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 0c6c6dbe70a..857c5bdd8e8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a92453a5a1402f9934d8f5e8f2eff95a364479cc113d1ff07e97302a1e1b5ce0 -size 839765 +oid sha256:852e6ae7762247f4ea8ff14b170c2886d06a9ae79043697a8dfd26021856cb42 +size 755109 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index a1146fbe513..cd520875a33 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:919bcad3f687e886848b1639158e09166e05ef7f675a96094e299c856da685c1 -size 752289 +oid sha256:596c7095448b1e26d25766100f253935222a8b9509b7a798c3092df9022a4d00 +size 655597 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 9addb1e5d80..cab29c7643a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:15628c798761bc84d0c0a0338490c345890e04ecf6ddc3ccd04008562717393d -size 832613 +oid sha256:5f41c96024998a8076f3e1e1f8b2ac39dd031f4967e2f13bba65106ead3f6569 +size 748055 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index a0b808de314..e500535ead4 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:58917a0dd1c4b5f2b29b460c584c63fb8168d35d62c2b6a8ecfb6078a1de984f -size 745137 +oid sha256:45cc58185fbb16c2331644791497df272d61b5ba9e95790a962e1831716567a8 +size 648443 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index b58885f8bd1..3189ed27576 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d62851ad8c229ed0e02649475083e9ef8278dcc2e720e0622c6445e896e3bc17 -size 913011 +oid sha256:c3bf09159875809054a4c036286ca9b764f05de32bcb16fe013742106a18420b +size 831413 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 92da52a0a36..ed00b743ab6 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7ee25d35985848fceeb6c88cd9d56912bd0e22e1066a4f183e792509fc8f3646 -size 820453 +oid sha256:60206ad0704012088e2b533dbe244be49be4edc58f1393edf9c949fecd0885d4 +size 725289 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index ea01d893c96..89e6a7cb4f1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:305dd20d9c1bc797970509410af4523ccb1c93fbfd3495c008dc0a522560cc8d -size 905955 +oid sha256:a183584df039d75d8c95f36b384de81ab9af63c0beba8d396ddf5356b2c81d2b +size 824259 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 84cc3d637ab..e7a8470663b 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d973701f17c04f25dca6377ef6e6a6565c68c76f56cfa5725ed391331569489b -size 813299 +oid sha256:012f807cd8a265d6cce37d5aa00f0d2d5015fa523a3cb6de19be0339415264d3 +size 718135 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index d60f42d04d2..eb3636a4d58 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2452e729bc8f9f5068880e84325996ad5b0cd19a3feada75c02528cc7a862715 -size 720971 +oid sha256:5d1caa5ef026068ccaf4f1372ba02b82cc38ce0c8929171896acd39a373f380c +size 629161 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 38c9a3dffbd..e460a969a6f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e86ff7c221fb191d49d5c203952acbc75908d4c77e2599d861b8d2fe14b7edb3 -size 630979 +oid sha256:ea4ffd71c5551e3854d2f26cc3ec458d05fcbb9bcea79ee680cf5e2082a49d6a +size 542228 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 02f17b92b82..82821618e93 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:28c047a4a113e01db356bfdb4648d8bab41bf4545bf7c7e9cf9f975d2d741446 -size 719095 +oid sha256:ece49c98b26a50888b259c04d78de32b2caa60273d81a7b2237268ed91ba8da2 +size 630147 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 88e8e67e835..7f8812b17b8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:475711ab8ad31ca45dbf0214782eb1c1362eea489e3d928d3605dbbd2b4bf42a -size 634035 +oid sha256:3a609ef0e50a7b82a46aa3b0e355090a63a4bb5dbdca7c5493f19831591094f8 +size 544642 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 7c798f9f95a..cf9141a9ba9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:95c653bbb994102131774c0ea9e5628d27ff22e02909dfee946cc3ab98d85c78 -size 786617 +oid sha256:37f25d24808a394f31dce0b139789f59b3ae3634c0710efa1f004f07df40c1d1 +size 697865 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 24acdaf3431..8d9be9877d2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ca3fffba383b8b3216cdccff06179037cd13bb34a0fa32ecfa3ce4d0352d9e2d -size 697365 +oid 
sha256:be54f3cb82117c5cb87020c4207f050fbec64728e29b24f187995c5f79f6aa56 +size 608662 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 73a00e175fb..70b2202a3d2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ac2fc27ddda9d42fbb4d5f3db5cf3d6e19cf34e3477fc81c7e667505ea5e7d3a -size 810711 +oid sha256:de57181b7bd5c25607a256ad11063d4da0a23582826cb461d75785f4a0adac8c +size 717865 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 05248168214..ac9f5158061 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c2a52155330225720f80ebe99e5e3b716447de5d503190d9375a13e57c6a8ef3 -size 720817 +oid sha256:ee28042afedf2383dba2ffa1e007f74d502332f82c95440671b5d233f4b16eff +size 631721 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 15c052766ee..6b4688cc0d1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4ca994cba553063d7b09384ee307001c249b1d79b0e0c01ea8a008a360f51db2 -size 796403 +oid sha256:c98f38966666852cbb3bc4fdde45f53d74b5201ad996c87118762095f852da94 +size 704397 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 22940fc917f..50693ed2720 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b5d1d0335f90121343122976fad2de5bf54b6440ba291c3f34758e174c1179ce -size 707349 +oid sha256:f863509dc44dd3e86f7fc5c029759b496da782e05dc80d693dd7f04e1ef5b165 +size 618203 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 68f586f1012..b8b3083eb5a 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:71258a9b22266a6edcc18eb26075af833150f97f1d85b69246237e35da82b9eb -size 811399 +oid sha256:72d01af759ea1b6a7162e09d7f7102637d9ae6579fda4b8336063bf4c684171a +size 721465 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 350eb82d0ab..860b5bf8bc2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:50cf6f15cb28a19a64ccbf9838d9c2f248953dae0239a1e1120bde6a9fa9c92a -size 724663 +oid sha256:0641ef5a792a2cb8fbfbfae60fce82546b7206643950ab782a25bd477aab6074 +size 635419 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index ab740ba20ef..7c73cad68d0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:447b4ff632cc13dd40b5e321e72b38f1d3cac25836f3fab9d13d9fd8b3d1d7ef -size 797833 +oid sha256:e7587f048f5a57e199bff4a9fe970666edf2d180c63d243fef1d7367746e2b55 +size 707897 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index bfa99b8b45a..78950b883e8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:22517b2fee35f9e362a2f2be44a57dc3b3761d9d2728151e11ec60cddbc9c8f6 -size 711145 +oid sha256:d5cf99dbae696aa78bcdef9b39e866f6ecc5e3ca4a48a2988f6a486da67eaa13 +size 622741 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 927f1e304c5..90991531956 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:af18d6f5e6d8476a5533607019dccd8b0c3bbee7c48f2b7c24474276e78ab4dc -size 876257 +oid sha256:72975f4f121babd6f088ea416d9f44e5b98c975620cdb2a53555cd081d187393 +size 784299 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 4968e981bbd..ee32cf2692f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f8d19a69fd57db94921aefd2742fcc005ffb0d3f9d3a7a884c78ac6fb0a18e4a -size 787153 +oid sha256:05d4802de26491f6945a7e87b6e4cbe45341eb50805aa57c2bb234ff3aacd8d0 +size 699291 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 5073847c1b9..343936b8fb0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4ad8b2e38421a62f77061cbc4242a698a581db090273a40d523cf659388267c4 -size 
862739 +oid sha256:c83ef1a7fc85069c6e483ecd8a114fe827c08bd50b5b92c7ba24f6de72319e3c +size 770733 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 77c2ebbb909..bf620c64d9c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ba2d8a65edb92e3c21106a71277f2b68e32879e396fc59ad1db429d1dd585327 -size 773635 +oid sha256:246dd3cf44d1c14bcf9a329acf9a35d16b33780a0d16293311b6d0dbeb0670f8 +size 685823 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index f1fb5517d44..9546cff94ec 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5c5d1a11600770251a3895fccfd37e527941f485caedca5c0749a5b7d6c831b0 -size 657083 +oid sha256:48ce72ce0c1967391134d7fea65575ef28f10a2d21c4459354f67e860dedcf9c +size 673559 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index b0539d7ad56..389ea2193b2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:44be543bbc22c3879816775531b3ed5264198a58f255602b0ee8fb04f9da3517 -size 573996 +oid sha256:a14ea4a7d88ccc0e92e34fbfc9d0674aedb532367a76a8500fcdd83b3d6257f4 +size 539018 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index db238090f2b..0b8a935f97c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:47c2540f241e62f167cf19eb57e2903eacf661f0a89f1e3062715dcf32ea345f -size 655057 +oid sha256:3388745895f856025111854af7aa335a8c317ef8a1f599d2e95f892e6ec76030 +size 678985 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index d1e794665c0..1d7495ee114 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:618e1be145e7e1fa864fad31b50954b074b8c0b8779d28e558de22e187e81a76 -size 574438 +oid sha256:7aa505413911f8d7537baea3d2f9e2b92e659f634a2e37260aa800d7780a000c +size 540250 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 98f94590de6..de8b8ed4173 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e2d485b131bd87b5d436103afc6386f334d5db66102ccb77c031af902bcf61fb -size 723319 +oid sha256:184c3585843c85edaced1269486898d6be6259476377824369258de825a775cb +size 740587 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 
acc747415f7..54d60d1d5ab 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:017aa7c86f407f50c7cc83c2a8618e45401c839e7fc284211f06818b5071a9fc -size 640579 +oid sha256:ce60175a012f2b7fb3d189cd811c33cfc6ef7b1c6caa47ff3741b5108b9d774a +size 605650 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index ca0503afcae..be9a74181c1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:45837fa6b4dbfe7bd518f770f242abfe7b7233639ad6e981a543a6ab65c2365e -size 742333 +oid sha256:0eb336277a3171def055aee1d43aaee8dc30ced7e9b5cf51f57b18aaccfb40a4 +size 758809 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 61cd9953e89..b0f13cbddc2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8f5923b6ac9b602c0f0acfc3e69a145dced338f3492916cb1519e1c777376c29 -size 654709 +oid sha256:09e31a0875fe76e4d1cc5fdf15aea81bed5d8721e9fae75989821cfeac7118b4 +size 619731 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index ddacce04c14..8a706a3e385 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:113e241e3eb041f4d1a7c2f9c05c340d4fb4dc54d01e7075d1c77cd579f4d4e5 -size 735277 +oid sha256:27a65dfc1b60a80f540c3f0646bb7a97b3f3c9e9d39e48c2a392ce8a0bdf5f57 +size 751657 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 0b0a5634fe0..e8ede17868d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:f2253e9cbefc217bebaf0e9e3e888686ffe576d8e49f03a4001a55a8284bdd1f -size 647555 +oid sha256:6997809c9a1d14c6033a20158a18e1ab57ba2541bcf39e8b4638b740ce6dc85c +size 612576 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index a5b5bedc433..0a38f75f9be 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1f5c4cdc81af22140d74f54cb2479905d6a48c879f223bc105f70248d6472725 -size 738679 +oid sha256:89c279d01173c5bb945f0e5770bf44abebd16ca45a15377fe0603374a231f4c0 +size 762507 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 5fea73a539f..884e3a7e672 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7e7da87e7fcf75a49fdf01afcb208e094e67ef9df2f105fdbcddbdb518193b82 -size 655249 +oid sha256:b671c631792122f8b161534f0720671c5b70a3a316d22b8489f50a975b01869f +size 620271 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 080f2ec25cf..2257723d5b2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:20abeb05369fc4837811e1fd4faab23dcdc36cc9aefbd2adf2ca878d1d99f877 -size 731575 +oid sha256:dfcbf4c7acb2efde228f6873316c999112e10b66524c2bde13a8f1020b2f1dbc +size 755453 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 977e3a3d691..8da2d1621f8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7438928282b65cbe72ba6828f27369d4fe78b7fa6580249cc34085851931ff9f -size 648095 +oid sha256:130a7f99c4d4c19efb2f84c780d630a4fba8aab282c073b541d892dd4888f835 +size 613118 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index a9391797cfc..82f6d9255cb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9d42b3a1814f8b22443e680e3e4ec9131f765ab61fae2878171b74f2f4dc4991 -size 812665 +oid sha256:435a767e1c75ff2a1650d5c66c93944767abaecc668a61407960788d7997a0e5 +size 828353 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 5d4d2d0d1be..b4dde3e1b30 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a04358254410a5be9b58f9c274486283d6687c7ff142c0f68edb4c9f359554a0 -size 722179 +oid sha256:bcc8db6c3591ee8097f990ace94031e0072d076c430914b21bb6a1bb1d23cc13 +size 687843 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index ad1ce2c222f..bf01b76d2f8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:71db2a9842573da6869ba57a22dbcab467e9719ee541b0e4b5c7b773383abc6d -size 805609 +oid sha256:c31d93ed39932445c42c5dcd7e03627b36a4849d08a4b8f1219bda25182b3745 +size 821989 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 31537c8c6f6..cf63f4e5bc3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2e6bb934a931ebfdd5b305c9dbe7084001a297f492ca939074c9e17354f86645 -size 715075 +oid sha256:89ba8c84302cee403704d01d0713d8f11a8d6a19e4975a183e34f593768d70a8 +size 680689 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 617eeedfa35..42d858a25f7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8a75dda820ac41bfb7de6723b2819df8438e1e2eddb1ffe07945648f2973ff5d -size 679235 +oid sha256:4d6a3f1692790e4be6c75cfcb0b20d017d432c431e7e5afcd8130dd5a62f349a +size 678051 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 88a99f4505c..c2790ff1ed9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:63fd6da31d3abc1a3e30b8efd43aad863a5bc5a49dfc3767f3c7cfd8533742d5 -size 595360 +oid sha256:2c6e8c3a94eab75efcc1deb42b190ad19c406ff7bc639832dc942df92d3d5148 +size 593682 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 003b8b4e237..63e7841fc37 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ecf8d1a848b4f9f0aa78ef97e0d7d9edb0cd2eb4ea74619248e986e221205fdf -size 677161 +oid sha256:420752a04b58c7307056e525bed8f2acd2e3f31f9bc2c54cf1d69b8a96c0ba3d +size 675977 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 330186acd61..ff8a426639f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:827dce7c26fc249aa8bef6b58303fd7622eae706c8ad5615626125acfa00024a -size 594914 +oid sha256:62b8b387478eaa42de3ad751524c717be94b39146f7592e4c0baf4254fda49b0 +size 593236 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index f0c93d75aa8..deb527707d0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b2b9a55d178872d8b78d4f2a7d6c77e183add6b27962d22b70973b3e93f85b71 -size 745521 +oid sha256:0417ed6011f969a0b8994a83e67d388ebc45ba5d4856be9efa7c3648da121ab7 +size 744337 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index bfb80cdf3d7..2aa16557231 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2200989a831ded14de02b04c6d80559e97c3f295bdda4b9c053e6dbe4325e8af -size 661943 +oid sha256:9a6d84e5f0fd0fdceae9f40881beafb89b2e7ea180aaa1ae1832859d7b5a80be +size 660265 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index e513826bedc..a1570e4832e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:2683f892a66a923cafbb26f16a9633b216d5aaa2acce12af71639b8aeaee34c3 -size 765767 +oid sha256:bba4aba652d9767a3873d19a71fb0b4bce02cbcfc0a461263890b3deee4cebe7 +size 765373 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 47604741c3f..c91ed0a3666 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ade21c13d418dcd242b005d02ce65b3acc62e3396ac96bc5d90f10cf0d45ed6e -size 674739 +oid sha256:23cfa8e1ac906021cbc21545f2e1cdcb700b857f630befc8c50801e4da0c84e4 +size 673555 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index d584fd7fa61..c9183becdea 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:be9a8d3ab01c601f3e791407d7f7f47d3fcac719949ae48fbae9241f1b013bd2 -size 758763 +oid sha256:d21a79529aa925f8be1eb0cb66979c46973572330d19598298050606685bcf6b +size 758219 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index c7520da12f5..0db9f4fde91 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9f9c2b0d6ba6f3de851f4f827470f2ac2e8e36fa111e965e4c6248e71f9687b6 -size 667587 +oid sha256:0f8dd9be21b9765d3c0af5ca96310081f903aea3582c64824143cae51f9d0c50 +size 666403 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index e2ba3fbd3a6..c5f3e7cdbe0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:533aa131f8d866961f42961b636272a63e78df32659ac848874f85cf8d454db5 -size 761967 +oid sha256:7df940242d916902220554d7fe7f0a8c4725e0736b1645cff82eed2362486f08 +size 761473 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 7d4ce4b8c0b..939b6e239ac 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ba446f9ebb6ee7cea78d1696e407d1914268f316c1909e44ac26abb0b96681af -size 674441 +oid sha256:c6db5b069980ca29a1be8edc75c428b51027c52e5c86a530ea625d2e1553f980 +size 674047 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 9156d08dc18..dfc7c1a19a4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9256eb7566fe19b428635d8c2bf5879b5a8b944c315d7f6f850ab299023891cf -size 755653 +oid sha256:b2ef19954c794df2028aa96256e31e5d4b1e34d550cc838acd3b61d95e264c4d +size 754419 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index b764073436c..08827382c2a 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6d4fb4099edc2dcd1c2901ad3ed3c4d674d73f467ccd4a921815cbf14bbcb100 -size 667289 +oid sha256:4dcc566af05460f903dfca23515b0206e12022f2095468d635bbe44a0ab8e0db +size 666893 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index bf2083c081a..238a5d519d3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ed58caac2d73a8a600d72060754aad5d10ea3ebf66e3d852190c043674f2a7a8 -size 835261 +oid sha256:35299cd6cf244c83809a8aa5ee9d26414c90f5a296cf95b494066cda311d4823 +size 834867 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 23709447cc3..756c2c7f1bf 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d66192c241fa9c67e4d0c83e58cf4fbab39f8e38748b8d31fd21d4de09f40b87 -size 743345 +oid sha256:c8bbc3236a9956ecc2d5de0a9b7b8b74fe8c425790020ed5ac3c730c4c48161b +size 742161 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index e2961143636..384e80a0d65 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6ab632c4160c6522ac86c06613bc29b0cc8882697d630baf87c9d5e891ac02aa -size 828207 +oid sha256:88095cdee5321560675993e31b2bf723284927c387ce4355433207b12c55ebd0 +size 827713 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 029fbfb6811..2952c42e9d1 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:60ec9b7ac78b8fcd615dbfe5a6f6b55dc1b29c8738de2434422764717a8045dc -size 736191 +oid sha256:4e7f281be5c286270c5f0caed00dfb22df7a87d6f2d0eb548ccc840092c0c883 +size 735007 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 3296d9bc086..8c77e11ea7a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9009cf763473ba828242aa290f2d8279b95de9431efd19b1ad8b6eb2f8e9cd8e -size 645145 +oid sha256:9f64523011862d11c4c1cbc51a1ba9f7d189b5620f8a0d1969b1fe62af776dee +size 643961 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index dffd2e6e2ac..d319057f5c0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:337fd645c03671b80a9f0e39f90178f58934593d5da4cade7233beddd62f426a -size 557570 +oid sha256:de562e62d51de4f0e42d26d6585e661a88b43d3883028931bc6d4a83364ab745 +size 554216 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 39af3544c1b..017b60003da 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ffc3df96de86077fdc15bca26e3165addd447e6e5800e46ca1aeaee2488a25d4 -size 645687 +oid sha256:e41574684594cf278565686f1ec993b05d5279387c20adce0b71fc2bc1a4efc0 +size 644503 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 2e6f50c52b0..84da128ea28 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:b34119889b5a13280d621728d9ab76c39b4b1fb64bff48a56087fa80bd0c900f -size 559838 +oid sha256:836f5b9adcaa4482d5c62b4efb3bb8eb1f6784687d0082d224874ba0f0b8b7a9 +size 556630 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index b22df7a8381..fc196b5cb63 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a7d74a38400b9abff0693afa954e24c6110b82a6071b56929dc57ae3b0755a10 -size 712419 +oid sha256:24fbd1ff753440e93df50adad8bf75cf04e2ad6e31709960d654cb4f7216d6be +size 711235 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index ccf50512da2..dfb5743e291 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0a760a52489281af63ce7938fb563a9cb49e335a8bea54b866bed646b00ba683 -size 623167 +oid 
sha256:582a2dae7a13b143a0d469700d4e1bb60b207656feda9c0fd21cb00222e4bfb4 +size 620651 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 416fc58f414..66cefec45a4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:36114ad7c1ee0774b950b1b1d3b5ab20419a2c4c990e62011e075c5106e2d5d5 -size 736513 +oid sha256:573e1d90616a837b65309bfc15c93018ed422c1fb164646379318ff4c347ddce +size 732221 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 7fc505ca2cc..44ce4b99837 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a478705166f1398052ad81155543ce3a01eb90bee6da0637e32d183118b2f727 -size 646275 +oid sha256:741aabbf243a38417c6b540b191c2502f32d257bff5e3788dde1b12acfd051d5 +size 643611 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 34759865a52..321e7a2bb63 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:07f14fdd72ae0fc9b91042269f0cef42956a0fe333fa9939fb9957a31f9f4cd3 -size 722995 +oid sha256:ac21fe69824bd25a25919cfa97f07c52173f5c98027377276475ec0f41ac29c8 +size 718753 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index fb17b472342..be6c01c11d5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:08f429cc8313dad06b29d2fa0f02313cd80b5f5cedce70dc5ee21258fd543b13 -size 632757 +oid sha256:d2e187712893fcecb10ec842d28cf1503a9290a4fd1a9d3741ef0c10c84b867d +size 630093 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index a6bfd09fb9d..9e4c3633ff0 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4d28bee40b60051d35bc3564b8be7136c3ab6a36fb4488645201700b79241fc9 -size 738681 +oid sha256:81f72b32bc574b415d05d33799edb664a23aa8da317944ea709bb8010871e33c +size 737497 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 58714a0adee..c8ddad28017 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:28211ae333f9c959d485fc48cfba6996ec2d39026318b40a4a0727cb32d5f7c8 -size 649973 +oid sha256:fb5bf7499c3e23f13013f681c3d96b7e8a3fa4696c72559a0d6f06de0993f2d4 +size 647309 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 442d0fa1c7b..73e9bf91e88 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:896740605a1302ffb917381ab87d353e945c0f76d7874e522fb46130d18a8edf -size 725213 +oid sha256:349c130d2d579b61f4915742c227ca77a9367a6a06f65657c31a0209a55b8147 +size 723981 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 8fbba5ca037..16bac9dde17 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a3e2a47c79ffc664b5893bb6730e2044ae7d830560f0fdd597ccefd7a553e269 -size 636455 +oid sha256:4174f20ababfcceb2dff4a7e23451c5c47c9804b8c4649e552b6f4325cc18c52 +size 633841 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index fd908638b27..0b7f876f9b4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:0f9b852694d43499217d9a51d5113a614c8161faf3dc2cac6868b507caf203bd -size 802849 +oid sha256:26dd1c4e7b1d2e86e7d79ce500796c8fdcf5eacf8dc5179faab75241432e3daf +size 798655 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 4d7652fa77b..f6036f55b02 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c1823bfde9e7924d48a336ed88bc0f053269abe5fca64504d67cdaa8ac1a12b4 -size 713745 +oid sha256:a7204dfb9bd0af1a3c56b25de7fe876d70394b7f827ace831009a4138cb58746 +size 710391 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index f2a7d3f1db6..7f7815a75eb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ebb83cc1d0d2441e19a972a18791fe99a45a34f552fadc4c97e0686d2976b85 -size 
789331 +oid sha256:6379960a399dad4735290dfac4ef9dc0f2406c3653c279bf5ab19b70fd6c6d24 +size 785089 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 99816f5e992..eb289f61cf8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dc64d42e8be9bc08de5678461482ff8af598c115252d9336b051701b3565402d -size 700227 +oid sha256:b82841507a202e53bc27f12314eeb5c1b3023596937891443cc58010bdda3a5e +size 696923 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 55cec04df3b..bcbd271a375 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4bb6655a75852d9abab3cf84a16f235da2a597b8296453b149b9067194a9b417 -size 613866 +oid sha256:0ee417d125da8919a4fd82314af48636692ddb4b8fa1247f3f80f941a0dabd83 +size 613470 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 4ca504cdc5e..72f6fbdde61 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7524f645f7037a45aae817993b1de6d55a759debbe1264499d20b624898b893c -size 530830 +oid sha256:5c312911fc1ddbfb60021d66451053b7b8a3a07f64d57402a4678354e2519b79 +size 530434 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 879762b78d2..eac00c6a3a8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4d311047542ce505719147d56357bfe1359ce58acbdb0946aa0cde4f6f27a56c -size 611890 +oid sha256:21517a47d60a85d2815b4c411d6ef8c019d97fbbc5566e14bba85ce2fa12880c +size 610706 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 814fae9e5ae..694896af1af 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c7268412b7286b25009e9cb81b3a3fa83a2f8114855dacafe7569b47e7853ec0 -size 532060 +oid sha256:de186ed370b5f31971d4ed8ecb5f74a9ad3d9333aee3f6ab9008ca7476d2981d +size 530876 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 14b05773faa..3f13128a2f9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d4adacd561acc8b4f47e1c96c686e7cb1636df42cd6d59ddf60985ee5f5eb858 -size 680943 +oid sha256:26607bbe4314be36394b26df0240f47c661f4826576b2cd53fb1ca21faf386ea +size 679759 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 
81c05d739c9..04191df7544 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8428b9fed67fcf0f5e0d55977e90abf39caf5d97ac4536d8e822433f766a8065 -size 597412 +oid sha256:5fc6e1dfec3b1ea2ff13ab41f035cf71a5259c288cbededbbed0cb4faae42f41 +size 597018 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index ddd213bac90..0db77efdc60 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:068d903011216e43ac74b2d8521c3b728335f3ee2f84eabdc0a15be36f9d684d -size 699905 +oid sha256:f2deb9d8da68587228849fea265d0511a9c63b639439ee6c04fd546f4453edc6 +size 698721 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 7c2565e78fb..b7ceb3009d3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:318c2b2239076685382cd04dd444ad4a3a568c23b49ef571f68eb0bd00778340 -size 611492 +oid sha256:8de076002fe1b2faa7b05de2da108837e6712977f29339d2bc89a6d4998f24c8 +size 611096 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 30b06bfe594..d975d2ec20b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ca4d53a34d43522a0575b47fb0d837424cc6f7eab15b57b9d5c9e6ff132c457 -size 692851 +oid sha256:0e3dd68b7edad1aee1d8dcc19d881e6cb6e70f38c5c9d416569dd5b0392d41d3 +size 691569 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index c9b0ef468f1..3c7a08254f2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:6ec0426c8316a7d3f9805945bfc95d103eae891858c9e4eb81afa282b6b570f0 -size 604388 +oid sha256:838b70a26251b38f9d7054f80d49731712e8bf0d46c556854d9f7c8007d581f1 +size 603944 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 547f070da0a..4ada9075f94 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:335bd601113e58d116da5a99040f649ed9d2b4b24d307115a4a0559aa0949958 -size 696303 +oid sha256:5b9324cb8ae74df87e724d51ee6dd5a7bdafb3f201b31defcfd404893f74a9b1 +size 695019 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index a9dfc128665..8cb336a0623 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c8e5d4073ba6210d810e3fbc14834045ee9e287399ecac518ab7990f2286b70c -size 612082 +oid sha256:e3c2feae1c08cc3d063b94d4b4173bd67b8c6f30866587c350ba826623a82aca +size 610898 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index ab12f727d10..9b054ae9d25 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e953f6e8019504213360e2b258ecf24024f0b264128b64bff92476395c7fdd48 -size 689149 +oid sha256:2e39b857884077e47d12ff6eb2fcbc2966460e581e4ddce6ba0cb70e13526fcd +size 687965 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index bb0f361790b..02612713853 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:68d1d693af02fe47b392928b63aa5a460a00783c66ef389c582bc4fc00624457 -size 604928 +oid sha256:307f5d32bd136a7434e1229eba9f873786d88050b8249809edf6315ec112ef05 +size 603744 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 0222f61b3e9..3918da797af 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a3e173ebde679f195a7124cbd4bef6280975da696b4570554dd9e0035c8607dd -size 769497 +oid sha256:a19daeabf51b0cb352db193d7455ead576a74d78d9fdf2c8da6c96e38752a3cb +size 768313 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index aae823678a6..ef9a50c534f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a0ea0b2d6853dc944e1cb66cdc8f775e2fc70165b0b8fb60260b162c57b8a6d0 -size 679013 +oid sha256:1c7a6bd5bcb8eae78add85a62c227d2b0b91dc6c0424aad22a7f619a74416227 +size 677829 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index ce01aecf64d..9ea879a3417 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1bcd21035175bf0d790125c29f2fbebf637e41b49618693d4293c52ae42b4fbb -size 762443 +oid sha256:ff453a2861ea2e048308b064fcd89dd56fb7f02550bcf049c0cd30d376d3f08f +size 761949 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index b7d6f00a532..3f642e0091b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:48ee9516ed22261d755e262fa17128cb2b63b76f11ae02c0550b494d81c4c498 -size 671859 +oid sha256:1fe69a1f6241aec9d056727273206113ed7c03ac84f50ee447e52fd987746496 +size 670675 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index ee3b898bed1..97b20fa4c2a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f6aa9ff95b945f09e54f467757d10e93f7a38d872a951cca010209d9cf44ad99 -size 695565 +oid sha256:28b615bf2e52457ea06bf9d06ff955020269d47acdf5d0a689271b915af9af33 +size 656689 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 03b4a451644..28faa168090 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:22fc58582b731de1e97468ac4ce5277004117530d05b8552bd4705263f5358fa -size 611048 +oid sha256:674f62e3d517d01c63b90e5fe5118fe4f60bb56f0ef211aeeed6e6fcbb08e7ff +size 574048 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 3c3559d0c31..25bbe01f690 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a56229ec1468622586b3853df417315ad55b8ed54ba7f88b400a99c1331b0b4c -size 693491 +oid sha256:e294a18e24029bff77c01bf71b45bf85e58600eddb33cc045eb548adf8c4c8a2 +size 654615 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 06b290c184e..4886b075a80 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:636e8ab87c98ba33ddeb3fb43dbe5cf9532bf1afcfb854f1e13e1c5587486578 -size 610602 +oid sha256:21e15a30ee4d3d7a7623a8df1a5065573e145b4b4d98443347a63e54ceb5b19c +size 573602 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index cb3ba9addf8..c175ce05fd0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c87dfd3ba740c7cd9ee98bc679ccc3904c7b16668be699779391d2548a95bc24 -size 761801 +oid sha256:e1934511648003588ca89da14e009fa4d1897f3251253d0ce4244ae855177186 +size 722977 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 78bd1b35137..aadd71df8bf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f1d82a3f84df0f34ba43653c9313b47052fa4db158cb41daa6d1dc002e8cd128 -size 677681 +oid sha256:bb6524be30b9af83590e2ad399bf1d9a2ff7540932c8dd9bf86a6f540a3da37e +size 640631 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index c465160f569..7715237b4bf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:83a8f069e503a6dae8f12d40b2bbf1a9ef6b9289b9fe2237ff2b83d8d648df79 -size 782887 +oid sha256:41b494e8506b80d2f54c4e4b1174e1ef9dc3290df12d99e82458535b1905f3c1 +size 744061 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index b81904c3cfb..4ca89815725 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:14695c05152abc9539ff9954da75411aee421895b2b86d0df260fcb47e9b4469 -size 691069 +oid sha256:7df4bf8af42a128416127aaebb153e40df87a14a0240db900b655d7c2b12bd6c +size 654069 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 3d4a36d7427..a99e9f773b0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8a4a72c584d4005d72ee78d5d4a689d045f86ee66b3ab516185204fa731789f1 -size 775043 +oid sha256:96c372291719b4289767967f764684bd267a6e1cbd6744dd355eb7fcaae4257e +size 736907 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index e2e36dd55a5..3e61a6967a4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:76b1070393bf72fc596160583f1f505cb42416ccbc9e56883457ba25d37a78f4 -size 683915 +oid sha256:10d0819d5f404a771490e4f3e90d8139230a049e8bad31336d88ca9a6f058964 +size 646915 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 2ebe3a97708..b720f2a7353 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b7ac4db7dd6a623527eb7fa85cad6b456e4c946a34f32de959e16591eb3865cb -size 779085 +oid sha256:8e04663c2d6428728a1abe61317b5b192ddae83264a2ff3b5c989226a900c507 +size 740161 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index e0ebfd9bd85..0f5241738b8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6fe6696026f2374ec6a41fbe57b93029c1d26092e544d9c017845543b418d6c5 -size 691511 +oid sha256:89b27d52353f9c676b7229604f574b72a5b152527cce3f139e35684dd41e02c7 +size 654511 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 64c14157ea3..55049ba7c9d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:46803bcacc659a70777303082abf80f2808a3776881abd54b51dbda24ac51fa4 -size 771933 +oid sha256:a8b0ab0f59540e49784d82e6097a522bc95328851521f843f5dffb852d25ac74 +size 733107 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 1dfc5d73bd9..557b556f9c8 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:47b8011ec945b8ccf4f9b4d3839f737a3285ef4224ebef4b4d9fd599b59d226e -size 684357 +oid sha256:65c57bf45c2a842d90662d90f194722325e8c095331aca1eb90f5e1e3f9863a3 +size 647357 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index ec725df1f62..16e227980af 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1528de2315dfda021239c3136561151ac6d6888d1a48189b9d53298a9631108a -size 852331 +oid sha256:d9f16bb9a4fe51fe4f1d1daefeca6b44a55b907cd3561c2bf5a6c6493e23e664 +size 813505 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index bdc496fc1ab..742a9738d1d 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7290c8f75aada6ab9aba4eae9db7a486dff885a9f04a142a70fb749e766c9636 -size 759675 +oid sha256:439ca5c8dd969e345c1ba021aa0798d013eeca5504d4e1fc1caf160c5f7ce369 +size 722675 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 8de98c31cc4..9825a99e77f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7fde4b2c51c790d6ff8a2bcd39ff483412e0cc3ffd3947434084bdaa08aa79dd -size 845275 +oid sha256:4b698d22ada8ef1238ca4dfe617b93fd9a68da3afebead74c8f0ac80470d78de +size 806351 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 93c0127df42..1e14cc2066d 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f3d9b00a6acbab917b842500d6c505709c54151b6699a2dcd5c7996f335b6e1a -size 752521 +oid sha256:6e8f9f7dbb47363cf7112c6cedb3c2a295de1a307f9f596858bfb00a4afcc959 +size 715521 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index f3c645773d7..00b8bd6f3ac 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0bc124be19deed90e6883205251a8c530baab290be799e4be83312ec78523af5 -size 660637 +oid sha256:6cdaa0f0059475439101db3fb243eab1361c94e26726607aa3911ed3e49a49b0 +size 621565 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 9a715fc857b..60334572668 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a7e10cdf750b729142166740280ec7c5a67513b2f63ea4e8dc0892eb0e99e7ea -size 573900 +oid sha256:7b5bf15d0f3cb56afdd3835c49db54f8b7eb8e4bc3d98cffed56e08824a9db5e +size 534976 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 0f6d8a5614e..b5da7f8b107 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f1cd96357c6efb18dcd95fa0ab1199fbb50e6a2aa1b294e54f5240324f7c5be1 -size 662015 +oid sha256:ef42330772cf2db63bd4860399cc6c7a565c14d53a0b075d2ea3d1a31bd64c96 +size 622105 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index c18bae7664b..f4690e522d1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:6240e5cc091189eedebedd41d8223c1c9cba754d2e922a1b35c3f50e85d424a8 -size 576118 +oid sha256:c6a86d4d0a9b96ec8ac30c821fbfe3932cec16dadb2b90799980a5afe5f62ff4 +size 537390 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 8a6b1cb014d..263969348bb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d515d21c51e581df3210a03dcf62424fccb0d4df3f371c28d1e345d7267fd8ec -size 728699 +oid sha256:45c032e84f4650447c779fd113f6025b311e09bdd66b79840fa0177ec7f9874c +size 688789 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 463e7b9355d..6ef4bee8b4b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:40889da0b8ab2225320b359019d4ed1d830de82ed7a9909d0d891a235849cf8f -size 640285 +oid 
sha256:fa59441d959f8ff57d58231114bf916ea9bd589879599bf2818987a930f14862 +size 600572 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 5ae54fff738..d4da5cb4e28 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:701174b4bb7236443430b9f22f7bafbb1fe2d387951283b9f831a3bdac029b78 -size 752843 +oid sha256:9d3d8ae5cba1d039ec2e8e9fae2345ba08c478832b39a5c462f7a7b85d43e3a8 +size 709823 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 9a7b6479cf6..e7d31ffa44e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:90248bc283b876618c0942320166b09a93dcb80908ce75b6bfd8d02e815150e8 -size 663343 +oid sha256:d385ca3300018cba780cc65522e28be257ffbe3e89dbf433b38d8030f82673aa +size 623679 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index dc3ce6f8f1b..221c707f0c7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cbfb94c14ba0d4234d566eca8fe7440ca424a2af2e449d3a68866108cb610f75 -size 739325 +oid sha256:6944470513526e094b0d61d3641225b9ee1bebea89436d379b433abe26efa492 +size 696307 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index f29fe5c0a83..9c525b3990f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0ea4431f7d198cd8fb14fc0ca5540960d968862128b330e17f9403ecde5907f9 -size 649875 +oid sha256:2462aec4ad95211565fe9428cb9c8d0d9aa0a91548303279ffa8011d0fc0d28b +size 610162 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index d824161828e..ae888443d24 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8d206b1c1daea214c47f6957307e01604e7c749f2c217b8e6438c0728752eca1 -size 755011 +oid sha256:13430d8311ed783fa1f342c6c15e8e20ab2bf16ad91dfc6a1f959dcd74f6b8e4 +size 715101 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 9485107dd69..3cf89d16a2a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6d333497a063f98de949d7f3dcfd9c379340d75e18589b8d68ffc7ac89cf53e9 -size 667289 +oid sha256:80453342ca7cdec375d3485ee24d40b4f8a4472800ffed7d409bbf8cf0e02b71 +size 627377 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index d73dc61ebed..05518be8add 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5984db19ff0b7dc0c791b159414955d1a7efe4ab14e45aba67156c03423f83a1 -size 741493 +oid sha256:c78d00baa9c98a324a9c169d3db6948bbe624e778ace2e2029e6d4857cef7569 +size 701583 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index baec55825d5..8d62e333206 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d32c048a8a12f772de599ff6d73620c94319d651635b0229a4a3c94b8b9155fe -size 654561 +oid sha256:c5e79e03dcf448ca955f99224ea2ffa7a301be17a2c66b34d0153947b48dacfe +size 614648 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index e8faf7fc2ca..ccf7693470f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:400303eb7b54c52b61ceb4cc5b11f04b94a72167df1ca880fac9c78d5482cbf3 -size 819129 +oid sha256:e973ed355fa664a08c467e18e740554789310bb15ce3735d7623c036eea06d62 +size 776259 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index f32d38333f6..838fb189ad0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c28b5e1a3838bc34d33881567439a830d52530692eab437721348e707978f9a2 -size 728101 +oid sha256:cf68e0372e21f719ac7a3eaa5b899cbe7337f85fc320c7a809d87b726a5e8a72 +size 691249 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 5c7fb4d6336..e54d8199ee1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:04c6c1144178929aac759e46e6cf997941081f34795963e855c8490efce8073a -size 
805661 +oid sha256:476e7f2597af40a19fa85645565e2e2f768998d2515e92126f4b46e685cd2014 +size 763431 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 4d303b1fe39..b171f3fb8b6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ee9b8e9deb383a951635696fb20cb505dfa3efa143fe00e254ec71df9f8b2a6f -size 714583 +oid sha256:efc94faf855dc097ebe6fd66df671c2afca2d69d50169c7d7350e34568d44aae +size 677731 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 9c66362b856..235e6d31882 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9aa0890046e5a473be01acbdb9a4765b819ad202d84f50fc4cf3af1e99ed0ae4 -size 622845 +oid sha256:8b1db413016ab05e80816d5f02483a0ff51e2cef550388a654dee4610869b1f4 +size 621661 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index d63638bcbb6..35205509d91 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:95e76af0303d00212509c202ae6d2944be75f1e8dc460f4cda6740700afeb65f -size 539758 +oid sha256:d1eed6af15ec385c26a4bcd86d45e9d6669ca18e11beb601efbe005135b5266e +size 538574 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 8504cf21341..f8de4a0b0c1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f3eb1d2a1e7e6860d5d97d625508a7103e72bec0486dde6fdeeae99b17c60370 -size 620031 +oid sha256:05095fcc3fae62698d791b3a32388897cec57b316ced0304c13701468070b732 +size 619637 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index d3007a744fb..fca012a36c9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3e2166a1adafb144c9117b6c6dd34aeeea430c75dd53d9bf1700cb902ec03fa8 -size 540200 +oid sha256:87db3f7f832ec0fe889a258fb1eae1b0ac428362d7ec277beb25b7e0a18e28d5 +size 539016 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index ae2fe9ef04b..09eb2c569cc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:afabf7ea102a27c49144685cf50e6a67156864f9c6b8346ac12ed30358fb0712 -size 689083 +oid sha256:a58445b983e9d19c150d7998ed7dc4bd1cfee439db3301abd1f4fc1e6e800062 +size 687899 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 
f877c78cbef..18474c3617f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:762005f7af138f545ef9948c191f69969038f6e18f1357060e8a529b7dc00257 -size 606342 +oid sha256:c4c476dbb1b2f058e60622811c0b846ae998b1885f97997f92a2077b4292d1ee +size 605158 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 31af8fcb4ca..f7b31caa2ab 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:db2b18f7e5e24481040c5553509c30a4ac7708537009369f6499312f613fe847 -size 708095 +oid sha256:ebfda5fe619bf36727366cd602000efcf3a8b3741ceee1c54405ffd1d99355c7 +size 707701 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 083768321b7..968f4fbce26 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3361a24c2b3192aeed7900e5659137e8ea6b02e5bfa1260bbcbe3ad17b3dbdc3 -size 620471 +oid sha256:1fd0e8ce8be1577a4ff8fc0585dfd84faaa8b62a149a2c01d5771e4cc4ca8f89 +size 619287 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 0e4c634e2f2..cb62ee17958 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8f74c3e396034f87097538d15ee63a02c4ce20584145996b60dd3c3bd7cb417f -size 701041 +oid sha256:9c3cd0d9c620f0f1df9983932560b30fcafc7eb58d6b6e977a9f7324a4fe94a3 +size 700547 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 49739bde153..31ffd60a4f3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:0559e1f367bd61cd789d22b815946d82eb903e0f724c87053a6f5aefbaf1310f -size 613316 +oid sha256:9583f49dcf4b939a23ca985691a7d556ce22b8dc5c3aa6c8e183a22aa4dc4119 +size 612132 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 283daa2d1ef..ec569615652 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2140fd425532aea727935538d4d9b46e8b402e89c282b802111001d904028cbb -size 704491 +oid sha256:35745a18a9a363ff24bdbf76072836487b5948555d909981c3f35102bf1594e7 +size 703999 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 48c29105767..de2dd17ace5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:37171ba77a4ef598144bd1fe165f2cdee2621b33519a1de020741d3661342667 -size 620223 +oid sha256:7d29ebc3e187ba3059108029ae7e9c585675b7e91de6db39e97fb4e79aa3ff21 +size 619827 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 65b6773a023..e1628429249 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0670b52f2a7f700f9163357e386707c1c1780599c679e8af523a075e12613981 -size 697339 +oid sha256:1ca862c6e1a54d22531e9a770840741ab2e99bb2c010ea2f1f87267d4331ac3f +size 696943 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 777b79022d6..49c11530451 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:614e46e62190e97205d3c5ad9cfad60e74b65147d4ccc4b528ec66cf87a878a5 -size 613068 +oid sha256:f967145b6396849495700d59afaf633cad48acbb63dc9561c0bc99f0fdeac038 +size 612674 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index c3dddd51e10..fb83bb5e4ff 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8178d130553155d4bd5e28c34d4c42d860f5013a6e7d5eec595d72090219bec6 -size 777637 +oid sha256:3fb58b567d5784eb6c44153b3be9f74e3c64c9bc2406c1cb6dc97b6f9224be12 +size 777243 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 515608b6e96..2b0246d39ce 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c6fb4dbb9d1d861c1d15a5ad57a9d1a6a62402a2230a1a3af84791b96d353973 -size 687201 +oid sha256:faee53e011b797cb5703e289f6a5112eb9907b625968c6e25d491667a4f0bdc5 +size 686757 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 1c4c47b3f04..790e55f7725 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d2e16d418cb4dc3cae207f71bc52c4a49e752550c3593e89c801d644c9cb495b -size 770583 +oid sha256:43534c076584e2c28d36f39b9ea955594d6a1042739ac2fb0a81dc63cc7f3db4 +size 770089 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index dbff343ed71..9bfd8d3a051 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6669484beb1e96ecc988c856b258b9bc143e72eea43977396dd3335ea10a28a8 -size 680049 +oid sha256:21441a567e16615bbd72106f317f1840f5190e94dccdcca3ac7034d6e6225bc3 +size 679653 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 592dfff927b..2997e75258e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2085f2b0ce99e64e200b3cbfa69728fa994df8beffa2a86e7d90bcc95ddd83aa -size 739767 +oid sha256:829f9b5a04380afd626237f0a7c76b4968a8feb34735833e29261d4e0c2e28ec +size 718505 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index ce88c1f9e75..1bb590b2335 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9f94dfd93861b2c132a67fd5f996b10f987a5479a40102f2c9d13f83b51e27a1 -size 649183 +oid sha256:6707589e8002a2c66ba6a3ab5cfbcc30982d6035e35015302f6e79fd9d2546f6 +size 630535 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index f2af85d4982..d785e14b306 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8a23df02abdfd5877aa82bc855e986cfcc5fc587aa16d38a66fe49aa3d69a1bc -size 737051 +oid sha256:d8af25a933767472b18c324a60d4fffb6dc834eb325949edc00eee99459d3508 +size 716135 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index df08991fbfd..b7e341b5b82 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:00facbe4690a04910ea3055fe9b840fee794db619e974fab9bc3406ff2c042b3 -size 650019 +oid sha256:84adecc200f464798c7eaa59bc547361288b8138c043f3f30000e32fd1155a5f +size 631421 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 9eb6cf93813..36d91230da5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ea8b2f31fd39555c1361c5cca059bd30a5b350d33daee2aa85d4f32edd66cbba -size 806005 +oid sha256:dfad205f962eb5190b92d2cc93fe2ddbdf4a96b856f89bfbe65f96a36e8edfb1 +size 784741 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index c2a46c08ab7..85375626adc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7beb11703b72c9e9233f81e91a75f74eb2d06be6998a7f6bf9e5c29f4057ec5a -size 717591 +oid sha256:8a8c076a61c6a72a49968cae8365f53ee53a5898707e9c1e422043bf2f17dedf +size 698943 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index f8d5c9ed389..fa6ed63b4f1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:036f91ef90dd36afee83072d49264ddc714af185291dd5209a99b25f1d5d89a6 -size 843665 +oid sha256:7ef14e52432177cd2d4d93db240cc78c6245b4f4870cd09f70b02241cc82c2cc +size 818703 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 973eef6a8d2..d69d4b25e59 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:893ccb39357b75dcb6e2038b9b1887fdb55bb21e01713f8a6c5090eff26f6580 -size 742771 +oid sha256:f8bca6ffe309f6ab03c9844f86508512cd7293981aa35dd953ccad9556a7792c +size 721951 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 696177eb6c0..754e4c2ce3f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:acb3d2e35035c55da113348be290b1c8f45e05e067bf0f7db701dbcd2ffec833 -size 830147 +oid sha256:77f491d0d51c7427c1aaab7be1715c1406bfa44e1874f1de7d27657104b5f5e0 +size 805185 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 8423b4f6dbc..cfa6a5a28b1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c1aaaf1ad7e0126396618d6f5ec42cd8c790fcf9afdc831a456fa5072bab2bdd -size 728463 +oid sha256:866d21f663203e2821c5f36415e878f6d9f22632f108acdc278681a5485584aa +size 708483 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index e4739f55515..36ec70023eb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a5e744842e436d7f55590402daa3f1308a806be136c4cce8d2a36fa9d5dcd947 -size 836461 +oid sha256:21ae1a6ae80c62279877a6abd651ca070020f3253bb8b91f23d471173bfa5bc5 +size 814013 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index fa7e5001d35..b7f685c3023 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0330e4dd6f033a788c5b6ac3cd25372a4a7eccbf4208116c4b9ab104aa89a535 -size 743459 +oid sha256:9c36a81f0a2de4c280cc0e7dba52bba1c52434d831aac411229c561f1b208e20 +size 722641 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index eef477ecd07..f9437847adf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c7d70fd0e5ce6de4bdb8a7e31a918eea1d4ce664a5ea75610d3f1f93091025f7 -size 822153 +oid sha256:c1ff1a86a78690260d44e11a040f31ef87beeae399c702bc0da8bc21e65f2540 +size 799805 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index dda44b517d7..a8bff328693 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:194fd7c8f41fc42d184e3c2f7915c69536767cd3726e4b7964ed5fd84293d3a8 -size 729153 +oid sha256:3b1b09da3e1a867edf8df8f59282ea9c62a4cf5b27794642ac48a184745f8279 +size 708383 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index a4c56369ac1..b92ecb6f402 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e2950cae1cf856566a955362d7d2a89e3412cae8d21b88ef94a75f76811f2f70 -size 913799 +oid sha256:7a2b872fdd8b0e780354cb5fbb450279581da1d9c94228402327ae591681ed52 +size 888935 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 578e029a6c5..02ccb37792c 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ab4444176578233eea2fafa8abd23947d18443d1d965c3d5b167071d13465572 -size 812757 +oid sha256:2f146d5be2b10180131c49aa95c3a31e398f5df0f6f6810cc10e94e859572e91 +size 792185 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 099b52e8605..ad83adfb5e7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d88866790131ba477a2c1c54192e94ed51d22e20c863c8ab2e8ebaa60cde3730 -size 900331 +oid sha256:bdd4e3190fab44087f4836351c229f6089c8974bab9db91fd3ba15e63f57f36f +size 875419 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index e1f6dc06e71..51f6dbc16c4 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1a050b85c3af3c4b774d2ce8d285cf35ba2c40f4335332c89f7a4ff751b81932 -size 798451 +oid sha256:ebe054489dc58e1121e38f457c554c270b4f36a37140bce54da534880ff2786b +size 777879 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index a41c0c2c7b5..9014a78163a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:14e46bfa08f89020b460dba291cd1235287c2547cd5f94c735260727c60fd276 -size 678445 +oid sha256:56d1fd0656152bcdaf0fff6412e37dbc5257a0abd5a96b29174560070abafbe4 +size 652891 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 007c09a259e..131e8048d44 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7b9418b2bc2af0318e22999b51b0cb653016a38c9798cf9dc045f29951d21723 -size 581152 +oid sha256:ef614095dfd19a600c413e266c0ff62c3a560ee1791e2f8f2e020042ddf25724 +size 554906 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index d3d2074e254..a8ca2fb6ecc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:29b274d10b72a66cb9d7ced9cd2e7758c48e9466993395567c01496745765a6c -size 676865 +oid sha256:041dfc2094c4ccdf35ecf67083e3d5bf237769f77c2348ba88c1a86c25485ed3 +size 650323 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index f724c022077..800efc06bf5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:7e4befd7b7f9eb3fb4eb8f23a551296c3e8d837209cd041870b42c702a702139 -size 596492 +oid sha256:ac817660748a4ac8cc076d75532001c6983cf4233dd67abbc35f009cf169aecc +size 568766 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 31924c7dfaf..255d347fe1b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3c2c9de917efd5bca03f0bb6970bd0f7562d7746715f27d0b9f20552cb9c957c -size 745621 +oid sha256:de6e31a2152c8a34325ad66479238b0325300a0e8770f9300d1965d93f194ef9 +size 720017 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 7c7eeb580d3..4d95d7e9b96 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:98f5fc62f772e844cb2a5623ee3fe19b76ed5ecdd3c41eb07c9b02959da82342 -size 648623 +oid 
sha256:d6a583c39528fa375b20297afcfd931c4b4568c851043697cb13bc86b8ac44c5 +size 623217 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index d22a05c8762..494dc197eba 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cbdb8b9b286a9b3064266f975dd4f6a639e1517dfc5155579c4a6674b4a4899d -size 796453 +oid sha256:e89a6012c3cc0c3f75edc26d4e1753096285d3eab5bec0a4fba568b8aa87b6ba +size 771095 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 133776ee2bc..47052d9d1e0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a683585aaac482386fdf4dba8a0d6f877391aa217fe5df53367b2201385ea9f2 -size 691315 +oid sha256:b917f639e3a291100b4a007f1db56cec5189caf3716c8fb8292c7defb9c4f20d +size 664183 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 7e4af759e1c..12274ebbd7a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2cc0713d4abf561d50e0bdc0375bd0aa5bccba740c6d6ca5809c09d2074b4a05 -size 768679 +oid sha256:29ee3ac88aa46d5912013a979840a9040ba89bc0418421862a0e067f955fd34d +size 743321 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 1f22c10da1d..ca0e8b3c415 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7fddb26d5432df7435691fed21223bdbcbad2648df252ee7e3a57b0b7acca045 -size 663491 +oid sha256:2b3c5fcb1c04ecd0f3c650ebf56885b680cc67afda7c46d68ff3e8910e7eaa38 +size 637197 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 3c898e53012..bad727caa5e 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:68df2d1854b0ca80ee3a86b12e1edb3d473c51d0ab7a0056188d0c0eedd317b3 -size 791567 +oid sha256:0d89afaf7b1df4115852e59acb648e153ef6a645d55864ec783a0b8e25c46c03 +size 764877 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 7719d91555d..c6d494d24cc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b9d3a36f393eab29e4ac88ecca56bc7983812336fe4a9c417b6c03522db3b004 -size 708481 +oid sha256:56b82c2b2c21c8cd9bcecd9c0d582635d9fca1021399397f8870038f110fd02b +size 679375 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index f118e96fb52..8244ba84a70 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:374fc6365e4ff31363f22e3e496045cbd8556a10db7fec1e00cbbffeade29957 -size 763793 +oid sha256:b5d42db3032034c00e1fac931fe841e53ea3b5f5ed712db1697730ad22b4d54e +size 737053 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index e904a6671e9..85b667fa68c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1c2d61a69dfc8b931bde98d2a80ee840f82ce4ef3ea93f3ab7a8f2f25bd7176d -size 681447 +oid sha256:0276bd06c00b06f223884beaa4238036d5a4692418930ef8025d795e77c8f8d3 +size 651601 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 02ae2a92814..91460120fa4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:371fd8570f8ba0b14297f74e2e11b032550be1ac7b003b123a9f3b802d6252c3 -size 861655 +oid sha256:8708f2626e81ba1f33711e506114751bf725b7c49baf61a78247e5c3f241173c +size 834225 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 12749b0a416..d53d731e8ea 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6d0b454b9b01ec8ad936f5afa880b0aa1dda78b79ceaba0cee343ecfcc79578f -size 756615 +oid sha256:554f571c7bbdceb7fc1668a7fa202dd1943cd1258a613999b4bfeb68f0ffb630 +size 731555 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index c4b98861bff..e422cc58189 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d8172bef19bd352d29940b2cac2e531979a029912115fdc7273c063f305e6cbb -size 
833831 +oid sha256:6a7e09ec5671e67d51342e64f2e0f3fa4548bf320a1cfc9382604643cfa186f0 +size 806401 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index c6745463747..b0f5e4e1aed 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:349f2962b766644bdefa386300191747217addb2724ad4fcea957c94fcbcd837 -size 728841 +oid sha256:5c1ab50fec646d90968d1fdf0aa8ab6b5a0d5c0af0b9003b951f0240dc58da4c +size 704569 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 141be98e751..c9302688cae 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3f5ec277f940487a32d25576846a8a808d4a149baffd157d978b3232b48f56be -size 640703 +oid sha256:b94641982967b92bad8ea63f44e1ff027bd1a89e21934a296ffdae3091089112 +size 657181 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 262841c6597..c4222fa5b31 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3522a604b164d571f05c6be1c5c24f168d8efd017e564eb8ae3f29bf29b0490b -size 557470 +oid sha256:77ef1c3ad7098132f3ba2c7b1e3342fae47f41239203f37fb2ee987842829fde +size 573454 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 56f143a6ab1..063240635d5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f41d59d1eed4f215cca3a60d591c4faffcd28188c48f3e965a903b297aefd31a -size 639123 +oid sha256:80610f020b93a7929cfdda304ab464cd1d15de6a337a6d9017db71bf9713ef54 +size 654811 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index bdf880d4b58..57ff1008863 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bf28126110a7dddfab25800aa5eb2efac93181ab19aae5acd868a1b88438786e -size 556382 +oid sha256:3dab8114c75ad14cb94d8c7df26a0a38931a5e8c11562d680448347c973ab231 +size 573156 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 963b0ac44d0..3fe5b2b15e0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d0f0ec4e3acd0b1daefee1d1f5d80c5c83fcb83ace7be97ada49ea7173f18f1f -size 706991 +oid sha256:99f7c7e4102da56ddb415cd23ca66cd6280ac1423c8caecb977318003efeddc8 +size 724207 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 
3001c9500b4..7b565b6f10a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:463f6836deba055c772008c2981f24476cbc305294f5cb89f031e34c0bebebac -size 625583 +oid sha256:a2f26c517b276b1abd4304bae987723c9e5f4fa110eb60c29ab42240eed000aa +size 642355 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 67649c521bf..aaaead6c991 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f265cc9a66512e3c6ae9137030c9376c1918edb7b537ec244d2f4831ef965ed9 -size 731281 +oid sha256:9b91e0616d99c6708409d48993b3e2f28ea5049a44b890a8516a862884e42656 +size 747809 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 80447f0da3a..f23499a1394 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fe7326f67e3e4357761566afcdc44f32129e7a307aa711f2b38f9450f5a48691 -size 636653 +oid sha256:01127793f0d08c93f6ceb77d3b02fc4f1683c6e168dd015ad68b90654e8e82aa +size 653425 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index cb2cbaa7772..11fcf579ce6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:803a91c2658b07f8bc3b4469494f084ba8a3e8a2b4ae06a11a602f354d55cbb7 -size 724227 +oid sha256:7acc4dc55636a532a9432f8ef5fd43512964f7eb8db9a17c68e1eb23210370d6 +size 740655 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 31fb9f67410..a4fc7fe32f2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:a2d62441ee1a89e8ceb98b0cf3d4177e2b99b2497cc536d5e354f7f6120e9499 -size 629549 +oid sha256:84297c8ecab43361c8f5939d9c3abb92a17ed8d5a7e41168041e825a00c380c2 +size 646273 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index ed57d102474..6b1ed916628 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:39bce9ac51e50caff6affa0cebf09ff04e82d2c3f196a330cae9f8bc88afeb13 -size 723979 +oid sha256:b9181356e8845bff0d63fe97730ab84758734bad77f3af9787bb69411fddd69d +size 737299 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 1f15c05f00a..d01cbb59f1c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:59e0b6db2e8535028c200ee2230c5a5acf021cf9c0887246aa6d682b61d73cf4 -size 638575 +oid sha256:0a8dfaf1146d61945c3270c7b51000232101588791e17f3b0bb7a5fb8dd0e664 +size 653227 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 5aa0544f078..cf546b3fe3e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cf80884eb7f4b8223729f638219254d4cb5126e71f9d41696fb189a4c2427247 -size 717515 +oid sha256:a78783aeadf1e0f4473b631bfcc65765de7ac7eec5209f3ce8d60b742ae5112e +size 730243 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index b77143d773a..24361ae5f83 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f3c5a1382d38951082da4cc9dd2ec92bfd369f3d15544462a389673e6b22caea -size 631421 +oid sha256:376c849d60e4c393002a60e299098af2947786db9e8c7340f235d235c9986a3b +size 646123 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 7bb56868108..d7dd4e0f61f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:37415b1ff4262591fc5b1e7079c477a4bb7df2ce785572a24c7508dd40f707ee -size 801021 +oid sha256:bb515ffbbfe1990abde9962bec1e5694d19488e10629239bc1bf9683251760a5 +size 816809 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index efa1ca711e6..5d045236308 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d4dac1c816028a26279d773800b7b266aa4a275f87a7b17e3ba01182ff88a1cf -size 707181 +oid sha256:e3c5ee4cb3e08a2cfbf97a53fce6852f173716fd063e5df94ca8499fd0af82d7 +size 723955 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 6fbe125b187..6e74285707e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8cddae1692c4d5c46a5bf1584697505973729038f9ecff34c3918686eff2a43e -size 793967 +oid sha256:8ed85a5e7c1c1d14f255ec37245c97fc7a706019208a9e625d0146f8c221aa04 +size 809655 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index a3163453739..ef21032881c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d462a06210752e04f637fb4462315f9f2a922b4bc376ca4a98a70c72df7555f7 -size 700029 +oid sha256:590c576a36b382609f5596d93b437739fedc31234cd9918392aa001a990e90f4 +size 716801 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 74f5ea776ec..d3704467476 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5e9437de14b6e53898b732c3031a12ea9777d7bdb9afda69d63c0aa20a936d0b -size 773807 +oid sha256:0571f33d3cca68a51d1b510700d6d993ad59013ff4788246c3d099f4eaa06035 +size 820871 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index dc6f9bbc285..89daf007982 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:20279fe76a4f9711f1590fcea8084d9ba51b33d71b3f4bbd8098e08591ca60e8 -size 775133 +oid sha256:92d5c9b19fd1564e7c54c728bac60b2a5401154338d731d5e3e504bef75b060a +size 799209 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 212ff278e23..81481abb71f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:56d8f8a359e61560a37ba35ff3a9622f78e8321fcbfa38c42c2d549ffaa05da2 -size 740783 +oid sha256:22862dd435244b57b2c94fc4924d337da7a9a1b430fbaf081d31d94014f3d52b +size 783605 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 2bafd49b688..a525d13cb7b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:92e887d814c98f2848e4fadb5f6e1f9138aa6b9d663d81b71781a3ab40765e37 -size 659623 +oid sha256:82228c8ea6bf5642e7283745ece61859b33d51ab9bfaa876e0650c2f5cbfe9bd +size 696179 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index e820b910bf0..87d1ffc1478 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:90a8171d80d59f034a173b56ad60a116ce08ab24e65e8da40d3454893dfba1d3 -size 759551 +oid sha256:a620f6ad3dde05e923343c95913792c1e400e53c2b3ff73f41d9c49d35752d93 +size 806615 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 726b3b24cd7..3d38f3448c6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:08456730ec0c8224234fcda6e78bac24c8ccc3257edef7eb86b53f9a9f7eeb95 -size 760827 +oid sha256:d1ebfd119f6c1148e4190c4c68f6e2b30134432c9be35be9de9789547189e7e0 +size 784901 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 79ec26a5d54..566f3d2076f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eee68b850887aca350769758b9a78f08807b161eee613555054c7440af6165b7 -size 726477 +oid sha256:b1008e4ef0483c346d0487f939463ffe6283c877d4fae933b9d93a98a9981b40 +size 763871 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index b513f8491f2..6e5025c53ef 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a679eae675660c7fcf4dc814f1566b539b6c84d6dd175ab44c8bef95fb00432f -size 645315 +oid sha256:a8692df9fadf67da428a2b5ea9255e36cfb8d2829c832e4dbcae1acab19adf46 +size 682661 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f1e5c7ae06a..aede0dd6e90 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2cce4dfbed2701675c4ce5e4ad55534f7ea9cc8b61fa279e4bc03f22c0404d24 -size 654121 +oid sha256:51bc85f7eae483bc7673bc2873766b291eb16c95dd569c87c447ca668e9e2d40 +size 686731 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 39a2a83edf9..67513e2cfbd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:10307ea2474a788863cccd593fd04695da8a0b1db9d6c824e3374824216efa74 -size 610064 +oid sha256:9f0a4fade7aa2fc163ccf0597a209898c06c3b32c148600ae3d47d334e8b1b39 +size 630489 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 872bd88d151..b7bb689d24f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7f1916bcc020ead13cd7279aa6906e6bba702a7054f43bbe63b296164dec0a9c -size 642473 +oid sha256:b8f04d088ec735d12dc45d6161036fe52542a5be21877f6c8422a6778bd5a6af +size 675131 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 313dd3409fd..a83f41da5b7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:77d63788a7c23145b9d3bcedfc1fe10a5934617ea7af62ae52b837c3a815d013 -size 605322 +oid sha256:f6fbb20ae791c97b28031aa395be207d16bcfba0e4952d5c0f8a1e1c88a6b9c8 +size 625943 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index eb608ba4ede..6de76116e19 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:df709fb42f1711445883b0518b29498a13ca3b2f567467035d7ac431549f0f2b -size 641275 +oid sha256:2db11911ed856a28b1c895629c50afd4db0f34369446c94f6c77f7ca18bb6af5 +size 679459 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f41dbf802e4..5a8433d6fbb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ea5a49eb89f2b46234b66a02f54ef4bc7b6c3db6567b1acee4dc9532365881ac -size 556314 +oid sha256:5d135382a6eb2b833a0136d49da2c908a7290792cd8e0ee4d52d713e05b671f9 +size 585470 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index a84e0fda0f5..f035e58e0d0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:652694365fa76b2487b93347cb43f86b8bbb2f777360589d3fe25e3b2bacd08a -size 596920 +oid sha256:1404aa5ad98c8c29b0db4d11034365cc8f052382f4ef6c7b351138a903433ed0 +size 622921 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 5068cda60c7..31996b7b3ab 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9ac9c68a6d1677dec6bd84bcac1e748e6363871297bef04a8bdde05343632230 -size 522568 +oid sha256:7189b08eaf48ec01573ef880391453303b44cf4c80d5e8b4cf93758e3e03bb42 +size 540820 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 4909dc6dbf1..78899ade96a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:70c50e66b62319d377eed80cbfe6d28bd487237205819eb846dfe2212186a686 -size 640505 +oid sha256:110ae9ad718fc098b2d88a2c08f8054be77b43eaa7a3469987c6a9c3782bfc11 +size 673213 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 5b08dafe5f6..a5f8bcc64b9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d8f8921110cc5be8aa7e30e995124df7072a3de5305d763a43242f47571350ca -size 595658 +oid sha256:9ae5264628f02a674e610c6b919c31101274beb4227a9e2dc46f3b686dbf241f +size 616180 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 35feaebe742..e2d3b43325e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ba00553039d691427af015731363517da6a423e72cd3d9ebf5a1c0fbe677394e -size 628067 +oid sha256:8e824d562e966c1f8e0bb5b4d0002348a1299d3dff20f6d41d5ce5e25a13a7b1 +size 660825 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index be76b3918c7..12790a69987 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c741dade3cc73d7af424a6abe36b4d19a3e3e0692eec1a8a339ddc086d8d8519 -size 590916 +oid sha256:293d2178b72babf30a68b82377521b15ade4f9a0a7ead43bbb054dfbd2981bc1 +size 611636 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 8626e32bc08..376358c8c84 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b7be6c12abb7af81496e95207e7f6159b5930a9d8204b7a75eee2c2831c7e80f -size 626671 +oid sha256:4d568ae129104e1e7772b5e8c422021537b80d9982b0b17e611d305a01518e03 +size 663573 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 78773b1be5f..9edbaa669ab 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8f03b00d6b8e44e6e59e252cea365bf27a38c05a2c14692484c07743c6d34e17 -size 542006 +oid sha256:223a2398279407c30add5686fd176cdcd0c5b7d8b1afa344c5f689ff1fa66bb6 +size 571212 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 4edffda5118..7c362a6d387 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d4be93b23601f51d6cb8a0c13312bb7dd0e618b6a9d342963f61cd9a8ff9bc6d -size 582614 +oid sha256:9968ff52f92cad9ea24d3789cb795f86363abf7d7505e4e5f1456258b9aaf3ce +size 608514 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6b2f0b644c0..d3709c30f15 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:790f2fd40d6be7c6578c078cc312b69a56957230064347362b31f28a1fcaac87 -size 508260 +oid sha256:4168edd7702ec6d6e205ef081082d94453b35559e121cb23a1aa90b77668cc2a +size 527352 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 0b2d9ab0981..c138b736616 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ebdc8cfaca6f09e50f16684774d302a599435c4e54301d8bdea80cba31905b76 -size 673839 +oid sha256:b126efe191d7b5c7a3078ddca9fd87728482caba0f2f8ab3d353be4fc32c825b +size 707287 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 3580666d0b8..237f6eac885 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4d070f14e66dd0ac160161e7af0cfeb8516d35265670d2d00b12311ae0cb9538 -size 620853 +oid sha256:46f76b30781115588c9523ba31fd44e01491c32e4953a19347825171fc9d9ff1 +size 638811 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8cc5870c4b2..a183053a4a6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1d5ab224ce46f4c60ad881bf4023ce689ca4e48ad446a07864480da3ec65ec23 -size 662141 +oid sha256:eacea3e7697ae20da236fbe332662d0efcdf1b39d2f75711a39a8c247db4aa29 +size 695737 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 52daec532fc..3bd1d37bbea 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:394f3c231bafb22bcd418501356a7433a899fee93d247024b1a8467fb6590cfb -size 624201 +oid 
sha256:bcc0b19a4e3a42f378012f89226463fca87e3c4c7e3ee5115e652f34fc635ccf +size 644133 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 691da2e84cd..4bedf582716 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:af8dbdc1008ccf922419a211692654df4f131f8d54b50327d8cae5630533cb95 -size 659513 +oid sha256:da6beaff163f39b74f77af5c47d4e2082cb30272acdd6eb760c0f67b49c8cc69 +size 697943 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index d75bbdbe094..69c8d98ff6a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e2161f1c8a7a8a55121e2b589bcec5cdbd3b0b4f2cf4c85625f90d6b6a304442 -size 561626 +oid 
sha256:54c4516bcc967df3323352221729eeb482ea597e0558890a25812b6aaa4ce3a8 +size 590584 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 799e8f4fd2a..2a6b6d0ed3f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e85fe92e37635fe4db0c5e78c08809cb233500045d29425c5eb71a8ec707ec54 -size 619995 +oid sha256:be7a91448cfb8e66411d087c005d175f6d6d848957b2055b54967113690593dd +size 649447 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 7e914c99d56..0036e8f20c1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:666094bd72259c131640685ac00729e9c923d84b1fd666f5d62fe781f8e660de -size 526942 +oid 
sha256:f947d8f59963a443e2257349cd30396dbf0e912bae1b1180d8bcb4c0878e2cbe +size 546182 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8371aed71bb..587c4472405 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d68c1b71a088a76783e8aa3af31a8798e5fbce9b42af36f636c6e878afd2212c -size 660321 +oid sha256:d5bd0f7fefab8ce68cb8bf6d9c72f1b293604b9d1f6262b199483f314e4b2c80 +size 693819 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 15f8477cf60..6136ce86581 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:eee868c5b5f46bb31da7b477178d050687bdda5d9aadc88526818a2edeeea5fd -size 606546 +oid sha256:1846538aa1fe066ef381b2919534ffaf85f239750f15bcfc082169d18c0ec315 +size 624503 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 46b1b0d7dd9..e4ad0ca4096 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5b8bb99e954e557374ebe567cece73c65d3769a287e178b5380edc12bd49882a -size 647835 +oid sha256:59e0e31ad4a137fedd8d4da913cd5d7ed6984e8acf95ccef06e2c0119b29a435 +size 681431 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ee21f792318..d2e2e4e6c32 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 
+1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:15fb8718f8b420ac8f79d751ba29e42a1e674b5fa5e258519791bb7490041128 -size 609894 +oid sha256:af09754c20a9aa409032859dfa3af389e5d197935b1a13a2e63a013fab8c6b01 +size 630615 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 18b4640b3c7..f3fabe5c212 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:033e9b01025af9447b1af846b149d7df4a596dc93f353d614e4e15cb0eca8d44 -size 644417 +oid sha256:3eddc31247d4dcc263ee67edfb1c73104e0dffd3e6dd4fb87f60569d9443c5d5 +size 682057 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index b64d5f6f5a8..0bed3bfa356 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:392ed919f89cb8229931188e380b1bbccf78912472cadffb26891c24f9fe19d4 -size 547320 +oid sha256:1e7b442e52318b1c1ea6d57f14da669de251ef98d6407fa8b2a6616af6bf35d1 +size 576328 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index d8ffed7dbb7..eaa16ec8367 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e57faebd4d8e0a84462c0c8d1e0581d469005a59fb55f3f233516102ec60155d -size 605686 +oid sha256:7172c73e8cd5384569de1cb0e8d0c9b0f728fd0e9d34f0baf1fe37c5576ec54d +size 634153 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index da537a5b794..688f5035af6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:66d5bb671cc368258b1af8a029ec4ef8fddb5979de75ff2bca5d83b0bc8af8a2 -size 512636 +oid sha256:6b39a5b2be900dab8d99d06aa7ccbb26111cf5809f1f5b17da56dd84d32265eb +size 531926 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index cbca6f4d3cd..efc137919b3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:56ed12c845e32c2790f004e16fe8215f176937d21ffaed993d1277175134da8c -size 795363 +oid sha256:8c2f110798e535b6f0ba2fdd2fc1e1077abf07a242811690093da2d84477479e +size 825653 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 92b7051cae7..2da76f6e3f5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:5d2d72c964cc52df283572fa1fb214f4daab668127be686da9315e44917f9892 -size 755055 +oid sha256:9f71b960570f0a9e53c71844765e2fe4ae3b88e265e75858bda1097ee261b223 +size 773407 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 7a9ebf13762..02590d0743f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f42259e9a4eed589dc1cf6004f07bc91b2cc02b03951b38de5c0ffa7be4aa9e1 -size 778781 +oid sha256:470df6965ee064d542b4a4e27177cff5025f3ba28608e038800506fc419a3fbf +size 808281 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 62fce480f59..3ece2ed9fc2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ec26997a7d0a37cf95f548b6c8e79288b5b255e325bef621dbcb53bd0801494 -size 
744985 +oid sha256:a45307987aef716eb32768460567690ad8ed191b99e338a0578393a8640b2653 +size 764127 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 9e62403df64..7157ad4a70e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:87b5a8104ce97ea262ba9024d8238b1f0c42255c1352147b19737ef82a96b996 -size 757603 +oid sha256:9a8f3d551e8ebaab85066ba881ca61d277352e03c7b8a5f908f4e617c37f701b +size 795885 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index d894a323e7b..c2908e3c5f0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:552707e55d96f09fdae33e8661fe4ed611a98e4d2fc8ac0d72b82f5c95f25f66 -size 693313 +oid sha256:d10a67e8d28b02485f0fbe0b88a9cc0b0ac22c3fe10cf33d86373fe8394ddb10 +size 720891 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index e035a1e447e..1c3b543c528 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f0d9bba3e7e83ae46e38d191f08494d49f9fd967d75e11b37d2fce106a25e74f -size 708661 +oid sha256:d94a82872bd8bac6e6abb679f8a85510283c63423c0025611cad6f7402dd98fb +size 735153 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 74be280a0df..c6f296c0cd3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3ab590d8aedc7426bbc6a3c6bbae7daf8ebef2b71d5fd7288c636d2b99f03d8d -size 655965 +oid sha256:c35053905488285a49275c59d6fe99af1bc90504ba85a1c9046497334b5808ad +size 673627 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9aae529ea61..36c04d43b2f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e853ae30637e7a95d32d6a66f54be3f32d575601e8d65d5afe6ea87068a8dbb5 -size 766207 +oid sha256:6e406217e08766791c90a8e63ae4e929621480f15a9167529d71904aea92dbdd +size 798667 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 99d0150c26e..efbbe7f7dbc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:efdd23bced759757c09f8a6d1a73412e7d00b095f2ff95ea328b27d93ec11ed3 -size 725899 +oid sha256:119191ef80ca9b78ac32ddbeeb3acfe9beebd05c8439bbf1387496c5732c9d24 +size 746421 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 21d6e65daf9..326ae4e0d90 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f2fd617cb48d6261826321d24668ca6548aeb659a9edf5ec71c0fa197b26f5d0 -size 749625 +oid sha256:f2cad993f78e4b88c9ad6157c18fa65f845542861e6d8cb8bafb697904af848f +size 781297 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 4e82165e4e9..80c7c99ceef 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:294f00535125a753a38d4cdbf859969b7c5f057927701a4fc1c944b9b9b28de1 -size 716619 +oid sha256:38333c6033a6981223067fef012776695117e470922b64594599dc5e75120dea +size 736303 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index a565381e2a1..f0267541b6a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ec638e6b065e1fb309118e9e356e943bb660d6553dc618f6f1dcbf6794160b0d -size 729779 +oid sha256:c3ba1b72bd40b0e86ed0f210ff06d19825c0455a38b1a680a27da363a79d8178 +size 768061 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 02443e2cb45..95e41d79036 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a96997bf872ec323d65348e723ac3448d2ccd32a8a3fe77bfe742ffe1854cf16 -size 664699 +oid sha256:e3ad0b6ce3e8db3147a7479cc0a05cb061c2c3e3381b4ef168075092588e0e75 +size 693905 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 8bb8274c032..c00ca4d00b9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2b6e9844bf3282fd83a7d67d7e9824b6d18be6276d2d34bf852fa1105597ded1 -size 680739 +oid sha256:b47c44d0eb300dcab8a1a53a8b26608c24da7c6fcebfdfc918e89a70e9ad17a3 +size 706639 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index b8bebc6653f..f637fe9ba38 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f5509d803f319833f787fb0f6b008f5f7aac42729b005a35bf55e06218db91c0 -size 628191 +oid sha256:a1b183b835f423cd9f3fc4c6802ff95c5b7c83743a99ade2c0ffeb3e2d62d1df +size 646641 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 6a6fb5d2308..1f8c1852999 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5ba47087110cfe048744c61f9d4287f69792f883cab851fc75863c357ed56995 -size 814439 +oid sha256:6b624f2046f2686f0bf0f60034092f1ff973e122805c9c528fefd13c3ae6df35 +size 846111 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 2daa90306a8..258d26fc38f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:46241f55f4d7ec10b52d0d658f4615907b45c2554130bfe4c4fa1c58ec528921 -size 774131 +oid 
sha256:93213c5ac1271b1a7164e517eeff3b53c1b9d6e7d22582a56e65ae275d8c6bc9 +size 793815 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index e2f1417ca5a..8efd109df3e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a8b7bb0d56c82aa35f2f4400ec19afd5cf75b63d2dee5012752ad89b3cf38388 -size 797857 +oid sha256:197e0a3185839c6570259dd3d5141a652f4f2fb3df99c7a4c6405856b5bff9a9 +size 828739 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 9de25d80784..c2b84a4ca4c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:f41633ebb96febb31d71ed967576b74297fd046cead3f6b730e2b4ba8e7b9a4c -size 763223 +oid sha256:59a819e88a133dd5cb54572dd1449e9def11dc21d51effec3cf2cc876d8c16a2 +size 782957 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 83523f73c20..671f29b8343 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4c7a4a2c4014c6620fc043fd48328454813a738541d172e9b369bf45ad6dba27 -size 774755 +oid sha256:0ac6859db529c4e5c7528ece42caad5983166f002d8a564d3824884bee5d5890 +size 813925 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index e723566374c..745d3bf2908 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:0fb924f258491fc678450ae6132ee61d9ac535ceedf4fc1296e4ede828dbf16e -size 675637 +oid sha256:6fe3e503b72880912b193927e1d673fad41ab83a0abc2e5e1d9099ee7b179f35 +size 703855 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 496fe6a4878..0ce2e29797c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:47f738aecba6e1f3ae8c4fd8d108fcd1276c28a169ed71891f0f7e95932c515f -size 732227 +oid sha256:a22ec965890342fdb9206f376e0f101da89ac098a2a3e1888b2da6cd9125c7a0 +size 761383 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index a77e6c87f96..998ac6f2811 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:8959ff698484ca6fc2d8f610897b3c8ddade92b412302dcd4fc7919a58ded8e7 -size 638979 +oid sha256:d2ccfc4f06cb14f8c0f03fb1034c59b3e4428726f3fdd8472b97836e969ee10c +size 656839 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 5a0ac01947b..ec28c70eaa1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3e593a6a63b5fc7755472e17d174b11486765a9d6511d2a4e6305f2eeb572e00 -size 785875 +oid sha256:523016993ff35d11f9749af9a37d6ab62012551fb6ae6319aa9eafc98602edee +size 819125 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index a1b983c9bf8..c6661c98f57 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:14cbf5071ec1f66199578dad0bd93e7c7d228d7bfa09882b17593eb0f5a5023b -size 745519 +oid sha256:f66f27eef15aac89686a13a2d606e98fcdf3925dae7159bcc00a9f146f561b06 +size 766041 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index eab8db3ed60..2b6529dd703 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0ee48e89a915f6afe3d57e2ba46954fc7320453e7eef53d5c7d048e5500375b0 -size 770083 +oid sha256:ba8bcdaa3f95a72380e81ce13d692da2245634d717c018e3de66f7fd4e34fb3e +size 801755 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 5a9dd8fba76..e7e715b0aa6 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:430e30653cbfd4b91228407708c4abaa0e050188e74d238f01216c34ef20a63d -size 734659 +oid sha256:dbf9de7ab4013bb7d906f880542d8deb9ea4cbd30a1f70a0403f977a89f8327b +size 755181 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index c420a8b74f0..e21b4c1633b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9cf171612e90dc8d9db94702dde73ddcd87bda74c1aeb6b502be1e86c6eceb6c -size 746931 +oid sha256:3bfee67ae4e502db41688efbea3eb308c89e30b653bfb26704e388c9911729b0 +size 785361 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 5efface6a38..221c7cfb94a 100644 
--- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7a8b64c47daf3c4c762837abfc3fd8797ebc499f5c0b6824af0c1987dd46cd71 -size 647861 +oid sha256:42b4f0a6ce12959d80af95187aaaf98a1a1382c659362ace9701fcfcd2648199 +size 676869 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 2327e7c72c9..226f4f0f537 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3a171af5eb8eba953b17eb5b0daeff9b3f6163843708b1949212e354231c53b3 -size 704453 +oid sha256:494f5c4919ce3a204e73d779b5686fc5a4f8b05fa3ba2e60dda408dfc30fb50e +size 733609 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 72ba6cfc16b..d04ac1d0dda 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:65e121ae186c9027a981b42cf844de5986f5908b21573e620e27ac5b74a3a102 -size 611204 +oid sha256:857169aea083da08b3c84f3db0f04f8bdbae49b7e50ce5fe8889ceb72d8332d3 +size 629853 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 0768e68f3b3..9f7225da73a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0cd29e1d3b802cd1f589096ebd2e51e665431e0abf528c275f53af3fb54a80e2 -size 697141 +oid sha256:352d16a20f345d94366807b1405fa065bb7cd0a99ad19ad00675da72a1fa6bed +size 724621 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index a1bf57a61e4..b8106cae864 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d9d76f49b82ac35e3c7f635809f9fe4d1cb5f6c909e685527e998c1f513dcd64 -size 683815 +oid sha256:99262d5195418ee75019e7466045b714b1357e97591001adb91c3555ad78b57d +size 710209 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index df15c981927..7c9bd6d369f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6eb3a4acc83657b4d0746149a4fe644dfe9590ef82e6c6576160ce2e8291bdb6 -size 662293 +oid sha256:4fbae733846c3e2e5bd892ed09c2b9b361e589f3eb19f6bf0fa1164ace24416d +size 698503 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 22c3a78b442..d2773efe8c3 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:da107cd9fddbb9538a03dd8da5170d3e4e4d02d154b481a096dc7ae7282b7427 -size 584880 +oid sha256:1d65ecc5856e6005571abc9125fc3ec17d084f1c1aaf9e0bd130d2f6820da1f5 +size 611816 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index b20451a280b..a823b2e4e0e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:50275c8e5fefc022d58d189d92a32d3989d408d72e996550f46fbcc8745f8f27 -size 689989 +oid sha256:c308aaea284568ae560f450666a84d1bf8f54a4f3d35e4b0c1eff0c866a11068 +size 717467 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 65bae22c447..d5da7ab2c61 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2b2119821f3636f63a52a9d9ee3282003cc7b3de7d14d589fc960598afac5bdb -size 676663 +oid sha256:470109b7aa0abca340ebb6ff41071ee7db42512f208139d1e7a8eeb563e5ffb9 +size 703055 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 218f0604af9..3cacc47b0a2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:effc9766e21ed6bcb327e599d265b5f804e9ecce5cc5d7c432f0059695c277f9 -size 655139 +oid sha256:add0d027b6f9078d457e381f4f0b99206341e5f195a29b74586d6467c7ca11bf +size 691349 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index fa13a184ef1..192990f2be7 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aea24a2f5c8d9d0db43001d740a616d8219aee39f7e86665e13765c193409dbd -size 577726 +oid sha256:f30f4f54f651f3473f43a6e819f25127d96a645daa75f6e6a2c5583ada0aca8d +size 604712 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f5418c51080..e838f6b8a50 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ae810cc14c13ed591d8729989fefc5862cea50b245847e4b3940164024ac52ae -size 609964 +oid sha256:80852fe45af456df4c80f55d3127c05b404d07a6dd0e31fa3e00eda3812f8911 +size 640453 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ba4002d40f9..c92f622ffe9 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a6bc9247c6c798d17f0518c6dedfeea2e206213f13653b1a567c297ec63900bb -size 585000 +oid sha256:406ff311476c226cd1b189fa45f4a852e5f372ef7bd6a39b4053a4fd5d93096c +size 605424 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 35271e5a1c7..3ab47a9d4a8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ba1424c6eeccb641d163f962eb3aec4d654284c0251b32f4a814c4e2cd4fa14f -size 602214 +oid sha256:b7924d389bc6b7a31006f0f1bb4b1febc4692464a1da4901bf6d39b85ac14393 +size 632307 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6d5e48f13b7..e2c8bdc9b9c 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8918a21336b43520f34a826e4c881942f85c88fced96401773e1baa54302a385 -size 580258 +oid sha256:e578707810b787fe06c3e99a2268ea521029e985da56575b8ad2c4ecdd463f09 +size 600780 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 7d9bf08a456..75c156f74ad 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4ab54e9a1eb97c82ebc5eabd80d3b4b1a39c7bdc0340d9dbf8a6302b6ed93c17 -size 599880 +oid sha256:fb64ddd2332e566a56c1786455978c5b12f5f3771fa21a95260e119a45ed1785 +size 635303 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 5e21ba047dc..eca6369b425 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bf989f6e91080a01e148f6e6855e0a9a34dedf71d28369abcbd2e3c9728af16d -size 524492 +oid sha256:98c3759af7723ef95cd024bea3fbd2f6e41e9e6c1678f5467ff18fb8b9e41b6d +size 552858 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 96302b0ce65..3563e68ad17 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c517f1bab391b97f16b91ffd7767eb3befbd1edfd26284a8f04522711c5012b3 -size 573830 +oid sha256:137eb9166beb993ed9644c01b2ed371e65a9820469e0c7901e07b8065b984f53 +size 599928 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 752115fd5bb..cf60461e6aa 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:47d3e312af0b8f63ac3520d836ca689278804f1fee7eb09c28725676dfd45605 -size 497702 +oid sha256:68fcf5dbc424f84729fe8b43b769d305907d7143df6494ccade843e13456faec +size 515954 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 46dc462efa8..695cfc9e182 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a72169992fbb65d465259dbc494882ee78739ae7b9ce9b6c8448ca67f1c99b9d -size 602812 +oid sha256:8702d02b630faa580e4fc8bb921653d954762bb78351063651c281b8a815122b +size 633301 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index c2a1fcbbb85..61b5e3b2c2f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b11d6ac80a609e7ebf508775d3050f12562db8bd0021006dc0c21c970cc7b942 -size 577846 +oid sha256:711f7867387d11fd03542977e1970b689171a5ad489b71709fa98b6200594cf2 +size 598270 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9b6555536cf..89fc0716246 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:85d0203c4135dbc936ff3ef9d52acaa242e0c4f5b1397a87f5a8903010d89865 -size 595060 +oid sha256:2edbb52d3341a5338d96db9e882419c2a5d763a337c3aedd935d7c4be0fa5f1a +size 625155 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 01d0f61c0b6..0e410da8e4b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:406b7a51cbb3227854c705982c4928324715bfc14faec22c8aa19dad10d4f804 -size 572316 +oid sha256:2bcf55bb363d826e360b31711b87a486e6eb09efb89e04b56daff11866876308 +size 592838 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index da6a1b204f9..a9939ec1281 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:72e43b1ef964a03908fdc1a1f17e337526b10ad3469d247f1b32165af7eea4b2 -size 592728 +oid sha256:d85d81d9bfa4631fc99908b786f0bd2b1b8c675964255e7860df88a8d31cfe1d +size 628249 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c375df2a813..b386838b74b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f1cab5ae9d391783bf153fc6c169f86761352140a60ec937786846456f7a5f5d -size 516548 +oid sha256:1e03146d0d9859dee2d02d73b7e1c8ed55215363fab3b311e61516e7db074eda +size 545704 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 6b5321d99f6..11f3d6a4195 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cf59b28cfb25274e6bcd13484eb8c35842f9b77bf6ffef25a0a333e3f94933e0 -size 566578 +oid sha256:bb443412df781d1f0e7265be77a178e2938753a08b948ef428103b78dd743a56 +size 592182 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 9e738898d34..4eada4c349a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 
@@ version https://git-lfs.github.com/spec/v1 -oid sha256:496f028edd5063f09350d3de1845478a0c954e30e92e634a85ac169b062554f8 -size 490548 +oid sha256:f48f135682493c5f6dd065a3191fd55138fef7ca72a5da604c92184834898972 +size 508850 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index cdd4b6caeb2..ff0ac1ca3a4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6202b4088a5e1381c650e9e9e37f774ba608eab3f6aee5238c5b0cf9ad0f578e -size 630621 +oid sha256:73d4b33fe4869379c0f570c4bdeb35c4aa11715249921c53f1f174d6ecb92673 +size 660517 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 864745e628a..769cac551df 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e3aefef2ace4f96053573d3c842413cd4cdfb59d7b89669ae28ba2fcb4e03718 -size 595986 +oid sha256:20827fcf42378398b18fb617d269f17dda13f72997101cd5b26c897ef97ee29b +size 614042 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index a13d717fc0c..78d66842e9c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dc459b606a9dc16c19065a36201f4e7d272d7effe3053b6ea7f78b6037f25849 -size 621241 +oid sha256:eb7abb70ea711647f3addfd25a60253253424b3946f2d52914c9f13f36365f40 +size 652913 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6a488c44759..53dc0087629 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cb86ea2dfbd7def3178a49d4eb3e5b120f721ab8cdf5d2b056e8c5f550ab7665 -size 598446 +oid sha256:3ba1baa143e38be446ba8a67b609c1075da2219fa8d1eb77217693123be4a417 +size 618971 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 3751c7287e0..deee2b15a9a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d2b9fbda58ef521ff94f5078df8d238a32f8e328f1eb60650f6080f47fb0e4a2 -size 624927 +oid sha256:2f59d0d37a18c7ff259f2d06688e0ff73c35d296d17ec4318f2baf1ea97e6ca9 +size 662619 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 117e435a167..2405c35489c 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4c211709b4abd17603f4413785d71dd93c2f5d0b45c62d201cb12bdfc84c134a -size 536070 +oid sha256:5710db6f07261a0241a5ac8e2f4508000e78bd8e00e1c908d16c8ff81450e5cc +size 565028 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index aa69bc4a0ae..62b937df738 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c5fe5c25ad86f46ad0c43b6875046ceb777b5e30b1ce2369e8013589a5be8744 -size 595128 +oid sha256:5647a6d241df3e563714a37a28df19e9e2f8508615d0338cff42f95b1dda771f +size 622015 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 71a17541fff..411c46802ad 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f128feee417de6e1e9683791f98f5a9d1086094aefba9ddb86109b637e57e06b -size 509180 +oid sha256:b3480af5590d98fd7ed24f2dc7eedda893642ff9b0d69ea767aa1f070601d3fb +size 528420 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index b2a265d110d..0dd4c9f0a21 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c7d96b2925111fab93ff8e9ffcedd8714b1d0636f2929f9464e516808b6f4688 -size 623467 +oid sha256:27671bec402897b1d9f33ee3a6ee8326d3de1e73ca9bd18bbaf924c3ea1beadf +size 653955 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp 
index 074584b42cb..ea3799d91ce 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c0f24ef0d4ae15da5a5be057909cb65afe7944e2e7e6221d32207c4c27a5c31c -size 588832 +oid sha256:a451cf4a80fe2b284728c9669038dd86bc89fc933d6364687b2fcb9f61538ac1 +size 606888 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 37fb2289721..5a3f392b32d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0831b473003b938008efe5ee6199e4b54e6328d5b6c20d5f048326a3e3aea997 -size 614088 +oid sha256:914454e591056af60ac7d2b038e60e2a52da6f56ddd3c706bdd25ee04a0924f0 +size 645761 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 57fbd61b01e..768d0fa115e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:20020a442bd5b447a27434ec16a19ac71cf479bec0a9317c440cea1de6e332e8 -size 591294 +oid sha256:2b3373635b14a3f4679d2b10fef2645c62b18e2e0c4403ceffde7233c8f81b4e +size 611816 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index f8de6328d22..abfcc526a69 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:027460bdb21c8e2298842aae89f3894c807e3ed01951c6c663848cf1ab3883bd -size 618563 +oid sha256:ec14339bca35e3b5a39850e5025e6730725d3dbd2cb3facbb7d1d6e0effe6252 +size 655465 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 43b022a0736..417615abb02 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d459899ad6247cf963b0eae298c43c7d0e9d027b6db88e1e3c0a4635f72492b0 -size 528916 +oid sha256:ffbb6861b6106d59a800e5e2a8a8fb836c4537c7b874132e8a866bec614dffab +size 557874 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 0c2b431e56b..337dc2d90b4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f7fd01306d213c81692c04f848f9592611415dddae14220c781114a21c8d9652 -size 587974 +oid sha256:0da65c4fcc69e153dac89cffabe30523f33aad45a65524b14caecb0bbad00ee9 +size 614860 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 0b82f849606..495c9dccc8c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b29e71d24b8794cefcfe9d2f2b91c2930c42211882d12e372822c1375f1d46b8 -size 502028 +oid sha256:6e676d5ffa5ca4355a1ef03af5df465f0e1d9ce5ed4ea5e01a1106870e263133 +size 521268 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 61e0f8fbd68..d9941ef7a87 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:778a9d0cf0bb7905fd05302a2522632679a3e3c3263163857bbcab10841de94f -size 915821 +oid sha256:fe028b3a6af8fccf565dce7947e6d09d3799b093fe1057f782c45b6928fb289d 
+size 942659 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index f6827d2b093..33b44392674 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1f04223452b864d2dba21b3bb0080e26be18636398857755c88219eb4460294b -size 852425 +oid sha256:2b19a442c86219ebe7e80eb878b28947508553c24a4cb2e26ce071b5f929d43a +size 880249 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index d5c2f0b442c..a0917ff39e1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a4ce077fb4e17ba239181d532a951788a9a2ae1fc568cb7b3504cdfb25f195f1 -size 
807731 +oid sha256:e97b68638adfd4ceb33a491b3366e59f69914d418a78b19aff2754a43a015043 +size 822483 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 806d6397070..4a3a3ee7242 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a06f76d89ffe39e7ccf0b254c71a46ae594e44d1d22176ef1af482b1923fe735 -size 869593 +oid sha256:b8a2c6f9954d54d51d5533e6df846c15df46102da68ed76d8921bee7ccef69bd +size 887403 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index d8bb61c9082..0ed40c67d9d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:f805d0579f00158f380c8e74c52da70e0e3d0b56de6406bf25510f98a19efce5 -size 810589 +oid sha256:5d65cfb44b517409a6d45c01f886655f1a145636cbde220f49e725f25df0b589 +size 829237 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 1a288f1ca6c..bd47585e1eb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a411a5c6fec7e927cd340c916a7029eaa1723be75d484df535e04bf6ca403eb3 -size 699355 +oid sha256:7c4f752ae5e0913448212e20fa410086b335b3acf295e41b3422a8dabb411900 +size 716227 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 290d4cf90c9..0453b89d8b8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5810d4ee0572b9318818384b9686790174d6eb4e437aa843694d347de9d6d50e -size 905751 +oid sha256:18cd549300a72c9410e3d95f47e59efeb7e411282587a0e013c6367336a9f897 +size 931849 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 2c3aeab282c..3e84ef5a173 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a13ae32d78a6b839bc90a1fce87c3c3e88bf93a04dd64b1be8c9bb28fc52115a -size 843145 +oid sha256:08b51e3f259a990081a444b330cdeedff8a2b7d11b598dc8cd4bdb40594b6c0d +size 870131 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8b55e84fa73..a3e741175db 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:41af0049f88c454dbca591f6ddaaec9a7a2cdead79d4b21cae253b546960177a -size 865295 +oid sha256:89f3c25d4f3fc1e987c635df965f67ad8636a9c783220d9de49d708bc78a2ac0 +size 883055 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index dfc3fe4e610..57e70c3dbae 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:58a4782e071b6b70787c61b6098fd8977ee39fa11c906033efebb6cbe2751d66 -size 806193 +oid sha256:5904a99fbcf31df8096c1372a32b3081ccf47b2fd7e4413c408c13da56faad2b +size 824741 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp 
index b39b855ce67..568e8943cb5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:21eee0697af4af410db22c631c77823f5b6cee7d5d45ab10c5c866f2e6c449c1 -size 901643 +oid sha256:d625e5e099820faaa9075a6c5e554642779acef7144b746ea4b5982af1d621f6 +size 938347 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 1b4afe9c255..1c0354c3f20 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:93861500f9e9509ee59e28cb8b1853b3f43bbf2b9d5ca6ea96e4e446d9a11829 -size 818261 +oid sha256:ae21d77e9a9208d8ca2777fec8fb4fcc9a06da732191c5052d638ce036078626 +size 846085 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp index 
35637ff89b4..f68bc21fd27 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:02b217919549753e42726c28bac336cfd5b6b948a2ee939be814737c7254c29b -size 845105 +oid sha256:fd805c77605ee9bbda32f360fd1859ca8c47f6827dbeffd42241113dc80e85a8 +size 879489 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index af4e8cc99f5..3b73eda1d63 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7347e30d66d0aaed4a5afc66481508933302b9fea76a0e674dea9f082c36b74e -size 754273 +oid sha256:3d1bc51147d2294217adcd16787541a3294d4084b3842815a3c4c85154f82aba +size 782493 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp index 
73420e4b921..dccaae2f0f9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9e513a860e3a3b6a7ad2051a23e5ec82bf3c774e45f7fb09bcb269edd704321e -size 711117 +oid sha256:0484e9e6cec79f824ddfb3b7fe5960a6073e999710168cb3ff3abd5ce2dcf6b1 +size 733613 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index e8263104b0f..3cc0cd813d1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ccb699e66628182a6c920b86523f8c0825d8e8ac4fd776a1c3e525db5bc7ce71 -size 661973 +oid sha256:05cf6dc2b8d418ed59bfae243ba0865acdc46435d1b87aa744ac40b4880db609 +size 680227 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 
6132ae26c5c..0dd209bf698 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9260b248bfd1a4d51791cf7d39a8711f7ea96a1ea03ede851ba1dd5f26e00348 -size 864345 +oid sha256:f923600fa9efe870f6c373d6b05dca34db03e4c2cdb8be085c313a242e99fc1d +size 892809 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 83a61f05847..719a1a35110 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:acaeb29401607b6a16fa9073d28bcae65c90728d43dfd9cea940804f6e0028c9 -size 784021 +oid sha256:b5fa84ec1f010417a78bf91daa64db340e87be783454caf2a00a1d7fc08b8d1c +size 801929 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp index 
c67af01a34c..c1ce9a8b980 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fab500dc357dd7e1027fdad20d7d64725562ad10b0546f17d1633242bb6d5266 -size 811901 +oid sha256:13dbb0251150cdd248a0dbc038aa2d7a227f86c952f653971fb59597eaec5fdd +size 837555 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index e50470de284..320cd75b047 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b531248dba5a8a913f028ccb197b34e9042f278be168ea56273f34430d2f4788 -size 725067 +oid sha256:51cf090f8f68366fe592ebe85d19890954a65a4489ae9b7d4dd40b3a91add4b2 +size 743615 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 
4413f9ed32d..6c69e588ccc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3296f61780fd7ac8b3392f4621556a09f65db7a9d3a6355d9bd3d078dc46465c -size 869053 +oid sha256:9a53e39f4edb4e8216ab53e24480650c3d38b24667f02e0285b65d4bffbe095d +size 896779 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 3d335f86737..a13d8af37d9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b26a33c2ab7628afb0066f4b3484f1752acaf51b1fe2263835141e857ee7f150 -size 831015 +oid sha256:0352663c468e1d9a8bedee73a043ab97127b3960f066893dc0ec6705f417d781 +size 858839 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 136c56487f2..aff9361ba90 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:41eedf2e478f0d8d446f9829bf0fd994786a1f175e6650348488cf06efa1e5f1 -size 750703 +oid sha256:ed0b1cfcd2f78706036633436ca7988338db572570be87fb4e5a4f7837db00c1 +size 763923 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f9a81d34413..677fc169fee 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f26bdd1810c1c4b63db4f3993b392dbb31dbf8bab4cffdaa213f7ae00152d026 -size 823713 +oid sha256:2fc75ac4286faaccbf977da09dbaa83531d0a2094986a6ffdf792f8fbabf71d3 +size 841573 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 5ba4879e442..626e9864a31 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9957a5e876f36720e42b43d0cc09a47e3b0ad111ed049becf455b757711c959e -size 789179 +oid sha256:e3fdb9672faf29aefa6ec0038af9d3da862b2f49d90895c080a2e5c6e9796305 +size 807827 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index ee9b3a7f21d..216864fc2a9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bcccfad26804eee5b74e22d7d8b3d7d695c08702f367d7f0e9e6b82c10cd8e13 -size 642423 +oid 
sha256:03496f7bb0337bf8a593a4bb2c2c1efd36a06dd35efda6dd85b4b9d9cf138b93 +size 658507 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f6887c534b3..1145df509dc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3a7a3b00426f25096f24863325481a9eecc0ea220f8d0f0e2b89a92369236a55 -size 859773 +oid sha256:dbb04746cf5f5fd78c103f015b0aaa76f430da26a19baa1ed5c2b46eb07ecf32 +size 886757 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 228950cd763..97ed359ff3c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:8b6da0fb1d5cfafc9d85c9d701d0b22f31027a373655e25f04860007bb36e8bb -size 821735 +oid sha256:b19b1c8152e6d4d1780036defb657859504843dac0dc86e14d9758d9c47f148f +size 847931 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f8dd4d73115..5faaf925964 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cb14eaa9e0407795914bdf3215118fdf6c6ca16f6f3b389829d95a66382ebc18 -size 818527 +oid sha256:064ccd24b5ad5f35582d5a1abd5518744decaa2d572344aae8a225cbd6c0e278 +size 837175 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 73a6bd6916d..89538bc6011 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 
+1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b33dac5798f988f964d25d667c481307054c9da727c2fb30e985c4a6ea25be8d -size 783993 +oid sha256:9458cc6e096af4e7e2d7182e2f46f41c171a2fa0317c80e042d8303db123699f +size 802541 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 2821cfb4473..b2320a14f43 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b538aeb2c68785d3df4acf8ea573e86f1c7104859493e80de523e71cad3a6c0a -size 857587 +oid sha256:8571c536336c45c6de2ef334f3e30576b988911e430bf068a8d7217765a1abc8 +size 892269 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 401673cfac6..337d9a59eb2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ 
version https://git-lfs.github.com/spec/v1 -oid sha256:46a1242cf94d2e478a95346eb91f290e9af51b15f5ba36b6ddc20089422bbbae -size 771641 +oid sha256:721efc372533fd2b6278a093890b002e6962d56abe3eac1bd159570b02acb139 +size 800205 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp index ac56e564374..7e4d18539c9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:369921eac2d2a945b00aced15bceefe950597a202d98ee9c305cb2e855c97b59 -size 824533 +oid sha256:57a55fdcc987b0716081b8931e978af5c1d4b1e5f23773804656de53e52db93d +size 859017 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 88ef2bf86e8..a36f987878c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:2918757017e87c4876052df5f28a7c4f41c271c60d999362e3093676befe10eb -size 733653 +oid sha256:033519dd713aef0d1756f65aa5fcb54d51a83a8ae8f902303d1a9634ab77dcee +size 761871 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp index e7d223ae72c..0a41c2c534d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f009daef627a4a721b2cbd9fd00e954743dabd93f155e898e8f0a47e2ca29fa3 -size 663559 +oid sha256:5c7d38e9e6178b26f038f4d739ae1f9f8ee7cbf5038cea339e7166fd365726bd +size 686055 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index ce4bbb80761..a41328cea20 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:a56fe6d6a2db4bfab158d518b63ef4d356fa1891376f1d8a5813056545eed019 -size 605880 +oid sha256:b6c662c40574bdf73442da4c33bd256146b73a123f615f2ea44fb22e1ee8d3e4 +size 623345 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 6d5f9827a62..2008453ab7f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6190b38eb1b47e481ee1c2b79dae619bbb0be02274ab6bf5eb88d51326463949 -size 820289 +oid sha256:61c3cefecae5d478e7817aad00a9a8c6963b08e8a44fa7c34f7f6749b9a2b56d +size 846733 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 1ea263a5b2b..ce6338aa855 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:6cc3ec1b42328a31768f241870334870bc0e38cf8eef038c7edbb15d9d18d6ae -size 737401 +oid sha256:55ce71451f6f099bef45c7004a97a4e19d77ee4d4e0593fc1688b04e08cc1e18 +size 756839 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp index 035b6b85ab8..62a74fd6733 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cb81346f8da4a4266ede5bc33f8c6003c53a58ff5be282feb219bf35f9962224 -size 791329 +oid sha256:cbbfb63024cf2d3c2f9b38bbebdcfc5aff0d3e55725d078578c69fdc350ba579 +size 817081 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 33b66fc9c0c..b062f192197 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:2e3a5a5e1ffb8503c4e545e7e58d3262009afdda2678a6feeba453d6b5ddb024 -size 703655 +oid sha256:cf17818b5adb8606ba4ed36d2d9aa9fe8f531b3fb014880f4414c8501e6480f9 +size 722995 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c92f437dd5e..97da365c8fd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e0f792e06bb69ea35c38a267b6a1a6072fd4d24f06879b83a6c157d6fddc2de0 -size 861455 +oid sha256:c8b1503a725fe88e97e9c3b70d2bc909582f13ccf58706dc2060e185a0519100 +size 863675 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 82ae56c18a6..6d9be1789e6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e48dfe9a4c22c250f6474626493f5b934a477c42478e510e15df451835fdaa80 -size 823417 +oid sha256:43e6bfdecc33f99a61855e0813eb24ec56e8e17a2aed3076f833bdc6e6959dd3 +size 820161 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3f362b680f7..07cc068514e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:28af5964174c6be4957ce2d4a056a7ce9d4606d4fb8d5691b928d0901bf10e04 -size 849709 +oid sha256:5593d2b79f41463624c6b88dbd127a4fa84eaf6438d03f6822fcfb0b86c89b30 +size 851681 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 655044b6294..adf3720c595 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:099ce581d33f3d2e36918fb24062999ba5ad221ed0bb7802a5efd4126bea5286 -size 819267 +oid sha256:7c0dd697c56271e530ffa6e422b16f9ea0a55661902f8076b79de3ed241f0952 +size 817393 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 99a7a460a5a..4164fd5bcd6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:27db82166d63cafd5162bdf6defec890658a8697f38e6378fe191d57262cbbc6 -size 867503 +oid sha256:d4680ad5aed58e28ccbb603a44de0d02c0ccf3d13b202cd82a5458437c53f228 +size 874805 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 
1f511d209b5..5c009420317 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:51de39ee05eb722ec10f882cadd54740c3ae5f79882d6f40455cd34e56a5cfbd -size 769371 +oid sha256:8f80efefd06d7f381f0d5da2d6d697362f979dec203436e25b2ef02855a321be +size 773515 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 2cdc0228344..55b247a5d77 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9e96dddc62af5ee21d296337507396830aa099f9387b05795340c37e4534f52b -size 838395 +oid sha256:0f55374229b18ea317dad845496736323191e7475d08d0b2683b6cd54ad06e46 +size 840121 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 
5201740e438..3fcdfb3335c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ae0f2457c855384bbb9afa17bdbb5664ac9966419edb5f53d7aede2bb16eaec8 -size 744309 +oid sha256:e4e3ad59af5543b925da3724f9b8ee6880a4b1dc12b024fea8b6b72d760bf5f0 +size 742631 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 6158f23c0a1..54eb7ebe89a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:66ee24e019b9402647ce358b20d731cf0fb60f651291a0eed2bae949ed4eb864 -size 991251 +oid sha256:228e9de3cefd7f20beb642d3c64f4cd258a8c94cf8befb6da31291d60eefa28c +size 1018187 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 02896201dcb..3166b667ac0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a5c3d34d944a506575d1de774c6361ba987ec9d92816ed04570460f84afedd38 -size 908517 +oid sha256:57210a4c8694b9aa0439e4f09ea4760c1e4d845238bfb8f0748e5ac71518b50d +size 936341 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index 47a61a63c6b..da5539d1f95 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eba346f0fe18b402c47d32e17ab3956c550b9efc26db477d758cd6c142f35bd0 -size 966643 +oid sha256:688dc964973c7a22253c2db279c4026688f770cc24fe907c46762e81417e7525 +size 939903 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 13dc8e8c1fa..fb50362e848 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2c57127fa6bc94662e15e09903461e6eed5cf69dc23b327577a2c3ec25121909 -size 890859 +oid sha256:3ff98f8384fcfd38bea48b87574e6f8510d827d629b373c609702ea3a82315a3 +size 903339 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 0f2be0226aa..d195b179ec6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dcdbad04441b3f376ab91ce462b6ec1c07083d2836404136a8a5c1be15a79c06 -size 950451 +oid 
sha256:02f0e5a45c2e49e345569d258412ab644cb9cd27eca36a836f8a56b61353d236 +size 967569 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 86853aa64c6..22f16bd64e1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:499de2440d61a3550ffb004c03a876cb6cb3607b3a7192ec3f7673aa4593887a -size 871171 +oid sha256:e247e2511b49350cd1afb6c80b1dd7666cb977ac41f57d571f11fe7425221194 +size 889127 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index e27ddb82ca9..0f2ad13da81 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:43ee8dc0041f264750162de1246d639039c1972df16002da594b785e29fced8a -size 813767 +oid sha256:494a456ad4b188ae65f62dbf181755cfe7216f7988af6c678b96647383915805 +size 800693 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 0f9a578bb5e..521500d1415 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a2f4f36e6bf857adb2ce6e1c1767703603696b7b953000b30872a582b3141fa7 -size 768027 +oid sha256:92d3e36904912880257ee597f4a3963dc7cc1f3cd2da355e789c3373da1ea155 +size 783567 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index cca3b01d3a9..a1e2c970b5c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:107b74320e743ac62efe91c9de9f409fb2f6cf5611cbe3facdfb9c82e92ab12e -size 975459 +oid sha256:0914f1725efcb85979fb8a9b3aa23fac07466e4ee7492a631c1b222ed50ff21f +size 1002395 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 38f3179235e..992de7e0878 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:51427bef4f87f76c1c9fce4dcd21d1c58c2c6be08664ccbb8749c5e1d7241b02 -size 892725 +oid sha256:7c1a3e83378fccaec0b141469538a651ef2cffefa0b6176494bf24961fd55a1d +size 920549 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 343e5b9d90b..8f42bb205f0 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c884d475f8fca37f726f81fc029898deec841d6d40a5f76f49f6b0b72973332d -size 941467 +oid sha256:84d5b86b0a8f2aa534cbabd150bb265a8fdb35a7fd258fdb214c9cd10793c769 +size 959177 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 90b04ee598d..13288f268d9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0e73f6684446adf3492807a4c06e79bbca514bd63170e00b383094a9a5703579 -size 862185 +oid sha256:d553923fa36e4dba4e58281cc9f09fad697a76dfcd337ad320eff6f906d7fb20 +size 880735 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp 
index 1a7d095d5f7..a30002992c9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1815ff354ae4929b82d24eedbc9870bcc96ef919699a77d5cc67f911c8316d42 -size 980625 +oid sha256:1feb489fb1f060a733c7fca3f9d679cd090bbd6310c060f4bde23bc9de62a121 +size 1017231 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index b2fe9b9f866..edc543d22ec 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8e880caf941b745e3db70ffabd2527849e777179faf5f13286c197c561aa9fe6 -size 891127 +oid sha256:79be89fbf3a33d11f661697c6512242ff73fde6ab5275a62ae752f9b866f7ab0 +size 916533 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp index 
0b0c9d02f44..461c9028646 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6b2d65217c32efc957bd3733306e92d5d1f9e5704d564ccbfa086a7d4a1d613f -size 899815 +oid sha256:8a8d0d549c1019fbaa77864884bbc371de396b87e16e62e0d1e308e3c5370718 +size 935137 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 16235c99152..24d373220ba 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7b30fc75be9d8b42e5229bd530597bfc7069465459c3d50431c03f52bbeb26cb -size 807801 +oid sha256:d41301b0fe4f266b5634ab1be1e269466ddcf687333356fdb764df185ade52cd +size 834441 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp index 
24aa9106ed3..2f62e765503 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fc0c2aa5670fa43558940f108404a36cced1b57a4c7013c2464fd04671a243e0 -size 864403 +oid sha256:52e6e75881ea8010d363b36ffed4709b3f0c9e1ce694e7020058eee124aa4767 +size 856215 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp index 9a49455e7d7..a268957795c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e795c5055377f51ce0a9b66ab71ed96b7b07b9bab15465b644945383bf58d73e -size 786745 +oid sha256:6c2b03bcf69bc882e11d940d3aaac0dae2699ebd6d9c1173c1a02854058132ce +size 805047 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index 773eee8b081..9490ac2de6f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:949a591b30afe27477bc58737539c57d3aa1d4ef7456cb7568ae6cd7f6131c74 -size 774067 +oid sha256:7a477bfa2eea9b69580bb8a48f241ff7d3dd8798914d3974ab50a3cf0d6ea937 +size 764249 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index f80798ac3d4..126b4616c14 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f2bc4606357be71ecc35fddb4b42803065955ce4065c63c75920a057e6dbe0a2 -size 731187 +oid sha256:3cc0f4fb0d151f5ed770677ab3f5658f807deb3f5cdc4ee94e44fbb71ca1dd49 +size 747517 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 50e2504005b..4cf34e99ce9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fb38b95173854996e4685e006ef2e90eabb44b2bfd20fab01983afff417ec558 -size 936815 +oid sha256:ed3ed71864c41fc45fcde6d787db6678ce5ff09eb956749a66f381e7b1e7865e +size 964393 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 0cd0225fbc2..7c609d83197 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9a269091fcdd70962abc0c287cfcb16a6b9b289900dc50f926c10b4abdfb8bc3 -size 852595 +oid sha256:5b0b1430915a7c2398bb05e2ddadb4ec36ec9847f783ca9ebb692f6272a9129c +size 870503 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp index 12e0bf1c09a..c04d40cdc49 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:858108334633e1a2386bcb485a8b126bae1dc129d4cdf27c1e3e2de84fdf546e -size 859359 +oid sha256:e00601ebc79c53f6573bcbc3a708344acd2676f171e8f06d5feea3503d66f7a0 +size 885901 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 5e203f3c199..e794ff57ba3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:46657cf8ff924a414ff00902884e603e34635a7604e457cf3b6ec9f7db2fef94 -size 773463 +oid sha256:dc6e2408b09d0d5f362534e189f69a6e6e669950ca19b5ce0e3ead8af467314e +size 792851 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 52452ec7aa6..9da6fc948e8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8ad31c17f4a1915811d68e5f425e7078841406476f0d3c8cca907b85d9d659df -size 935455 +oid sha256:bfddef269dc67c869f9c6f3b57008905e9e4a7d9f60f98ba2fdb6de476fa700c +size 963279 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index ba50803f917..ba159021f9c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:84475b3f5e813ed2661d1755e377850aa5f01f08f7ee06261dd5f5d05a429320 -size 883111 +oid sha256:6759ebb918cc05bbe306136a8ccadf7b814166c9e078ebec954910c539b9952b +size 910935 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index b6e44f67986..afa96e00381 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5f26a5c5ec12499107590c3eb9ccd763398272e78311752899c3b2f70ee3785b -size 934921 +oid sha256:d256e13f75c31c72bd398bde92bb8f44eb8e33d8bb45e9109b200e97d62831b0 +size 908183 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 094056c8535..65e9aa10da5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7fba802595f5f8fd839df6295dc08fe67f411339e35e311db1e9887559764450 -size 
818437 +oid sha256:8d1beae24225b0bb0a8738c5ccfd8b9dd8341ddd9edc0f0088a54bed0e124e87 +size 832053 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8a4daa77179..dfc4952889f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1af70d3ddfdaa72703ec652ecbe58477f0f3cb76a4f1f5d51c9e5075593f37b5 -size 894655 +oid sha256:6609667c3f905c6794c9898f1321636b0685c660bbd86e4ad4d2a8a1c620e357 +size 912661 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index fcffb47cc55..e547b9ba285 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:a1c21fe7700b0f8d4682b9faea8b6ea2c53a9f106e104517acf3296120f72c03 -size 845813 +oid sha256:8170c2fb7fcaf3ed8520cfd88711d08c08bc1ac546c849f5069938c9e0945a82 +size 863721 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index 9d182a385ff..501810cf57f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0edb9c0f245817534c76c8ba107ae63ec642bf6f63e373945cdc9126b1e923ae -size 782045 +oid sha256:aadabe24f044d550e776781b20a82295480306de8d016b99efb053a6161f3ad9 +size 768971 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 8135e524d25..fc6aee2d74a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e046a09019aecab3cb701ad56588aceabb69c3c77cf8300c5d39100db87cc157 -size 695555 +oid sha256:a76600b4b37400d909240283c48250e048b63c752f18665f9a37fb66d89f5e04 +size 712279 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index ac1abf5c87f..b90b047abe2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:516601339c92a5ea30f918cdac0bd88c892bb2a6b111a74611f88eedd2cdf30d -size 919713 +oid sha256:936a376283e1bb2607177cc8abc6a7580393ee734f0e6bc6a8c7af0264f2b256 +size 947487 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 87c50548b05..0a52f0d54b6 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:055441cfa35794292f9e939a9f272b72e0b9d716e35adffc37b33fe822a072de -size 868157 +oid sha256:63aa2ec648e71804a514b2403529b8fecbdee6bd00b3a55a3ae676587ed636a7 +size 895191 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index aa46b5d74d5..2ea5a11811c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ab1dadb627739a34ff63da1ca4956c42b04b5670bc7d08b434a609e56b418772 -size 885671 +oid sha256:f7f5f6a1b4b2b0a6fedc3ec211176a8f2b9ccbef365e51b8ae2779e1f422ebb7 +size 904319 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 0b11745bb69..b03674118c4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c78bbf2861bbbba07432b6fc3c8d5ea31bd6da90bca49e2f187aee49d1702ff9 -size 836829 +oid sha256:24e6cc8b2f3c61fd70015ea83d71341b0cd03ca912211c80d72aca3ea1476e05 +size 855377 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index d0eddb0c763..5f0ea380169 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6c69d80db0c333d1878bc550b1521d8a12b4aeba81cd1b8c24f5a40e7d565a48 -size 922659 +oid sha256:3777101a0acc9a42ab08ef8c6297e021caa1169944dc255aa8a87472b16f1c91 +size 958227 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index ccfb4501983..a1daf61600d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e2208b9d28cfb6102fa7a8733192b875b26bdc2891caaed38e23b2e1897277d4 -size 834639 +oid sha256:64a0a5778134aea382d8ae33ef15399e4f7a015b512df84a340005fdbfe37ff3 +size 861675 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp index 701fe3cf42b..997cedba313 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3feb8ba17caaae7567d344ac88b8d6ba8b32339b5a98ad6bc55e0119aa06ef1f -size 876283 +oid sha256:527229665beddf886038a5db6cf8d054238133e4b48074e3f5d0d2c75e3271f4 +size 910669 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 6ee66f704aa..0c55488f407 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f65e95b8bbbcebaec630ab6c14c7c3a89a94fef90a874787b28c44f2833d534e -size 783233 +oid sha256:183e2609d39a1520124bc2b7640cd3c12e04cbb68ed96b3b9017cd85241c2ee0 +size 809033 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp index 1b1778ab124..8c212788403 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f80ebc2cda597f17d4e127c56ed75be5c1060eecd2e9257e28e0912d704a30a3 -size 833471 +oid sha256:f5a094734070c27d2c059cf7e53240f426c11bb0bac534bab899f23c1bfe513b +size 825331 diff 
--git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp index e5165960431..1ef91fe3cf6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:696ecea635d2c434ed007c1a59c6f5e44dbff02618566aa63c5bff63b643316b -size 730061 +oid sha256:ff9df5872ed4d35df96cf7b7039a7b10ffdb488827e27557f4f8d88b52b623e7 +size 748463 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index 03b0eec1c6c..9e4d263e12d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dd787479fd6fa25aa2922bd032ef765df3bf2a2844ac93f53809863f7726ad3a -size 742345 +oid sha256:a482242b9841133415ff053b022a7326fb8e940d8a1be61a0762e91b3d674ad8 +size 
732527 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index c06f28f804c..a4bb5441f6d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6d96c5587f984a3a10075b41f3e755ed87c9ac5dc7d7268cf033efc8e98d1d10 -size 660395 +oid sha256:5c8a9f4d96dff0355d743ecd6c2ef5a7fe267e0e124f69c9c329246579e8768f +size 677859 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index a736efb63eb..dd07d7c4365 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6eb3d66fd581778de5922f484ad3ee5c7ac79a5e88bea558230bd3b7a76e29ce -size 878849 +oid sha256:15791f313906c39fbf507df1a7153a266bbebdb60ab32c45f94312c3b657a54b +size 905389 diff 
--git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8a6504fff5f..5be29953b0c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d8dd739af4c7a336080c6efc76d69f8b86a6d8b5ad7e07d7c6012684ed66e4e4 -size 796897 +oid sha256:4a1f753ab9a4fca5e1f1ebc3fc1f990749805f2b9335435389e12b19f4c01722 +size 815595 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp index d0f6528ee76..14f73e71bd5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4c42459fe36c4eb7fe28e7633724ed426d8b6e7ce3d8d185ca08e72b6b19ec14 -size 836567 +oid sha256:3c719cd499bfb24012fd640707e296153af5c0d53c28626111638b8eee1f893f +size 861333 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 56456fafd52..224720ee326 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:66004f6d60e40449f06aa45621dc45e128128afd3c2a540a1f33507cd807af74 -size 748895 +oid sha256:9e82997c27f8b632746f551957e0c100cc425cfdd6d2c8611afd1ff42734fdc0 +size 767443 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f6cafc9a1e0..7f209382634 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:addbfa4ca1c71adf0864374da2e76be520c8f106b6019ca2713730fa2702880c -size 945421 +oid sha256:4ddc89cac30cb4a9c2dff62d973679d30a1edbb745611ee11cbe482ef30105e3 
+size 947147 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index 316d1614e73..755c8c24f76 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:49ce8055bc057da313ffb8cf41e78a5b5abc4a18c5cbf7d1d32207d1a17870c8 -size 1118047 +oid sha256:542f805f9633069d92f37b626ba6931af8e02078f1382f7aed8a09e7a7720b5e +size 1070439 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 0fe116d9b86..4ce87613d8f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:f0039cdeaba911742401fc27c30a85422456196df432194321d87e73d40bdfa3 -size 915425 +oid sha256:06f9df5c9daa342de7c76abe9a5c6bd377073dab0b65234d7baa50cd4350d5e0 +size 912415 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index 59003606313..37d5429ae5e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:41e729ab9310e0de75feba136bc29d09d83337630b5f941c16bdc7f5e2e41cda -size 966157 +oid sha256:633e33b0226847dc791d166b2dcfc1b560af55ffd65b2654195ff9c230b554e1 +size 934879 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 371148783ba..39095463020 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f8438d176f65cb2017dd3f8b97d6c6792687fcdc061a177e126439ca4cfe069a -size 929825 +oid sha256:abb14082df283757906c7020399adf5d0d4c0853f21d602c9c443cca334fe936 +size 932095 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index c0cc7c7a862..d4636f10fba 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2c36d587352c5c594125820ff0120f17693279cd07a985e61a9475aa931335af -size 905503 +oid sha256:d24d955843b3480d2d7e70e687bc9204a2cbf34788b1a83462df1181a30c4b0f +size 902295 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index a5bc8a49fc4..7612f0983bf 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:284fb2472db9827764bfb503398760d0a6653a828475bba05df64d8d3054f2e4 -size 940221 +oid sha256:cdb442ce39e2dcc465c3865b024773c30a4b597c70eb3fa0ce6bb92e2fc30d41 +size 946239 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index e6d5967273c..003654caa32 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f007a9413f8fc49dcc856769cdb0854146fcffb46adcf4d8e8e39514518eab62 -size 851067 +oid sha256:446076017f702e218dd7d5b5f2934c7df9f8a96fb1cda306a72bd1452b845b45 +size 854619 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp index d5c67210e39..bfe1e793f0d 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09dd6631bad2477ba728ad444a7df58284d27108d38ed57c86e08a08dd0dbef3 -size 1122367 +oid sha256:619b978973d3e1cc645aadec482e92d24be0b208353b9ba4f54b7e44df66263f +size 1093557 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index 64b71f907ce..6afef9e1475 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1f1b007ef070a9b6fb4361a412985da5cde69187a9fe1278c644150938a8527b -size 924631 +oid sha256:a4618cc75ce2ebfaaf5a1e50dc7b1494b08ecf7e0fb000dd327c8e680e34d678 +size 893947 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 
98f14a49ab4..0c4d389cf45 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f46b6950e95fee9d23825e69bb4a10a0fa2fcea532c41830d9b4df1b16d9544d -size 910273 +oid sha256:7fb679350e8974fef419509986303f7dd8a5e87d52e51f08d899d632b58caf58 +size 910421 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ba159531149..6d443fdb276 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:97a1d52bb0b2f0d87d59f3850d7cd08395586ec751d855f39e5e8b819767c813 -size 824081 +oid sha256:4e3daec58851281bdedc018cdea275abeb25dc6f98fb5f42a3c89d3406703648 +size 822009 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp 
index b46445dad65..03cbb033615 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c7529eb96de3b59ebc8963fa5263a58aadd92a2e4b2bfda8a10e066113d3ea72 -size 1138167 +oid sha256:8c0a4a7efea99d4fa55b19978499c61c5e656c2c14d185c47c9de5eaaaa560ca +size 1164955 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 0c67574d51d..ab2caf744d5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:27a0487541a8b44f216d690d279e9e8a9e2643fde00eb65036d85bf5765b1701 -size 1027065 +oid sha256:36bf32e22078186b661dbfb169b4724795a844fd19c8bde55cfa205ba1db5677 +size 1055333 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 3ca6b39754e..d85660626b4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a8d50f01a65688b1be3e82603b7c2bf88237adc9a3b339207339abc1e6c4e5e3 -size 1054991 +oid sha256:91a3a4460949e19b84e7abea41d2c89ce0caec4afa27d9c281b2789b80ffb258 +size 1059579 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8bc75fffa3a..1b1e6370abd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f77ba21b60e8fbbb822691bf2b534e8b91d912da04ef49026b43bf1e0d7d76f7 -size 1078077 +oid sha256:2f2978ab9e232d98a17c3a49654aad163e6badc3bfcf9feac41a4d9fba8b0f75 +size 1095935 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 0dc00e09e9c..848e1d1e794 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2fd6f3f60c164c7fad0327dff29d6e327c623e8088ad59b23fbf07b9c5a6e8be -size 970379 +oid sha256:c3a31f9425315990e96e1d1ceec86b524479154ff8b702d34279de2447deabf8 +size 989127 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index d8e1771712b..76c021ee6c0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:55fbbbadf94c4eeda61bb516f15225ca98d574507980ddc4f5e3edb8063d230d -size 905371 +oid 
sha256:65ce4a9e41b130f85c6a16b809e20ef46931fcdda78895b960a067269f767250 +size 910995 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 7efd4ff062f..4d42b739971 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:69569ae909b817f66fdd5af8bba244ab9138cf2148f0ba13f6b1dd31ffb5e79c -size 1113049 +oid sha256:832ed446dc943f14ad185b6b13a8337cd94fb2ba07b2d9d8f3ec4e9ae30948d8 +size 1141811 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 7262c202b71..da9f97363c3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:6b77873302b201b40d1ef7c3d27f7b42d066a9d7049c9ab811d05ffc8f4d9c05 -size 1002739 +oid sha256:94c0eb239fc7bc7c6df8705246200c525e99989f7d24da3d3f1c27342753e13e +size 1031401 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 3b5b73f8415..3012b264171 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:66e359cc013772dc425acded8805374cc3dd1b521bc9d762f28c34744938a0cc -size 1063715 +oid sha256:158c8158b59983d9d20d87ea0b2562aea78bc64c709f2bf74f4c19e93cbf8feb +size 1082363 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index b16de85d844..59489f70c2e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 
+1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a34db6422c653605d45e6f522966b456c98657a782fd625459e75c70fffc6710 -size 956807 +oid sha256:8a0b05e48c8a766bb868cc525cff04c83e3e029be61fc992e947b84db1573a2a +size 975553 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 979b5be284f..9ee7955a1b7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a3202e080e7fb3ddd2c8983344fa3bf8f1b36dfbea74c1df3e3d09f828395be5 -size 1122113 +oid sha256:71f31f421f2f02af7cdd139ebabc01f75725ab041d58e6114bf3815eb9736f1e +size 1156795 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 31f31f3defd..5a3d90377b3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 
@@ version https://git-lfs.github.com/spec/v1 -oid sha256:393fda0fc667c3943b9a204b826b161c5a6259ea9a9ee89b5fdc7b11c39a0cfb -size 1033947 +oid sha256:af7809ae10f64a51653bf44f549d3ee4c98eb1d5de09c086d9e599f5cbe65f7a +size 1063103 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp index 094dc0e13c6..ed31df1bb90 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a9574466697ee5267fe45a67bdac2d9522d6b397085335b85f7fa09202be9eb2 -size 1013529 +oid sha256:c362b2943bed072c5530c1aa7b2c5c494c45df49d6719c1c949ffd54eb36c66f +size 1047469 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index ca9f9096e93..eb47eda1ad5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:c15460befbc02bc258dc0640cc0f455760ad7790f1a32002ee473c711053c361 -size 922945 +oid sha256:d34361ccb8a9af7239d9f90be3f38b311964bd7374b9be746cb80217b3cd3e9e +size 952495 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp index 184faa183e4..0ba66381721 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:75e4dd3297f3c2764db459c6afca4f6514ea46442396ad72d0a6bd4da1f1bf43 -size 898485 +oid sha256:cb98c6afec651ab13fffddc51d5853595d3fc928df1fed30e8508024d9b975fb +size 901445 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 8e9a1b3f22c..6fd0d2f7ddb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:4f010eef34598e7aabcd595b7f48194f21991db177a20128f36efb8c2fa45f12 -size 869913 +oid sha256:20af84ed5260b9ebfb5714a3d89108c0b4b40f6454e254592f50d5ab07187fdf +size 877363 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index aee6741a427..577230cf9b9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:63e4da4b342eaacf3bf4a16c956e6457338339f00858412be29c689a3462ca41 -size 1057781 +oid sha256:b5d3be28531f60ec66aeeea15b157dbc2409cc6fc6fed13eea73ce6092a29ecf +size 1084125 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index df1e792b545..5b9503ce0e0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:edc59761ddb34f0993d52f4f03f01fad999752a8dc4ebd06fac11b8352ab5b27 -size 976915 +oid sha256:30db844f4b9d82a0ef1c841c1054a1e602948a4ff17a4190632ba5c5d45ed19a +size 994773 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp index 135949a9b6b..041e7f96151 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2effae3409b73a7909c3a2fea8db1d5e15bd53d07c5464fa14ee155e64293982 -size 953439 +oid sha256:7b57efee794694e3acbe53499a4cd978e42f2c08a5ce5e5e39d18148ec495288 +size 980769 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 4be2f00d703..a53d14a521d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:e53bda3267e3e8d0f5db9a9afe77b31870c64d5c949b3ddb52272bfa0f649233 -size 870057 +oid sha256:39c7e4041e6042bb907d66f83102762216c7858e51f2ad8a8d4386dbd21f19c7 +size 888655 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index d6fe9d9d166..617797960e1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:85d697cc571496dd38a318e24cdc3f406e55ca38b1c54e117aeee43added1ed5 -size 1076943 +oid sha256:05c708667d8a8969e3b741acd0d31306bf00436dafc6a710ac7e02c6ca5c1641 +size 1104619 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index a56f64a49a7..35f655f656f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ddd8ed7dcfa8e73ab043839e99b149800160a5dcf1c1df7da4cee453a085d75d -size 995295 +oid sha256:16552fc79c7bb7aa50716c3b77b2a746bf27d1479671aebbbabf3402b8458621 +size 1023613 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index ba498e56830..dae02cf6670 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8dd27f732370baadf6d9a3e002ef19ac2c7467e7bb9d0c93482d52d91e51df3d -size 962983 +oid sha256:60bdc9194343694ef81ba891f6f68e41b01c6569df0e1a470fa60c9b198fa85a +size 968361 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index c8de2d886a4..518b2e1264c 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3927f41ebfb649125a1ac4ab353c6f3cb68f3b37d9f74199db5ab72d894219e7 -size 1016015 +oid sha256:1d6999921c27204e715ce799f2214d593c3884859ff78e80d1a99b29482c4712 +size 1034761 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index b4dab6991e7..e438db315d1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:478931b9ab6b528543d1e1279edb71da1c59befdfee4f8a1f9cbf11a18974c05 -size 938609 +oid sha256:ae3564f663adba359fe0abcacc8b67baae55c236bc88672f30ce2743d209fdb0 +size 956567 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index a7156ac2546..4772bb9b6a6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b88c5ecd09bd78be20f39559028154e19184f2a1e87638ddba5c739ed3efb823 -size 814103 +oid sha256:09da45789689770ef9cee319f05b059e66dd1129250081c1004d5e8e2ab0d529 +size 818987 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 849bcffa567..6b997d0022e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2f2670d681f9617e4a4bb1066604bdcc7186979d14496be9407cd004d5a31e2c -size 1051777 +oid sha256:20ffa77ae90c5cd4c83ee5dd406a38ea918317a13aeba39bdc6ec21fafa63993 +size 1080687 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 6254012f52a..16497efaf58 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a9488a34cb5cc6785d45503b3a97d095ac09050a387c8b8c9348413732dd0247 -size 970967 +oid sha256:c66447eedfc84f46dc1d5e611b2bc7ee7da278793ffe55f4144eda65a0c3b094 +size 998891 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8f1a1eb2a92..e618176be8a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f53455e7f0f6879f700c1f3eef523e2fcf71bc9dc778a871d65cbaff4ab6e2bf -size 1002491 +oid 
sha256:ed40c6e30519250a876267e61f52f149b1f8d6e92504c246fc1edfc6e84abf14 +size 1021239 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index f7b42978ca3..faa32bad81b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:30d226e3596a3f707f4d3be2369ee2c29691f53befdd0fd54feb39aaa3cb376c -size 925085 +oid sha256:a81c307b1c6684bca6c22c3a9d0338d63e5a507d0a7ddb728f5c7c1dc006df5c +size 943043 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 08671f85cb3..2740e848657 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:597f0d30915b9e631008466f99671970d860fd11bdf27348df85d0cbb003a202 -size 1060151 +oid sha256:a83410e00446ba05aed962dd418bc1a77c172c74fe50d7a9ccc4f47aeca26a73 +size 1094339 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 81e112281fa..a1fee83b734 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6c5e8d2675bf13e6691e354bbf7b6b6783f91aca3aaed7544d3e977db5600740 -size 972033 +oid sha256:e73a0fd9805e6c71359b6b31b51ec6d4b7fedc737c56401a7902d3d1b35c624e +size 1002767 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp index 682ffd6bbac..e3d80af31a3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:fe14ac5594d85cf156848f98e441b641342a012c6c015d45dfdecad18aa9b3ed -size 986987 +oid sha256:b09bee4612d11e6d2495212c1cf5ecd869f0b72b348d4f4700eb7eff60c0de7a +size 1021175 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 30c8bdf58cb..d3a4b965d98 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f862e90eca0a9c55401331e9def9725bbe5a6dad590ad30b4ab2352ce0ed6465 -size 890433 +oid sha256:0dc92f0018e828fc6f45d95bda473e44463517b515b86d71ff7c61a3baae6daf +size 920773 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp index 6e9a9b4be0c..db7ca322c86 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:c8f78820241b48ce76479d7f220db6390ce8d864f01d5ab001117ce3ddc64161 -size 836571 +oid sha256:b954a2a3362bd0870779e1bf38ab17fe1c5e8c6c26998f6b9fcb1b7d5924a3e7 +size 840815 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index bbebd585be9..c61dd8f319d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:52ad6ebcd967ce7b3c11601dce7353ab2703503853fa93d8edec32c238391420 -size 778647 +oid sha256:7ef545431b3333b884d1810ade760d3e4f3b808aa16a54047c9556e33c80a0b3 +size 784567 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index da70862352d..78e706daf70 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:31f195de4c21d53dc4668a2a5763e0c4e53924e900aa904574b9f0509ed34ff3 -size 995867 +oid sha256:de5363a753cc3b10a51ff70523669157d8101bd93e7b3d6d8baf40aa26395051 +size 1023099 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index eb143340ce8..4957f9260d6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fdc893e322c15240eaa2f448afc0bf1bcb8c1bd5e3d728a211f941548588c248 -size 914953 +oid sha256:0bd0064f3bc6f007802de11050236f8743020729f1b1fb650793c05543776b32 +size 934439 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp index 8858f58bc6d..78cab51b27c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:f826fe66963be088319ea19cd051e4bf92855c44f757af807cab3d2c6e4e3b41 -size 926107 +oid sha256:8ea49fba8ff0516f61eefb43b0b789a19eef5e6f26eccb4f15c4b1f8bbbc3165 +size 952057 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 2f1079f88c0..f21eb52d2e7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ff491bb81cbf65106413f311010c5a1fd04614277418a9ae2ffef3983d5c7837 -size 837547 +oid sha256:9a9a7811e06541cda1396f9c39d7365a1a1c213e9a0a00b281e17475fa4ac35f +size 856935 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 382c1e00b68..da2c6461d28 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:339840090d6e92720e10883f85e3cf8234f5943d32ebc896bd975fa265fc20d1 -size 1085035 +oid sha256:5befc32fc7467d63ded0f74d805506e888ad94c2fb0d3a858dbfb6d20541f25d +size 1080841 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 0045b2e91f0..e676e4c67b7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2cea0800a3bf44644c7e35cb6a4f1b006bfb6fcd57c2eaf10ddd378401fac1c9 -size 1047095 +oid sha256:0ccf8ebee4801992b72251043a6e5d74969fc002ecce0e27553ab637aacc1d7e +size 1036833 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 7eaa95322fa..1107a6672f1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:637e781137da592c8ecc65e658554ffcebd0a162a84d3bdd53fea26eea158db8 -size 1062285 +oid sha256:f2abeb78210854d13c71a3d461c6ede2c713203b371330a8de863c4cda5c2ea4 +size 1057105 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 0c756bf1d89..9e42ae6f6d2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ed255a6c32168f8e1417caf489c7273738d34c32f15a32f88653422b00af66ac -size 1033127 +oid sha256:0309f7895256dd0aa23faf2972ae50340246e3eebe4ae5d3530c7e84fcee2075 +size 1023359 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 6e7a87b8033..a6d903d1bb6 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:87506365e6f9944c3d11500c222acb15e56a3bffe125e5a90e4a69d1f3f5471a -size 1081807 +oid sha256:2c06c3ad710876660d2bccac32392003962f89aae858a319b277622a05618379 +size 1080081 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 424a3f37d6e..e2cbd166af0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8639537776079f0fbed305f825717f78a63399684377d539ad81256bcbda697a -size 983083 +oid sha256:6a174c9911d99f8f1b736a7a00d3b4218b329932b366589efd36994bb60dbef5 +size 980173 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 6c0623a51c8..71d8724699b 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6251f6e426aadbdd1eea017f0a7a6ba27e244cfb6e08c13c61db9c4b4cb39634 -size 1048161 +oid sha256:afab8e6c3496049fdc9ce8fbee6f77814871acff3843423e8054ca7bc5fc6a26 +size 1041155 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index edd98616e75..56558298ea4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b97dab3a8b3111ee7c636bbde84f6be5337997b1d49e6aacb863a5d36063272d -size 952397 +oid sha256:979f043aae8eda45114d5526edcfd3b1a8aa84dc45af3dd3177ea57fb3257045 +size 944009 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 6301880b4ca..eb07c368dc7 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c26b1b28969985e7f9199453d0160531d419b9127d8359030db5851df1673af5 -size 748491 +oid sha256:0f4f77b1e0c88909a4feb4042a21e91a111d23b2efdc3072c294db16bbcb52e4 +size 775921 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index acf81e89192..0aa9d3fe138 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:59a03f7227fa414d6aa0afc612594c01c1b619e1bf4732a93c06e5b4bd4c7336 -size 746661 +oid sha256:c0b4d453df327e8f3caba5bb441f8f4deed9b74713db1be53bc46522966b0b9a +size 753419 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 8ceec6a3dc3..78ac6801789 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8feeb753eba1697bb67990889618433b9007a83d419c21976b10daadae01c1ce -size 717095 +oid sha256:c89cb50f6a369151fa14093db116a065aa82f5c44002f919460c52c552358aa1 +size 733179 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index ef0e7bdac54..cf6c88c1c17 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:94c124689d79b47a8289edf45d33a5c03abebaff101eeef27bdd84c569269bec -size 642101 +oid sha256:b7bc0964329ec0dfcb0bc61149fdf810a60441df85a45f58b9dcfd5219a0f695 +size 651179 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 8482ab99758..b7516589e9b 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d55215952bcb427722fcf4364ebc25674721825574404a8dcfb237103a56de4a -size 741339 +oid sha256:cf1fe7caef5d3129798d7002aa0100dbd17d780a7e4a6a392d3fc2844acfe1cc +size 768767 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 783e5c30ebf..ad542e057f9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d1a7093589e5febcfa60f96bee01af01a4890a39f2c73a9191dd5717f854bfb2 -size 739507 +oid sha256:0a13cf28ece429ffb6c53e3e60edb3635555cf429d388986d86a5b598eeb06a4 +size 746265 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index dce3117b85e..cb7e57608c5 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ecc439c625d1ca9659723dfba7c65a0bcb1412f4367ef71dd9b8f10cd39b9e00 -size 709943 +oid sha256:2362aa0c05c4e8f8e0546a8c0bcd5f8b3b1773c9b765fdf6cd1aa6fc1e1c79b2 +size 726025 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 378877c5954..866113c644f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:46e0d29397f7f4a921bd9881b5695cbb4bbdad22a54fb3ffd3f109404bcd2a0e -size 634947 +oid sha256:f2c1b552f30cdbc29ed4ab81f048ecced49aa197dc2d01efe47a32ecbe95563f +size 644025 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index cb7e5bdc9ce..864e95ccab8 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ceac9aeb056c3916d0d4b951e982b67f481cb3424759025fcb49a084af137ef3 -size 636403 +oid sha256:15e8ea3643849fc1b6ac2477c3d8c542fbfafa197a6ef7bbd85a17f5a3a3424b +size 649969 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 73852c2a709..2bf0a7a74ce 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3926f34d3f6f9c63fdef34afe1f9d6524ee76081a333b8eb3523d6ebf9aef453 -size 584156 +oid sha256:7f31bd2949471a1f9066bdbd3235c14c25faaa90e752473c613fb8e3c004021e +size 594466 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 2256d714747..ba178842ca7 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:58a6536f83354620740756d9bd78daeac3fe1a822931b86895a7a3475195b3ef -size 624951 +oid sha256:e312c953e234edfc1e8bd89c6f9427a7f5d900d8bf1526dac7cd1e0544665aaf +size 638369 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8e1dec6e8f0..06ed03c825a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dad5213d1bd4299842a3d9c5365893f66541c2babf9f2866c39c1dfa7a4a1624 -size 579610 +oid sha256:11e66572a8325494f8995c0056fc2c062c55fc13e9d5e52f9ad67b31e40be8cc +size 590514 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 0391d692905..f3951a021c5 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:81c88683bb526108f1f11969186e625d26d6776422772ade8911a0922708a164 -size 627255 +oid sha256:d5f2d4b6058bb2d8cf13cc373179fbcb66aeaecdeec7760090a0a05a8975a969 +size 641611 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9c9e717e222..61e7ac2bc63 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:df76c6adcdfa6137a59cff90713ef47111d269fa2106f4510be10e2fa1c77d2d -size 541604 +oid sha256:c25270f3bda1c35c25a1b045bac8ad41b96a1dd3de7433f7233bd77ee91464cc +size 548362 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index ceaeb306d0a..33807f1e1d0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c90d7fc3f76f7bef9575d2b5b5943b8a2d9e4eb898b0a05a344aca4d7a5947be -size 573578 +oid sha256:9cf468b27106a82dcf9b7fcc1e39dda34bcd3d6768b76faf6248ecc0f54d8f56 +size 587588 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 9f286552939..ce3a3eadb9f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:82b6eef9f068fc908f4de14d20c5282b63f43101337e6af2654e23858b4e35ff -size 499620 +oid sha256:626c53925c9f0e306acaa2e90c4451b2d301c87acfb01e17aa9e91a45e4a2712 +size 505490 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 4c08a2b4cd9..2826fe6b168 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b739ed954d440429597b65e0106fca460f2bed26bb962e973cd415f5a0b7a3e7 -size 629249 +oid sha256:38ec0c2046584bebb7e7e7b6de4ce89a306513c44bb1f40180942abd0a31b906 +size 642815 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 5c8fc506498..9d45c5a48f6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1eb1a2ebf893b63693f01adc8d2a9b1c9acacc37c6e6c431c2296beea63e3842 -size 576212 +oid sha256:c6554602f18250fef55e33146bbe41324708c222f695cdfecf3292e4b114d553 +size 587312 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 7c8eed61fac..c159e8868c9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f920061f09630dfeeb9065c6b72cdfc3ce260a293305e7aa7425ac678698700e -size 617797 +oid sha256:292c878e1412a0ac3e89b9ba90ef65e6b2ee59bce6c683fb57f15a540336b7c6 +size 631265 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 3b747579bae..e797db80bb4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7a0e1f11364564d1792bf538162452193a738037b5eef5c29dc0a7af8aced97a -size 572458 +oid sha256:1613483ab644a2d6a92dda308519aaf0470ac9062d695cf98b2029b979afd6f7 +size 583162 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index b0c73c8d090..4c5177b74c3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a06071c36447101fcc66d0e836c571ce70a835225a5c01fd80d065331bc28fa2 -size 620103 +oid sha256:f1466803c2e02b3d99a79e826b3a3f1fa95d1aa58649af04e76d4e8290bbf4e2 +size 634459 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index daa3d6fb495..cc5f289da6d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:57f95708c3a7c7f87ed257e18358a641ba21fb514bbee4f6dbe015f93ef5cce2 -size 534450 +oid sha256:afd580825f29ec2c59a754ec8438688655d69ae7c8cfa82f87ed5c0539212a6a +size 541210 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index dd7b4c60582..993c2f8c212 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:2612bc5c7e1db925269b7be8554d3277d7897979c354ea1a95e4cea97f2465db -size 566424 +oid sha256:bda74f0657e32987a7d846d950ac92005047312716951927de2b8ab78c8ced0b +size 580336 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6ce85f0ee8e..4e6e625f152 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d808b7dda096d111b3559d851a5388812a8180cd1e0101c5109f87d9a0418a9c -size 492466 +oid sha256:c8976210383c53146885e62747e6fa15fb7db5fce54ebff45b746050f4a4002e +size 498336 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index a656326163f..177db13cc6d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:8d0c164628550145e0ab1a88e5a80ca4b6cb854add5efb0de1712c3d285f4051 -size 656169 +oid sha256:cb31a7520373b9a60bd935e60c39e1e6b7fb5ab05cdf68982f2c537d57a01018 +size 669341 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index b2454e55892..dae40707944 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d83b14701108d6f31b5044dc4d9d199f0a7bb0cdaf20d560b9321df2c89ec3e7 -size 604120 +oid sha256:92fa99a053eaeae8fea385e3a91007cc6ccecb96f1b344cdc7ae396b8b2d1bcd +size 602690 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 34b064118da..faee45ce4da 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:227d1b48070846ff15d2e564b5341a0c509f34e933339ab26f5e685c55b39a59 -size 644719 +oid sha256:5b5cd1a41fe63e9292e5dfd74ec702fea4b7f9ccf98feaef41f22e17bca44e99 +size 657003 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ff486a73c4f..c1164b3cc0a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2b3b6246e5b3943030f84568f89e38380ea20bf8a97c954c65a4a36aa8bf83d6 -size 597996 +oid sha256:ee8f78883a0f337e7be058c61c3d3f0c9e207774e0dea4859804e68db9bc51da +size 607716 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 9a72ebbf7a2..d93a1e86362 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:aa72f968084d3c941b9d60e5b0f115afaf4cfafac8255b3ea4d7f5318fec8b7b -size 645197 +oid sha256:8ba3e5c341484a9d68bfe97b2b7649ee8175287be8c157e5e9be14f1341cd411 +size 659701 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 523b3bc1b9f..a99f7d4396b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:25e95768e838dd47db87f5ee51bec605e8d1a5aafb0439c065671d38e56eaf76 -size 553824 +oid sha256:cecca88e497532c88de5f4a69cd64daf2ec83ab51721392674fab23db8e2ae57 +size 560828 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 9ffcd541f9e..cc2a5ebf8af 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:eff252c7406b58e39cb6d11d21b787478a55a228c1f1ad0dd1216223777c03e1 -size 597144 +oid sha256:1562f19b055cb7264e8b909e61977455020e426afdf86b3e847e92040b166627 +size 612338 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 4ef4d899169..edbab6661aa 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:be2486e3db151c83f5c1e9b63737d1c746673ee67b9abd9f39ac4bae3c6971b7 -size 511888 +oid sha256:6dc9cdd3d14af016fe547f28035e3c359e6a1d590f861fb495dd9fdba123ba1f +size 517956 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 371745751d3..be933ed4bc3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:afe696dceb3c398a48ebf1ab2020df4ebbf5bd4eddd535f470cdcc133e015f9f -size 649017 +oid sha256:808480c63d99856ab26d664f831027d8e192b4912af0296a4c00708ea7917d76 +size 662189 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 2d4b8b73ced..86bc5ab2424 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b7fb060b98a468cee7447bd82c94805c8a2b0c9874fa8188fcf83b39e74a27af -size 596966 +oid sha256:e98e9770d1228851754ceda9e197066031503cd3abc2a6f8c506eff19a83b5ac +size 595536 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index e42b87af061..abdace99aef 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:77d924d0b3ee068d30559891224a6c87c64604c4fe10e939b98b9ab406aad146 -size 637565 +oid sha256:7535dd535f310db1912dc690c071240af752e0ff8e7e271559f067ae6ab00094 +size 649849 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 5b056c91c7f..ebfb25db485 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6c250e66f1743619bf393917b39e2a088a7433c608a3c94737ad01a8242d5eb3 -size 590844 +oid sha256:bc27edb57760118de4768048c7119e82b0f6a81f543249404623cadcf298b37e +size 600562 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index ec17296f7e7..33efc36348f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:a3b8320772a2dc9e970253a70a45f60702452a6fb9054fac687f8e9ca9c49efd -size 638833 +oid sha256:38d9ab413702983d3b5785bd55a11ba89366c21ccea5af585dc9460c8842dba3 +size 652549 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index a21769ccce6..50c3f28fd7c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a663ec5e4cae8e3d56232b0297ff67b005c1d5be140fc184bffeb3bc232729bb -size 546720 +oid sha256:8a1b66dcdc32d2bd1a65990fe432ab812f87cef86e1d0743c9c5de0ec3c11f78 +size 553676 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 13674b43961..444b2649133 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:1bb7faf58bdbc15788e801a6812f15c8553fa5ca47657325942ccb70b0b947fe -size 589990 +oid sha256:2f54479401735edabbf0c27418e14cc1b29f0bd37c2912b21c90ffb1b6978001 +size 605186 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index a05188f1b38..d505d354d11 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c242cdbefbedcbd73bfabd6ba785c42740de3b22cdade5f1c771fd6431e1454c -size 504734 +oid sha256:c34f4f746f02fee5425a400309fab2b341e6c59d054d95554143c61b4ed315e6 +size 510802 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index a37051899dd..39327aa78d1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9579fc956a7eb4b386475116acaa09a9a946ac4579cd04351da34b2227cac5c0 -size 
735167 +oid sha256:8dc1bf9d41d3dfeecedc36f175bc92bdbb5cf0684bb251247bc9f1019525cb29 +size 747699 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6fb40773d08..4b3ffd84727 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9641293ded6bf20265ba6485eb6f56a68baf8df42a57809a47d941a90d43a270 -size 687065 +oid sha256:e7ebac7c16f8e90a3240a2a3a3fe345ee226e694fe2c3be574f7b63ca358b97c +size 697623 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 59f0e920c2e..c91c3ee2682 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:efec0b3a41d1c07d0974ac9ce24c5893f660fa3c21369bed3555be608ee939cd -size 718783 +oid sha256:1e492c739c8e55f3b83d53089a4f993d89c85e792d5a33b0f6e9d2144387ef9d +size 731117 diff 
--git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f023587d612..7728b05e06b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8caee005b2c51ecfae01f16fbb6e3b85d918afe74cefd598979d4d6f51a1bf8b -size 677983 +oid sha256:000fc7a730943c6afcc5eca262992fe44f2abaceee4b2613cb0bdd2ecb825a11 +size 687553 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index b3ce4f5fc6b..eddb9080c62 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7e872db02fa74cf730937a780b3810fce93cc2f2c7af14b884cd188b55f2d15d -size 724047 +oid sha256:16f2436a0f14651f015cbece8f5dd3335add1b642801eba0b74b9dd12aac93b1 +size 739095 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 336d8390dd0..a03b5c84f2b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2949d7cbb3192d9ea9c6eaf118563643a4f7529145654463e28e9f43152631cf -size 637707 +oid sha256:a7d1f3e97c376d6170f3bd4d5d800c0df26b921d4819ff0f30f57513b3740d6a +size 643923 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index ec15afd1133..a56fa699d48 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:092acb8225b1e36a694f2b1ca4547e681f5cfed87bdf26ccaa38137c83a14b96 -size 666671 +oid sha256:fcb5fdc4767c00a128d651a1a138bdb9cf1ae4fea39d3779b0bbf5d2b836865f +size 680485 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 0dce7c18648..e3472d94fb0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3bd4e3cddccd7123062dc7e9439836ac26fb034b635e2261ce16c39910fc78ba -size 592366 +oid sha256:3805d084b876f22674d8d73ec277092c895f594a91bffcbf5071a7c6ab83ca42 +size 598434 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 234694f78cc..761f854209b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:005ae70515b9e984d722e07cb94de49ccc25eb4e87147fbcb01f4b1c938fb818 -size 720861 +oid sha256:8dd9c7342d5162eed0769a31d0d92f42d8cee000d17f39e084684a1127591c06 +size 734181 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ab45571b1f6..7de7b8738eb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:77929d8c898ae55d375c8d14916a7d0597eddb5b7eb0ac8a8b83f745a8a60b72 -size 673549 +oid sha256:90ad4e88eed85a563088a7a7aa2bc76ef754e4b6e019a23c370f87460a7c1f45 +size 683365 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index ea1a590ea5a..4c7e5b52f53 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5b17c742716d269355689dde4927afc3001badcae530249e5c046cc6284998c0 -size 705265 +oid sha256:22f7e2816fa94f92e42da1124d4bd20aca16d648c336bb80355bf9d0539930eb +size 717599 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index de76a367d14..927c6690550 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0c5201aab57697f19c1ee21ccf8e155097c7590daa173e3657bb9c675f22ee7a -size 664465 +oid sha256:b32a911663ead9dc2cbb2e8e30c99d729248d3923ee8c2e1d0bb8c1e2e8b9476 +size 674085 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index ea40951be58..59f943cb4b7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eec61afee610ab36b02dd540ef9671b4cdf4a17a5bfef92bb432bb83a4ff3d58 -size 710531 +oid sha256:9748db982a904e779446747e14fff026170332f6265094780f05dff8087508df +size 725577 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp 
index 71741a3f8bb..347974fc7fd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:925740fa7069efa24e1a4d76693da0ea041c4681ac65da5e3647026fb78d743e -size 623449 +oid sha256:669315ed58e0bbd0411bfb65e94ba828b5239b37b5b7df4a0160b194e34ecab3 +size 630405 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index fcf9902c4b6..c93876430f9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b2393bb1b0077a5b2c0f5c5e8fb9e4f5e26c7c43c2dc7a45344ea551f08750ec -size 653055 +oid sha256:2c682323ea13a4ac6fdd4f766c0bb7add924ca3740b677270da08a1c40c83539 +size 667065 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6dbebb0b6be..af450d227f6 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2bde0e6b938d6b6a3b232951f5665e23a6a649ee681f60289c72fb031a920127 -size 578060 +oid sha256:7958f565e79e2bd484167830daa517e698407acc540ae5f1055ce685d53578d0 +size 584966 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index fa9bdc5bf33..721280f16c1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d436eae9debbf3dd46e90416f3f4142ad7c23636ae644b24704c7e8477a82c7a -size 754837 +oid sha256:3ee42b4fe8d52bfc766518f95abf35b908a2d25406e7874f895a540a2d424d36 +size 767269 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index c66f58f19d2..c5c7605f94a 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e46841f469be9771f5b3539a282b889e182ab1aa170e30c60620ca70c32c70a2 -size 706685 +oid sha256:dbc9832b4abe21944a37c2e6f74589267e289ab6c7edf45dce9c77043ab94510 +size 716601 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8ace2219c51..5b05a7501db 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3b171185c41b6f04a2537f6f10a7342f0febfb11dd37597aa9cc1ccf9d0a60fd -size 738451 +oid sha256:f9594a6465b7d8b5dbde3972b5d54ce7d944f76b5b3d27ec72b45f38ac926f40 +size 750687 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 
714b261fc8f..a760dc7be86 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:772ca30d894e88c0b4e546f337fa984f128bd7cc058e43f5a072099b723b82c1 -size 696813 +oid sha256:71c8d08b8cc98d785940a2db65ae7a2ea86741a3994677d6159027aa2e8a9d82 +size 706531 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index aacd9e8a43d..95abd443c1e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3b452c025df554af419dfe1430266475a350f06ab7aea0597cc0ba652fa2df02 -size 742779 +oid sha256:b2b68bbcd4d1acb271adcbae81950abccfc58259480f060c31071fe4b0efb237 +size 756395 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index b46a5666c05..a7f55ce94d6 
100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a515ba8a08e00dffac82a88391791d3c88392d3d6c3d039aa37b8970b8208a47 -size 646029 +oid sha256:75b5739e78526b292cfcea3f66e5f7bd6764ffe024eaaa8372c0a846bb0abcb9 +size 652195 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index ff3ee795bf4..8e43d7b0621 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:478f56c7078f3720845098c7a3af07fa9a1374d172a38440a188cdb6fd53d6d3 -size 690039 +oid sha256:d504278ac4c76a74dc73d020dc1e2030b5a511755fd8fd919c35abb029d18618 +size 705135 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 98d20b3afdb..3b260d24e14 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9aacfeabcccd17e04a6380823a6e3ce03fdc0316fa4f44f527bb8f4ae8141ebf -size 600638 +oid sha256:74945ce97e9ca00e27c0fd2d2de288628afbf37be25ce987b910c37ba1ac5448 +size 606706 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index da8987a8a63..22bd4803657 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b590794158d01a2348630fddf1c45bd67d48d11aefbda9fa9ba8110be39fcbb6 -size 740529 +oid sha256:9c59985c880f946112f14e919223a401a82add884701138a6d6b8ec925f62139 +size 753751 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 
2efa7b1a477..55f1d3ce49e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:615d9daaa891e6fb11795fad0c60bf0ec1c056bae6417c769fdb1c17c6bb78c3 -size 692379 +oid sha256:a55787a5d96ee14716fc30c0fdd9af34e1318f129fae81c9ab6a3382d33d262b +size 703083 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 40dab1a4662..af74f25368b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:781cab9f67c7bce5292dda31a7df1594255acb3dc0704878f68e453307366d7f -size 724935 +oid sha256:22b47a25dcd73d51dc921a02ae417de1839b0580e281d86dcf4a0f432b05562e +size 737169 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index fc85e0788d6..2afe61121de 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:978e79497b6125bb03bf0f04f4cd2b8bc25cab89628749ada04125dd55cd8c83 -size 682505 +oid sha256:524cdc4c63719d432a93dc4cc9a2574389c5e375111e6b39def97082f1b48fad +size 692225 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index c6967d50a5c..33cc0efb769 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:90e6d8c4a00ead29f04ea2b6c62c9177eb80aea673bf740a9097c20a9bf816ff -size 729261 +oid sha256:18e87f42ff7d9aebc925c990ff31651945c959e137c15479a4d5360929b70682 +size 742877 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8942c7ba552..5407f4824e7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:264f41d8d24b9116085b9ce8e65c8567b159b80f381a4d93a028d393da07b0af -size 632511 +oid sha256:03b1839d309a3ed82dc8aaa72ec73aef325d4e3fa98e5558ad2b5ed2403bfbb1 +size 638677 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 30c1a06982c..d6a234fc672 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:33e816b3eb393aac02ce70799cbc429092999933be6b59e9f7c96e81f8c50088 -size 676523 +oid sha256:32a31cd72d9fe98dc2699c2cdaa90b67bc1ba47387591c5f5d5ebb245a2ce14e +size 691619 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 636b01bb683..cfa000d896f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ce68c5e7e229797dacfaec2afe68c8a04c0994536393929a9a50cb372635b506 -size 587960 +oid sha256:7fc3b435c2d53ffc197f97e3b1f5bead1cd38d81f63b9581b56faecd97311583 +size 593238 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 74bb27c35e6..154d35423e3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:57c8c38cc5f70a8d9d3dc3a6bcb4ae151e293d5ab502d217fcce52d36c38169f -size 693483 +oid sha256:9ec08d4c2582d48ef8f745393bf7f6e12661fa0279852d8ebd68a0196f943e93 +size 704287 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 9e24da1c945..32292383eb0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8517487a6977a073dca15c586a024b439acae3b2b9b25cc45ea4fc42a7dc944b -size 686767 +oid sha256:4ac39c1699bd29793ad99bfa905bdec1f2669d36da462b3983ea8674e241392b +size 695351 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index cef0ef84837..71d04e150b4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:acf5db4a4c9510012315257cfdf19e600d3c7861d9c52fcc7ca6163c8434fd9b -size 661101 +oid sha256:eb374ae08a7aed1d477d4154e6b7248da6cc26d531f4add335b3c4a44b4248fe +size 679205 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index f7bc9f87626..6027fc2f976 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:79866ffe91548a48e964d867d1ca7261d685d2d966155a6a75cc78db1862232a -size 584724 +oid sha256:6f678588600e473668b88684bc2b4fb07107f2cb43d582f44df42e250c53c921 +size 592468 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 5ffc693efc4..c14dba6a3c8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:247aceb4b259e35d66f7bc50aaa2d89721dadc19edf03d8d444cdce9502286ba -size 686329 +oid sha256:5ed526bfd5c6c1a168458d3628c825b8049b6bf3215e2aef6fd9d3ae7a33325b +size 697133 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 42461a711a8..8fa8885132a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9d5d79e4e7790183939d3a0bf410fc310bf5e9e33bf114173b5728fda17e20ed -size 679615 +oid sha256:f55dacc78f673a126fc791aad2afda5cd1e7de5dfabddebee30e902308a9b809 +size 688199 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 6cdefd98f5e..2b5e7cfe202 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4ed7af3c402bd8af57ba639bf12596d9ab74d929e25e35c9011ec843733bb33b -size 653947 +oid sha256:fc0e175ef337841e8f882938cb9865264970414da30b53e970b11b1355315e41 +size 672053 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 8261205a211..12c9ed92293 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4fb4e54a784e8291b8f4914355776a66e37f0d4c419f612f2139dc634fb56927 -size 577570 +oid sha256:d0eaf016fca1d00fce468074f6a5a239da1ea879b4129cc485f36a9245e809fb +size 584526 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index a42aaa578e2..f2d24f6144a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7af153dfd3c8797ed8f2ee4f479e5efd5dacdfedf88b10e60347db5d6ded074e -size 615284 +oid sha256:e183d6da6884d46551c56511af241dba67b67d12802a9a7cb8c5066ea9ba7f4c +size 627669 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index fd9e0eeceaa..8dcb13ccef1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c9b3fa1e4a0787af1c185bb4b5c54d00e9eb079fd4ea683a8efb7511fcd02346 -size 584350 +oid sha256:3dd48505d501b3ffd0e191501c40e7bb044eb6fea2110fa17ae4e3ed4848e349 +size 594366 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 047891dd6c7..342fc852ef2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ec55e5e1c837179f316ebd44fa5ec94d79232d515e77f5dc9a6348101ca2e5af -size 608816 +oid sha256:106a3b4a256182f6f1c2edd539fb01f2a91c66b74e98781b53c1a8729eea434d +size 621299 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 44d88dd0fe4..e1be4826043 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:42cb0a0267457ba11d315276bc2e0e0ba81f3231007fe9d645b152270a17bbe6 -size 579016 +oid sha256:fd13428a05303612e3bc773d6ffc00a9e4c3720651da4372a46ea75ad5bbf754 +size 589624 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 7ea60345ecb..b84cb7d0024 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:85d8eaf9022c9a5af6fa379e2de5556ee15cf7003eff5b60b068f5201289a00e -size 610628 +oid sha256:287939a25d3c5751f9f28ebce00159976d454019c85bf078f0dbf76d50ebbd5d +size 624047 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 
5f0fe3e6e14..e2c5c57d0e4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ff9fb14986225c9e84184ed1d05420c55b5f58d48a80deed0532709b517ee002 -size 534744 +oid sha256:5e2c0243665c5496ec63929bd39d300d5a469912f46020bf6d6e05ca2c681e2f +size 541306 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 7cdb675479e..87b1fe79e3e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b2155bbeb7994de0ce470d2a7d87a925c18a225484be313a8257353a39f30d05 -size 576042 +oid sha256:c5d01eecaa4434de8b6aaef9605ec82765e213a2f046a06e784daa7406469284 +size 589462 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 072648dbf8d..3d840e4b3f0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09f21b060f253092bc4be3ddcaca6b4ba202ca9a738d1b20713ca994e168ae9f -size 500456 +oid sha256:ee0ba2dac5bdcea934e67cf31d66165dc314c089befa1ca839641349549e1617 +size 506326 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8d30014a98c..a5d434bcb70 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a14be61a23483cb327333c8f7fafdd1835b4393af52a22f564b4a161566df411 -size 608132 +oid sha256:e1e9f2b00d1bc2210d8a4ca0ca58195f62e10947ae02b145f66d971fb6f604e4 +size 620565 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index e68853744f2..3e546800bec 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d0daf7186efc26058ca75fb21244f796ed527eb87f0809c21ac115df681b1182 -size 577198 +oid sha256:27d0ac3156fc77a3bb7a3152f44c4c09239ab817466f165f133440f876eeee0e +size 587212 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 7f8344a180a..9a7f906c939 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:674d0954a1507b774d45f5f332fe10d86b40d77feb6fd52984c67a8e7d814ffe -size 601662 +oid sha256:182f281b84728c4c706103df1cc6c0c25f5e9b0f68327ad1adb2e188a882fc33 +size 613354 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 2f9aa487f8a..bd456991d3c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:95afcff3eef7ea774fdbecad4808d73aef68275a30a7bae7e02490cd3934cf72 -size 571864 +oid sha256:0e7e4a5595d14cd1371ebf344660454ba3356c86481d30413cf366cd1848ac6a +size 582470 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 5c5de39ff47..2ac08ac0fb2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0058968f06a68d5aa0efb315f69b1dd68f738798b79381c379ad1347119b0ece -size 603376 +oid sha256:11c4b87cecc1144d9cb27ad0a1ffb96bc5e93c34044da4e7fe338a5af76cb2ba +size 616992 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8a1bfa18229..1587c8022dd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:708ef38bc1cf5cabd501bab6b37701cb24d9630892c42f694ff4e7185e96b3a1 -size 527592 +oid sha256:18aa6b82bb74a89d522372e83edb079163279eb25c9d60ef852b4af3437a379a +size 534152 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index f9c02b4491a..e8edd89de13 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0ff1e431410d9f3634acd1808a668c1b7eb684e715df307e1e9588fab1f1eaad -size 568790 +oid sha256:7d847326caf3fbe4ba412f201d3283b5f97f6c93bca0e968344018636a79fe19 +size 582406 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 43ceb6fe5f4..fc54515464d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7d1b58d392001122e41e57ed3721afcf0297f5b98ab182d212415f8cf4626809 -size 493302 +oid 
sha256:ce02a790ff216dec7fc4527eb70ce29fc86867c76d1775f901cc37e1ffa5daa1 +size 499174 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index df7068ea275..beb0c5aa299 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b8cf80482da80e1f7daceceb60a6be61d08accd5581b561440a4abc5cc8aa1c6 -size 635743 +oid sha256:1d46fb3b0af5c0d7c8fef44fc80ed0a5164824c6945a5e0c6be5a959bf1d7987 +size 647879 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f246100ca19..1aa1b150ec1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:d67fcb008ceedcefbb94a150686728fbd1a6834492449d84bd69f7b501d54043 -size 604908 +oid sha256:a194b8b4cc02797966208651f9d69601fe02df0975ef2eabee384162a819e02d +size 603476 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 735dd381a79..1cbdabf5853 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:45d718d2424b89ec26ec73255d9621f2ad61c3f4085515c72ccf73df6300a42c -size 628585 +oid sha256:d34d4936fe5a1d27fb2144f9735f744fda0c7a70efee7d00f6cb8dda67f104eb +size 639931 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 9765251c322..815607a0a0b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:700ebabdd5492fee9f0428d4285e1b021de83edee0ecaf0a4aa14e0c0603fd44 -size 597206 +oid sha256:cf241f117c8a86003629c6a1e2f9f2f2fdea43acc1f27e27e32b0df03e087dda +size 606826 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index df45fbc548f..6fd91d085da 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cb66f52b0e344ad87d143ba0a87e2bd477158d000d2c38784eb6201480a4e41e -size 637253 +oid sha256:7d0ac6f33482f10ec51c6424f27b2edd0dd3e8b738dc57621f5822e3011e9830 +size 650819 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f74b3d49a5e..268a320a8a0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:84350a8b0b7df3156101f612001755aa866161adfc3f43ef1beef52b25263f02 -size 547062 +oid sha256:e6da5e9ec1e594d4758f6d831700450c44d9871f4d4910650131c66f5f3aedcd +size 553822 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 5a4650f6d7b..b72cac61285 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8835232fade266d0244cb3bad9d476689c869a82b68f5419c177f35908f3211d -size 598030 +oid sha256:4284f9a18382429b7bb279e175ab0cadfd31afe20f50c0745eaf8c0b3d7022ba +size 611548 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 2e1d0ad9ce0..2f777a0f414 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:5cbb282e79901b5e5c4d3f11acc49ab72e915f39b1271129c26b8b6b2da4eb2d -size 512774 +oid sha256:0a4470d714ff2b97176bf672e1d983d3618240c8de76adc3223f7fc14640a874 +size 518842 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index cc74956e9f5..f80c475ba01 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ae3821af8ad3f1bf0b3e7c4a960f348920997723819c4a61fc0703ae189fb83e -size 628591 +oid sha256:14dc81820696b1711295cdc5bbae1f94dd9d786dad2ed5744af25eaa18a13aca +size 640727 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 31444045113..947a41e0300 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:49541ce1754b1f477f91cfa06bd9c6ce806bc49c4b8904344c6ce40bf323ab80 -size 597754 +oid sha256:eb88b283d5f057bc4937ab2280d66c58ea1a6cc33c4f84be0a0a71cb3d32a142 +size 597112 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index e48eac66160..9e6d364080c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:edd648834fa83d39d1273c6c82cd73a73a6d0fb86ad49ad3ebe651ce6dddaee8 -size 621431 +oid sha256:2c3dd723a40b04f11e325669ad0d2e6bb27a7f688490aa3dc3b79f8e8ed71059 +size 632777 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 5909e84d8ad..1023f8b6a9b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 
+1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3762ea6e1f400dfb6e2645796d3c7d7f5065b5b132783e4dd9a301534d33a684 -size 590052 +oid sha256:29270c022c7c8b342605dca9bb141a2a161a6fe409bf404ce3d340ac0d0ef462 +size 599722 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index f5c52db0930..bcf9fee6ddf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fe997be3ad934ecdb83ecc5e260f8e5e619fe7289d38ee544c8c6035f43fb314 -size 630099 +oid sha256:4caa9c37a17e49584e51e6b0dc0fb827a44a90659e18f5c4c5a9fada63d2166d +size 643715 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 96d679c02f3..be9390ef941 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:cd456f346c9fccd2a963b3238fde504dffeb488a8b74ccf183519c574bb2adaf -size 539910 +oid sha256:b60bf73733ac5916b82798c42fde65eb67c1440c152002131498d2a6fa9fe561 +size 546668 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 8da39ffb2e2..a3e868d4bd0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cb4ee047e905c0d09a555b5fb32e9bbeceda0ef6f5c48a64ede30e790df81a97 -size 590876 +oid sha256:25ce2ef4f41f86b09918e1a15e3bd4bfbb151ad7767dbe931d6dc97463e3de1f +size 604394 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index b55fec47f1e..2d6557e830e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:d55c1e2459fadfc3f1a6f0375bf559f0a8e26839dec1c73cc9db071baa7748c2 -size 505620 +oid sha256:5996073f0a4fd6219fdf660c23206c5962e9c4170cf6e7490eabb0179888a898 +size 511688 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 609563f0819..e7205432f8a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:999072651af5febdc1af24226ac674cc6bd7e8cd2da8510ee3321af0ce971137 -size 811819 +oid sha256:8d143608f3c8874ba25c32802041b08bc9326269bb2c1123f41baa07a3e09bda +size 821143 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 0597a76e201..071f056c646 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 
+1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f2fc7cf7a633ae0bec67c06503586c25595d08a1fc61edb6348eb1cbc11f3f05 -size 786113 +oid sha256:bc92a9967fbd82f3b516fd732515002f6f6dea8c438005887e06eaf8e999359b +size 795389 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 36289632aec..6876e7802bf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9c6669beaaaedb95ed736d913b4ba219daeb7172f36e42ab1bd0e3264fe33d91 -size 710191 +oid sha256:3ff88ae2d97cfaa50b05964af46490edf134f13b6741e4aad120b42a6cf54315 +size 714779 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 1d88600b5c2..6fdd676e44a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9a4616e6ac0da75801f017ccd3a1a8ede2eb5b44f6f84f54a79fc41ddebd78d0 -size 758437 +oid sha256:717441d8bb4ced220d2558f78b4a4ae8dcbcafc1afe065a648460e32e8a35468 +size 767417 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 02c54ffad1d..009530fd179 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1786ad1489a559cb97ed50fb2d8ac0cf2d2d38e39bf1d2cbccd98341a428fe07 -size 734509 +oid sha256:bf48db6310e958a01c7769e9d345b71731651f42c036b8cbb1fb5449e2e9f485 +size 743487 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index c6a3bbfc01c..4255861919f 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:27e741c6ed3681abc437b898cc8d75ae119c0629337dd4fee1a9b838fe499a60 -size 605514 +oid sha256:ed79d375d70950734f59c6b968bb27d2da0ea25273ed5027ac4970c1a3f3a336 +size 611730 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 599834daabb..4ef1eb8bdb4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:59cf02b66bac7a62f6b3d7b5e3801c703ac9ae2a583f383bedefd8cd8f027de2 -size 801897 +oid sha256:5393cc04ee1778e68602243a4085a61ae3a5b9a4c1854f676a9c46b837c71f0b +size 811023 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 5f4dc0ef2af..2b77fcac377 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a0a6746ff15036c975f5ada70cef8bd5412a4b84e02476dc3f9082dbe2b5b25d -size 776143 +oid sha256:c9d78ee8c9e9e9c14bc3974b998f77572323e58d93463a85905dcb8ccb16f5be +size 785269 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 706d4ecb4b4..5e7b9b9148e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a9bcd1e0c95acab3ff12fd6d3296dd5001b81c66eee8de62ab6f020cd5dedd79 -size 754239 +oid sha256:d0e8b3475fa2e4c6a0366fe3d315e5d0fe78307be9efe15ef81279cc2ca91bb3 +size 762971 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index cf1ecf1400e..fbb9606e51a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d73e06922da93422b1f1760e80c5c0f3459950800c33628d0d1fc23470e587d1 -size 730211 +oid sha256:601de265ef0ec1cf8f48109a91170898ee0728fcc284c0e8f6ab7d6480868858 +size 739041 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index b8477eedae6..0b7975ef0ed 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:739781b3d8c23ff2ba37e481aa3466f6f2c6fda06a5b75533ba908d66ef80312 -size 805583 +oid sha256:e17f1594388d4546163841989391d1ba82c3347048a1eda0de079b82d5f49601 +size 820629 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f1264caf2fc..7c5276fc5f1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ee2e63216961abe5836bc8dbce7831958469dd55211be3af495442e34569d708 -size 717219 +oid sha256:553c227798787e5e9795a0385ff494d4b6ef86c0eea2a49983e2f7ee20265927 +size 724125 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp index be857252e5d..bf0f4fd5257 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:96c37746ee0fcbdf6e56ff70212414c0cd5491424815cc631a517e3210fa5d73 -size 784169 +oid sha256:a08526f6785bb3cf9b8b80e5d1be8ba57d85d91097be563ff3e53be948e0271f +size 798477 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 94df1dff7c4..4beacc14d3b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d5b2faa21fe35d4bf98f2976c5fc0b07e6b404644fa3052720066a7aa826f6e2 -size 691809 +oid sha256:7b08c38d850f5f3cb3aebef1d7021248c763e7b2a7701624a113fb1850014656 +size 697681 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp index 88849a997be..d441819aef8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b50306f6ced6bcedf04a6502df42ee0564476001cc1cdd30f6bf356753b9676c -size 645003 +oid sha256:3b1d4e871de7ad40afb61c017759095caf7c35950d1e6046f890dfbf0c88571a +size 658323 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index f3016dd313d..6f3f142d802 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:99ac49373599bbccd36cfc1a12b80aa68b6271643b590eced95593fd5a6af332 -size 568378 +oid sha256:5b8102af2eeeb7c9fceeb233fdabcb9a52c5143a061a6a61a82acb7cb07c5feb +size 576666 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index fa05e55122a..44e74ef1987 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:397895d82c2f697709a15ee8811493180a76835e820d91ac104b4cd0daa6fa85 -size 761131 +oid sha256:ec9403511cb79ee28c8e4e793d4eb983d988cf94d486131daf7ebcd526537f7a +size 775389 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index a079660e194..62deaf96474 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b6602bb1efc54448c6c845bfe9cf6e804cea0fb454cfab37166392db78c331e0 -size 675037 +oid sha256:a13f3ef142a814685064e683bc6d5589cd1a51b264856bbfe0eeb9ab5f3d79c9 +size 682733 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp index 6214291c6c5..be5fbb13eb5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3759a92ecde4d090fece27cf7b82b16a309467c16591a079895e893eb50fa787 -size 741543 +oid sha256:24e6066eb5f8c85f6a29b41b01c9949e478fe7a90e68bef8861f5ad4a7d88540 +size 755801 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 0fe53dd6dac..f1ec6dad84b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a26c7170fae695bc2fae4c96faa17f94aaab73e50b4dbee22de3df30447d105e -size 651403 +oid sha256:5c4381dcbe53706c30c86dba6b7217aa357d47b91d6f240b85e6e5263c9dab0a +size 658063 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3074a1cb4e1..24649e8392d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8ceb679e240df9b036a5c075a1ab0666454300fc608f224d5ce970093cd1e59b -size 791147 +oid sha256:7c081af04cecd37ca60c47a61f3de8b8c41f5084c6888bc7469df841f7f69db5 +size 800521 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 63d58fe6826..deb8c64df55 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:84b232234209c1063b86f340493f7a01b59fd3c9c959df6cdda9d6f458a6c7b7 -size 775803 +oid sha256:02c991c62bbd7f3a0451f3ba44fd257a1d7244cd3631ecd7a432879f39cfbcf6 +size 785077 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 85475480d7f..dfb9b870e03 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:15b69398a5531809ccfca9ea4017eb551606ef29af8a1b7c03652767f1afc8fc -size 689571 +oid 
sha256:ea618550c1fa5ccb585f27df29f61d0300de0851e3fae92a0d00cf2e6e9a07db +size 694109 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index d57288182f3..a3bb76c12ec 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3f277629d7ce084cf378125472f9661e780b6fb7971bf2f31ed3f8deecd579b3 -size 736977 +oid sha256:10b1282c1b04b236132f4bff416a9beb0e85e8534b2ec864bab4916cdd799363 +size 746795 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 4ba96e4538f..2ca9773667f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:98bc55a7c10b4cb039a21be6b62ba488cc910a2097e6ffbd1b7558a25c8b37c2 -size 723409 +oid sha256:45e0bbbeaa914d0a41b12c8e77d4b43b4ce34d4b0d9101d3c277ad34300a5eee +size 732141 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 5ee9ed9a997..96881202486 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:207760d79288a28ddfd6a0ea7a6b8ff794b9d879d4132f28eee8b5f7fd0ef1a6 -size 584892 +oid sha256:92bf6297b55c2baf92c0602ca906a315cd48b6a2ed2e91e8fd3f364fde5d161b +size 591058 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 877543dd260..006d3c1ef41 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ 
-1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a3327466fbd6955b704ec7d91cecc043b26ecaf0b903d2ef8f969f547098ffd5 -size 781275 +oid sha256:37ce3f3a9fdb476725805834058f83f5cd40995c9699648c2a5168a60a17339b +size 790401 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 96ca6c6d327..3a963f9c2c0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:644aa63b42f58eb85dec9cdea2338300e3ea9b7bfdeb745cd1f129a7c984e9c3 -size 765043 +oid sha256:7a4fc0f4dd1c409cab453c1a824961cbebff2b29357e53b0be3fb9d784bd477c +size 774169 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 9c286afae8e..f968988c5bf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4fd090bd5c76e1f28375ec525324ea8328537f99d0d01d07ffde9a9e52afae1d -size 732827 +oid sha256:db158454d66a2a5703bc9d67e80b83dd060755e8144dc7de7fd7614c7efea53a +size 742349 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index f1494b899a8..256f7453035 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7586f6176b35ab3c681a402e60944c03c43e661f8560749e98bc2afb50bc982b -size 719111 +oid sha256:db56cac7809768a8bb8f9c22694d160f1868ee62c379addbaed8c933f150990c +size 728731 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 4e7b3fb0ccd..f2ab27ab07e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp 
+++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6f296bfb951a4212f35e6a446987bee1102de6196c58ff3009077785ca35464f -size 785751 +oid sha256:a1e6f2b9d70f0922fff78ac4dd92f40e3c605790d23328ca70668b497c98e18e +size 799959 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c6f0f7b88e0..ba572e058ed 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5988515502283f110d3e0616bcac3987f0c78c2490331e47c2acfff0a4a842c1 -size 696547 +oid sha256:3f4588700082754122079f13279b7ef033c851b5306edab2cd47e4868179ac04 +size 703455 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp index 27a27980dff..e408f37165d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7e4386348f455244eda6006da802d7fc1e51163eb51f3bbf9180089303d6be1b -size 774549 +oid sha256:63948cb8fbe7266d08e67a7251eee1d4b11655795e24f02ebbb9f2e09e5c59d0 +size 788313 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 764b7928999..4a021fa014e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d8014b93eff3b76da52b316c46594b8dfb29732017facf7095366e394e06c62b -size 680709 +oid sha256:92adb511183f38cf88c147a8c696a6368022933ae444a5efdfe9f153342ed525 +size 686581 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp index 21f6d7f2dc6..a87abb109d9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3e30c4ef46e546654e0552e73f723902eb9d49624031bb2a66e7fc728a031a44 -size 625071 +oid sha256:a4ab3803d2801f4337a4fd3dd0009db77b1ea6567da0ec36b3595c1a86d80dc7 +size 638491 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 6286727127a..6996464f6c9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7b7bab58d75ac282b33a59f9e7fc80081be4ccf8c60efe60e89b59539d20bb6d -size 547708 +oid sha256:88c6451ce6b0bf3d5e2d345e2ebb9dd4ad038a45eb4aab7a49aec817515f081d +size 556046 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 425c9d015d1..76b9cc43cb0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d558e231ddfa204b3fad56ea55506509852f4bbfb883cf7201d5c506f3c7e4cf -size 741299 +oid sha256:40e2be96ca179d0259db4bcdbe05ea0e52b163f1483021cf02bba487c733a20b +size 755557 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index bd288489ef2..794a90d7b4e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:576072b7c5c246f0dae7f3a443c0d55d71bacf4f9ed8b3b507da9570eff05ed0 -size 654415 +oid sha256:d975f07b1417a3fa046eef43a88470347490f7a905df14af647422ad34427bd4 +size 662111 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp index eb69f09251b..3bf4c3d41c3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9f3e80e9db0c17337e2229549b1a5821512285db72239618fc05c7dd0618398e -size 731973 +oid sha256:c3970c09f77e48b3152694881d727ea2e44b11dcf346556b6dbbdc1f7e0b8c84 +size 745639 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 5fb3bd710fa..81bce6e9dd9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d858be32b4dbfd5d664c9e9f66445dbb230fb2ae2d0ba9a73fa1a2db3119d401 -size 641093 +oid sha256:fe73a0d104160b14738f4f69ebffa12f5bec9747c72b8fa2e063beb83b73e3d7 +size 646963 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 28695e466ec..6825080c0ec 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3fa1aa3d447a8c4665f9aaf60b92849923f0f2167d95a3302734f2df48a8123c -size 709747 +oid sha256:c9dadc91d344f4fb0df03eccb61507a42d724ae1796b240c18932d15cbe2d9d3 +size 720207 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index af28931acb5..78bfc4d93e5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:845e140f06fee7ed3effc0c170f17efad1b5c86f67faf7f402f529b3ccfa05a0 -size 667911 +oid sha256:07055e2a2175c78facaeef8e721efe413291574ed8ccb2baf9182315b8784495 +size 675065 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index b5bd379d464..799c9d3da8d 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6fb633de1cc064f101895d4000859f64b3d7b510a3e8b6a025c47821143a3b19 -size 698149 +oid sha256:414e23ac2c862220f0b6ebfe7754a4cbb3470c2a71a22ae6be49a96eb2602ecf +size 707965 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 1da607c1019..9ae11df6108 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3faec78cb2f23d1157e887a76d2f3fa413b6518034badef7ffd0d5ec3526fa86 -size 663959 +oid sha256:f868e73ccb7354ce2d90476bb80b2e84a4a97bb08a0028a05a19bf09553ef169 +size 673825 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 
98ba651a107..de98a1018f8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:651dc524d5d23b908e59dc3a4d43619e68ae4744825d3c7bb7ec29b5dad02328 -size 716683 +oid sha256:5f61792e426d0c98005d10b049365eabef6f71989b033029913cb0ce2890faeb +size 719101 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3b43d360821..8580ae335a4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4f3a325c7bb3de699c05295c1cdcc537eff07ecc4e8cbb5680dd9fd641184139 -size 620279 +oid sha256:b55d4b129f68b6dabe788d953e31fb944d99fdafd5bdbab7d3384123c4380fa7 +size 625063 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index e2a3e695732..7ff0ea0c1d9 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9d675ae8f2cd34ac0b630a32b66a199c47b84080efa8fca8e0e9c06040278ece -size 682493 +oid sha256:acb911482121959446aec2cf0d559a280db3b6ac873458506a7bac2c5094c184 +size 685651 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index e2c81c18756..806d4dbf282 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9e1e59eba397c93f9736dd6986646fb6d81fdd5507107e2ef4bf0a61b2b17335 -size 591366 +oid sha256:cc9b9af0841c7209b16d50ba19666cdd847d60be89613f78f631c03fa3a8572e +size 596102 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9210a51a797..87e704f5369 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0995d03bb9943972bbe602826ad565aa56972f9c1e175bcaf4013d37e22174a4 -size 866183 +oid sha256:55370db6218bdb8c6eb1edb23f4b57f05cbe8aac633390cba0691bdb53771b75 +size 876347 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index ee690ad2ac7..bd6c6c5617a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3811ec4796cb3f4cdea086d560c8ebab45915bcba62efda4bde7c1a7251bd3ad -size 831649 +oid sha256:c8fe0a6af65c2696a859b3b20ad0ab0c55eaefc2c50a1da969e3bde3e2e77efe +size 841861 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index a8fa9fb7619..77a9e5dc724 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:daa9a84e39243ee624a04c5c035a8095da9849c48cf4c95b51332cc8b39cb205 -size 794511 +oid sha256:0efca831c1c543d565379d7e65130a144abf879871ce5a20b99583d022293dbf +size 758547 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 1953ec4d905..98c71ec0bfc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5f75933274b440542ebe536adb0b7c482921130e3bc8860718e8780aa556ea2d -size 768751 +oid sha256:92b00f2a49e808f5e8ea5d4d8d36ce4a82af2f34a072d6179517cc087e43c68f +size 772697 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 192d28c517c..62ec0619b6f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5beaf668eb22a8dc5451a365d09536eb0715ee0b316cf9baae8246887ba6fb9f -size 818329 +oid sha256:86af3767ae621bbcdfaf2bbc5626e5f408a7e1ccd6846898579b12351c4c57d5 +size 827257 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index e4c330b617e..1b6b13fe709 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5af6676020ae3729834a358f460ebc431d25a4137ab8a0ea1b7d6978842d7902 -size 785569 +oid 
sha256:5af9ce97740638e2b12dcf6b7c17ed4122812dbc973ef9cb2aacd2378a75ca7e +size 794547 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index 1d4be641861..fc15718cd1e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d1a9740548a0b95f01b8fa2ca66ad6f0e7f480b2851fdd3771b98148ab9c6147 -size 644989 +oid sha256:33768b7abd74d31c1d5a8101604820b11153380ac3d4930ef4d5a5d568069bc0 +size 622049 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 18d08917486..5c03f653652 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:1aa50cd9e1417944f73e1c90ca22a6c5833ef45b3c2feefbe2dafb4923827c5b -size 649273 +oid sha256:885fbce454a38b8c391949e79c09175acf64dcb30e04a205d20cbc9a2a471f07 +size 655439 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 1a4333a6a39..109aef00757 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5a305155d40efcc8480db7c60975a477453671753f852c26a4420b1297641b6f -size 851427 +oid sha256:058aea4ddb1ddd0c04127a79db6b81711ecf1a5f833ad99d75b086d56b01ef14 +size 861343 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index d199e5656e2..26c2d41bc91 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fa0a8fc08e9326870c6b96850cfda97c9bf562e2bced70919c1020bd5a1b1a21 -size 816103 +oid sha256:2fb6438b91288ff4bc6fab5d3bacfb21779a7eb3830323dabf2c52fa6ee50b86 +size 826067 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 723d9c54695..bf5b4b4fb57 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:17af57bc2fd4a7cec9ed74d9bd0a5a69c79014decb9a9cbb3d771f2e7743e4b8 -size 809541 +oid sha256:6dfab8e488084111a62a36a9f2ee3470221e53cda6d97d48165d0cdfce88a822 +size 819063 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 3320ec56e3b..36c8a18743e 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:46b878816a4d76ba0bcb5a95431d138403d07ebfcb4590b390911ded26360189 -size 775745 +oid sha256:0978bf1daad5e7134dcb1e838066c91eb324eb44b36d017178f0f99d43c7ba4f +size 785365 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index a46c30bee73..f82eb74fa5c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f44c1e228527eff2e53f019586a3d7868148c293248b12e9fd96c4a23ff3adeb -size 858715 +oid sha256:5973bfeae5b41ae913a82d3fe6058e7208a697ae3e83a0d7713a6868260a9d0f +size 873811 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 6ecbac30ce1..33d9b528e44 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bab948bd93c114fce74324f04d540ff132d807793f2ad263d1e77a21742e751c -size 767439 +oid sha256:3923d43cc5f8edb9e99d288b530fe284ee26cf06d3767ba2082c7cf5db0c4a2d +size 774347 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp index 8dd95d4d3ba..536b80406dc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:572ad037311fb92cd8128d487ca7c6a44efa7e01bb37c92b65213df3a0490358 -size 828521 +oid sha256:fc1d147092a0833db4b35a303f346528025f98a3dd9be1499cf3c5362c85b755 +size 843863 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 49bec68b3bc..720c37c95c0 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:14ba0f1f2ca4a3464187093a9688b5bfb36715f68605ae635387e791ffe11b77 -size 733397 +oid sha256:939e20f0122de9740b77bf8ad3c00f292022678cbefff93083f1ccc504416f90 +size 739269 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp index 8c493a0ae89..569f5393cd5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b554369d1b6bdf72262d82f6a69a545a01e3202c5256183f5ac135328480335 -size 693949 +oid sha256:4343e02e37379391ff34651e57440bae2484c09d5f1d5d6146c4074725335c1a +size 674907 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp index ba173833a9e..58ebbfd1ad0 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:75030fae43a33c06f1895ba2a2433bad626c05bb2b3453ead56cbadfe7ce7675 -size 706669 +oid sha256:e88182190dd4e29dfe07d1e9990e9f678817c321822d63e83eaba8b7fed156f9 +size 715303 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index a8f709f7e26..58cbef0a10c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2eadad563d2de2ee6a5ead1d73c277e72f4cd6011ad122fee853c8bb76c1b242 -size 604598 +oid sha256:6f0fddf1b9e2f8088e908cb8d7da2757f272d42dd159538b9fc0d7f93dcb4460 +size 584174 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 1f41742d06c..d3a76fd1ee9 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1fb05f206e227dd2e9e5816a972027e2fc3918ea0f373eeb5f81536515adc870 -size 611940 +oid sha256:c8e7b26cfbe2acad88699267f0744177e024ad815ca629ff1fcce83c39e75730 +size 620229 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 0635355f38e..340e004684d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dba96dd183f0236a08f9f81a06ce997cb3438b2647242b741a92b3001651d935 -size 807701 +oid sha256:265039d08ed80d292a077fef8f64c1675f3d4dce5260c252d0bee4e93bdddf98 +size 822797 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 3a2cbbdefa2..e5e022f7244 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8f77caa65bbf8a791bc71665fa4a369fd0cfd843b829262251c51cf9f9ccfa2e -size 722593 +oid sha256:d44e0f904382d954cdab400d532fc6ddf55eacf900cc8680276d2df0bc067dd5 +size 730241 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp index cb6151fe322..690a0a4ca94 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:60150769ff9972e345942dcd99007f272a2ac1d8a6f138a4ed1064fed8a9b393 -size 779383 +oid sha256:65e7eb20a3dd903b2c6b3fa1de44957686d5535ddb6932ce675df78573bfbc3d +size 794479 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 2f4eb0c1e62..5fcc051a8f0 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:49eff43102e357f92a2a25134e3b275a48c8c223dac9508297ea049fcc097032 -size 690279 +oid sha256:e38f37d264fe1f72e2256b364cf0656061562f3d2ec8d7f005248347c076e15a +size 696939 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 2d35a0eb054..8de049d1417 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:70f10c8f52c5ff09c8e4ace3ed345b8e61d80c09ec1a2ae6203cdc5dcebacbeb -size 842355 +oid sha256:f3da304e7cf74d6c1973acc769b14365f96faf50eb5a5b01726255c34acb6ae9 +size 852519 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 
63d17662224..4278202ae99 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b46944ddce9c13934e8d490730155d8c5484d539592d770a9435a1d513b7f353 -size 819365 +oid sha256:6ab0065a56820aaebb25a0cf39735e9e6c20799f6e795112d01e7706ddc576c4 +size 829577 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index 2653ba6ec5c..871ea613863 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:933cef477bc2b0580eef9d9d862635d5ebf80f0d169a2d15d02dc7bc89ec82cb -size 776751 +oid sha256:25d3a988c9c0fd39ed69cd1f506efe3a649549939071a586ae8db86e9aea6a21 +size 741033 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 24900144b7c..53a4d4886c7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:547ebe587cf367a3737d2d3d15ca4de9531db6e0b518663e177a06e29cca062e -size 744923 +oid sha256:5cfed792753352bee0912ce964654d72d400b5160181f0d24d0a3c3b6046c6b2 +size 748919 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 3502a78147a..48d25ad50cd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3823e239446c972a30a97cab54336e9f23261214c674635c84d09094bb24d3dd -size 794501 +oid sha256:7370cac806b5a065ebc47f319d032e1405eb20a5f66e47172a85bccda139fc61 +size 803429 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 232c36ec12d..0b13afcd54f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0d64482e8918c4dc26d25b091341a980075209f34255f646a4cf36263b725350 -size 772495 +oid sha256:e86466f2acc0ef029299d6d1f5f25b1bc1e80ac15e96002bb8fdbf963eab3301 +size 782263 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index 0fd3ef5401c..b5e42fa2ff3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cdff9b1a7676506f97c22b60132ed9a61ff69e7ba1a10667fa05eee1d5fb0761 -size 627229 +oid 
sha256:4021c7a4ecdae7e50acc9faf3337f46884a2eb47453ec078c85c714ba273fb0d +size 603498 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index acfd4f8d4a7..1fc5e0a2250 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9e78f29813a367fe248cf7ea80a6f97edbfd2249c4442b0eaab73f86123de4ee -size 626235 +oid sha256:5d6de40c3487fc1f8274e08082c5e27caad4fee6bf2bfce8a22820d80a408790 +size 632401 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8e47c9f249d..b91a737d439 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:b0494e182c913c2704db2b84d1bc8c3f0c681e5acbbdda6c02f2cd06883f3c0f -size 826809 +oid sha256:a91c6492949fbf45e7aaf91be2feead18642c145ca4c9383986cc4807b170dfd +size 836725 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 3cc79618157..bab032bd007 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f89f8f6d0ed009cd7e1ce2a425d4a649b393c7c508e8ce36018129061cc58324 -size 803819 +oid sha256:f98ced6ed3acdceaa5d56f703ff28bb5fa17257e99d67514e02adb3e1b2960eb +size 813783 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 5cca3a5cd8e..03cc307f4ee 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:a23419d536dc3365c8b018d01682d9e2f193c0a0bb1b838e77ab2c9691e7d088 -size 784677 +oid sha256:0851ab4d4840f2c5c84a2521b3e08e8449151488c02d173cdcba493a635c6107 +size 795235 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index b12936a4a49..fa5b4b612a9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4bb3251ffc5b64384339c48b30b49dc21cf3d5a8539fbcf0017e5bc7ca99c819 -size 763461 +oid sha256:48a36b0cb44731a1bb86e8b4d3f1a1da5aa6989cb3eb5d5098cb989410a70c13 +size 773081 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index ce2c438ce0a..bbfb7043afc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:01388004d358967dafd494723319c8ae5996d23ac3e9bb769f79d37823ce2d4a -size 835675 +oid sha256:187d6872aee7ac74f74a1c32d4ecd8390464ef49c5b7ac39b969d50a71a8f58c +size 850821 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f164b3145c5..7145baca87f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e55e3f3bae2c21e02ae8de2c438b70288d7020b8b9a9d5cb9584edfcc8350b36 -size 743611 +oid sha256:8b5644028ce98dad306fd141b5634c511b208dd439fb2d815b3e3a21e34fd313 +size 750519 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp index 85087f0422a..0109badabc4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:e3c082be03bff14798971ee17fe968a66a08bd53487e87bf1e4f37c7b6feac07 -size 817865 +oid sha256:a63e250b0c204779633a075ebfb76782390ae6822c650d5e7d9a87073c7c48e7 +size 832517 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index ff3fdc2f308..f23e7ebb9de 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:823f1467b376abb73066e082a7f42e78ff844b68376d3c69ac821c24d3dd5e85 -size 721113 +oid sha256:06545d94b9cb469b462d6963d4331b25d757064779ddcf9008f9b2cf6dfc2de1 +size 726985 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp index ce1a53c71f5..d48dd84d285 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:cd2158c27729a5c10e71482037368b216a9040af7b7343db94d75644bde6fecb -size 675695 +oid sha256:edcbfb9f8aa7f975969d46effa29cdeb1aaf87ed5067f3eb3dc3b0e2acf6fb29 +size 655815 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp index 3e31830b99b..c963ccd83ce 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f9ffe5d8dd27f44be420a614c1e1b9d71c089c30c3bac234653a28f4410b572e -size 683581 +oid sha256:263b34bf83d6bb5d878a03ba407ea5617d4e52d776c694d882b5657b7d0f0dae +size 692363 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index a8672758c74..3ef7988efed 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:362902e53a6aad68762005415db01fdc69963dd11721d40fe83ddf053671d355 -size 586886 +oid sha256:5df7314469c7689b60c3f3500e34bbfaa1b50bcba95feff2755ecf830f0d7a34 +size 565624 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 35dd3c6da9e..7057f8c2ce6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:619db473f59788fd95a94c83a87d30fd3de55b5c85ef670c497306a6f4cbb6a9 -size 588902 +oid sha256:182b819a1d8be1be88c0fd59d9d2c50e3e6648d27852355420a3e29b3e81f6fc +size 596400 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 60f99b2a4c6..70c24b74921 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:101c7cd6baaee69bb09130f6029f53535e60ded87bd02d43084f4ccce92856ad -size 784663 +oid sha256:68a0fcea0c7704568e194a055f533065f5d363abde4fd581bcc7f8318b86ad67 +size 798969 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index fdda82ff477..6d20dcafc49 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:882a060e442127c8bd1400456b61e0d7dff5892096215983c45e2ac300b097d7 -size 698765 +oid sha256:9aac841f072e2e8e1ac6b304b1a403ef5003716e11cf8c592edb00239dddd137 +size 706413 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp index 0cdf9a541ad..f9a69d7a63d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:378e2ec4cdf7fae3c8df61d80180fb9f7b38b89d0fd66d81ea7e1f6e5414034f -size 768677 +oid sha256:0c7406ee017beac6416a31cd382e83b31a9442f41096257b0a92fde39228e9d7 +size 782243 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 96e4ae8ecf1..19544bd486c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a2f998c3a85c0c3ae8be95978fdf8a36fbfc4e2b0d2972d35b4101caaabb8b5d -size 678783 +oid sha256:09c6ede4e4f06455ac5446b5e8050f4af1dbc888b2d565ee9270d1cf209115f0 +size 684655 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 4ce1ab765b1..61632fadaff 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:c37c5a6f42d25a7263183b756fb763eb2e717054095ab19b2869c00a8e843fcd -size 751681 +oid sha256:a3ce98d4b0b1a477122f3f746b11c0b1ce4b7b066972c5b43c332fb7b457e816 +size 761597 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index c3a86cdfc1b..f6cabc8d78b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d77b91b026a1c2adec44b4465258ac28aeb613532143f738d26b58e53d37c479 -size 871421 +oid sha256:d165e1f254d4d77faed8390fa0ab2d21ee1032902d510b7044275b2cfa4da72c +size 822039 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 06c30fb66e2..b2c4f5fc6c8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:020843663f817c1fa0d3084baba6d2770514b7c6a4c11d52bd97ce5522eaae44 -size 717393 +oid sha256:d31b6ac376c2011c760b42dfca3440663f42454b3f465740d2845a702ed8b027 +size 726173 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index a1b9d75a3e7..6b4f95dfc7e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0ffa10f6af2c4af94f2892b88209c5eb6ec576d12d0388a32879f20a38884101 -size 722689 +oid sha256:4a8b032e6540d6d2df9fbcd5c2144eb42ff088981652cfce78e7603270de4dcf +size 684703 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 08c7ef199e0..8ffcad7f6e9 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1369e3b286b773f5eff23b6156f543cd57658968e4530fb583c040b5dc0b58fb -size 735493 +oid sha256:7207b4d8a9d0be9aa6586ee7c005e2cf35825db4fb36335f18a6c713a0ea4daf +size 745607 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 22f91b3d8a7..b9b29e14d09 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f44774a40ea759bb25cf90962d66cf7bfc60d6f96ac0fd5069a327b2594383fa -size 707075 +oid sha256:d8fa02f582cc9f0732f8fe52bb04b7f86013e179bfb6ddbbb108b7e75cb4b7f4 +size 716943 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 
df6f9f8ce0d..cebf5851d4d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9268f61efba69ced1ab503b97ba4452b62e4e244265d56e36b5f926dc2209346 -size 753881 +oid sha256:660eca739cac725e36e0132471bf07189a9cf77fb632c05ed33d68194d021b49 +size 757087 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c8abcfd6243..77f8879eb9e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fe548303adbbcb7d659f7c673deed92b134385fa2e057b81b5f2785582684eae -size 658067 +oid sha256:ad7d83df0232ab88e092e262286150bb6f597a5a0f3b0f5d2452f1510960e461 +size 665961 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp index bdf0fe9ed97..063d8912490 100644 
--- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0b3192e4d4bc106ed310b503c5971b897f12f2bbfb0fb146dda6ffb7224f209e -size 800657 +oid sha256:0848085724387c7ce490770d02176ef90ef8c0aa16e4180999aa8244c05004f4 +size 759957 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp index bd8ac5d9201..aa26c67f229 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:43eaf2760bd614d13484366ec91ffb28798f65cc296189b079ce5246e8dff23a -size 679289 +oid sha256:428b127930c7998f0ee5e6cbd8641a6416faa94f0816f64b0627d466ccbf59aa +size 644657 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index e04caa32e0b..65c92fc3341 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c1e4eae4b99fa4eff4954821c7f43fc662ce6a893cf4b231b6ac2284a58a3051 -size 719247 +oid sha256:2b20c547af94190a2dc0953404c6b9a817039148f751c9c97f529f6e1b2e24c5 +size 722453 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 851e2a65425..768458f8d1f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:40c3968b22c7b0035b5d143bd8bc36cabd58f2576e95dfaa14343c474be485e5 -size 627233 +oid sha256:c57d8856d0f9bd7ede107c5690552a7e4cc3a9cc01554ccd7e1284b9ca54c687 +size 635077 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index e0120f2e861..79a09e6fed9 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4ecd1e73894430f261721892f02bcf9e9148b57456e01fc988f19f57258f3aa5 -size 984633 +oid sha256:9ca11f3c92cc8759feb3b7014f1e3b7d3f2aa4f792eb6108a107e9ec29143a29 +size 994451 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index d82d6620dcc..0d2f98459b7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7969effa5a92af8422c0265e3c8c108d849abf8683d82ccecb1450e7c9402567 -size 932437 +oid sha256:119cdda9e46ba401d5b13f8b40bb6c9c6e7e8a72c822255b11c4b803f9608929 +size 943191 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 1da9e68ccb2..aa1159e04f2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:57876f3dac36614b5224cf3de0de6c54550ed56e87cefdd2fe6deeb29aaf1c1f -size 893219 +oid sha256:ffe486eb2626e9063f4f29600e8e348d8da85a601ef89182b1aa1719cfe91651 +size 888483 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ddd2fcbf77c..531048d09c3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:04a15773a3abd9caab2a77619ce0e007ca000018e1b93a458a1151edd6119004 -size 916501 +oid sha256:81c152b7258c38b244599944cad7ba72bf9c1ba777349d9a68f630c3a8a1148a +size 926221 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 04a7f1dc560..410f6de8f44 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:47969b318c9369b8fcc6f0ccf74e817d0fb1c9f509bf2a67f811fa888996d0e7 -size 867067 +oid sha256:a4882d7e04adb148b48bb76b1db0d19b6190d8fd3ccca05f523b16d14eb4fd3c +size 876095 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 032b89b8b8f..9a31b9c2a69 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:07f33f23692c687142f1af6ecedcf36b863d2c91a4b04a313fb622850918a03a -size 748137 +oid 
sha256:0e8690735910874628898f2e00d38c74e4d1343340a577652d5b98b50c40c5af +size 743155 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index d8db9b268be..f8605c2b412 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:62f20376a9832d2f4838df59b0204904158aa8a8e143f26579ad9e901be6f1e6 -size 959713 +oid sha256:2bdb064699b0eae3982ec5b82316ad5e55b99e043293ab225fd27bdb7d41c24c +size 971307 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index b5d09c7d13e..fa070881107 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:4724195d5b743af432320bd2ff617468d437baafca36ca78edcd20fc898a2165 -size 908307 +oid sha256:db8968783f4cf0ad5a656f803de655526d0f8c8d4b40b4acabbfd4d0ffc004ba +size 918469 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 37ac744a39b..dcd8e302d05 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1f4554a9d8c263daefcb769a6cb61ee7dc07bc7108225dd4ff42edda499f92f6 -size 902929 +oid sha256:8a2699d1d9dde2818e93ac41cbd80ca05a148e851e4cd06bc25315a6667896fa +size 912697 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 50a82a9a232..4a81f4f5c1e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:65508d9b4517241c858a07c4986dbd36996de66a9783486dfff759bf099338be -size 852657 +oid sha256:0b1d5fae7b3417da3d34aca18ad69bd0a0dee772c8b55cdbcbaaa7f2abc594ec +size 862277 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index eba77fa8259..48f34982f29 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7052baa2e4281b26f0dc0e64e0d76c024e758261d6e0ccc69c5d4fa792ceec78 -size 973711 +oid sha256:ae898f9e639aff08b62d31625f39968b333986874c8468cd1510b68b49b94264 +size 980913 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 106bf5b85d0..861bc261a2c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:b9d81337a520e170c4823cb3535288e243366479e74434f4e3388a0a7e7213c3 -size 892599 +oid sha256:6252f53dcc06d44aba2aad2f1177427a3626b750dd21cb38f257c41200dbcc68 +size 892303 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp index e1d52cb1146..17c1b055ad7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d0a7b0ba8c51dc26c0d6b4868117c6aa6cf8803df80849b03d950dbe635b1416 -size 928469 +oid sha256:482e692b5f4281730406e3c966902ccdc1346ff9c09e500f953498941bf2be53 +size 933501 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 189f669f881..caa350d8a19 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:62b295bedde97ef04e2d4317de876628606150598557f563e0600a4abaed8903 -size 842079 +oid sha256:1fb131ad5208b631d3ad45be5a8f491fc6ff9401269b1f5e0880f9eb2be2d7fd +size 840007 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp index 11e7c76f380..a3f15f8028d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5a27b10eaea82b33eee5f1e8eb3d08e5ecf828f64d6c8916c877ab1a00392744 -size 804399 +oid sha256:6029b687196aec76411a22e60cbce1446013e9a41ff563ec04ba14fff7b0bafe +size 798725 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 04e3c09d4f6..fdb5a6ee065 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:e35806f0a2644d8da1d25aa80d4c6439bd158c85daa63a0e0d74f7a81c884037 -size 711939 +oid sha256:5e63fb355cb83500ab87d4d36d9f1c76d8dfe51c3b10821d09f9c1bca6e2aa6d +size 707993 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 906bce0f9e1..5f8bdb0edcd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:070bcbb4b851aab0566665adb8c0e5e5fdd44541f97aae8e710853886e3ba1a9 -size 899067 +oid sha256:54a342a4d6d39ff4ba4e0d885b9439e8ddeb405c6a12503a4a72f49ef0b24209 +size 914705 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8384ad7cb39..e074d3d2c76 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:fb24b213c7acf512ef48acff5f693b5ea4858320e5c3f974577756ef90826be4 -size 818201 +oid sha256:709409278f50c77b8dcf6d3ef9c8da98ab5de980159d9037f8f15073be8acc71 +size 825897 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp index 68f3a188225..f592d42da21 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0641d925299ff0c9c80766a1f4943b17dcead0564aa31db9398585df0175fd84 -size 855899 +oid sha256:3fd4dc1764ba6e38d2a1c26e9d3b0aad77fdd3651904720723cdeed7f185b25b +size 868923 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 2c29fd2de92..231185db418 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:5197dd8f2127b178a72d21798768f7bd6d89611f449331ddfbfb4f7c1b31f755 -size 769409 +oid sha256:50f045f2ef72c35463c41cb0346d1248954c01320681081fcd3914d0e9f2676e +size 775279 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index dcea76f5e62..f45b6c702af 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f7de0dac56908d50c9fa3f12c263fa08c97705e65e58ed6da7d10f65bed315d0 -size 954343 +oid sha256:959b2d0199046d89c7c3ba4a9a5bd3a1e5e675429b1127f441c7591eb49dab58 +size 964259 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 26213e58e52..113836b9953 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 
+1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8d0e7507eff7b7dd05b3fbc1b6f1c5cc353db15c58ca9e103d296d4211a53dab -size 916205 +oid sha256:b130bbe0863be3b80328ff1abf3b5d8dc49196d35b50b1f0b37417aaa7fd306c +size 926911 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index df1601fcd5e..fc500aeee1f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6387cf3d6d0c856862cb84039af9be9134abd83d0bf7194764d9102156b59dca -size 863865 +oid sha256:39f2bed5104e9271aea857351b05a6813837f4de64a97689d38e067df9cdfca1 +size 858339 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f7031eea633..76a8b9ec61f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:453ae0e1cfc8a5dd156c8fda2a975f59a6f41eeeafd12742b57737d9de599fc6 -size 886261 +oid sha256:253c2be8f000d0e8574346cd01741cc0e7162e6e533443732bac25e32c6881a8 +size 896077 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index f427cb26616..f8dbdf80a70 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5dca1be4cbf330f5d90ad2f22c2bcf3a1c9dd1f59a1b7d9e54260e2a436d1c2c -size 850837 +oid sha256:5c5386bd771f8fe61f5ca87c2aff60486899c202984780a5d5dadddeaaeab738 +size 859865 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index 9f12113a93f..427afc69045 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8e4c10ba7f694315a30330a6d4099ae4e8cea2d8179c02a4edac934ed0069e07 -size 718735 +oid sha256:87f582ae66bc79db6bb38b140593490ce16efe6edb3bb733c0c50ff17def52ee +size 713011 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 03e6bc79594..a5ca4b88b56 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a1b8aceb53ad82a7e9ca6e1b3091d928b2445b1bb4489b9307a795ca6e9af4ba -size 929423 +oid sha256:8380e177741ef5a0e17db0b9de920e937ca1bed2c91168c245c4489c11dfff40 +size 940325 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index d097613927f..e66fee87ec3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0207dba8e3aca22e74a749f025cc76c95d4912cf0e43becc5ce364a4602dfbc3 -size 892075 +oid sha256:acba93da0c86c95326c8bc63e3e4251e9a7067cc51a9d668ab3caf8626e32622 +size 902189 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index b6ed2046167..8966fa73724 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:81e4d0011de250f7fb80a8a46d21a11581217a3830500c672b168213125bc401 -size 872639 +oid sha256:13bbb34f189c2bfc9790ce79d0217d3adbaed027ff80c66e7dd7c1ac2ec87b10 +size 882259 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 76608c2eabf..fdbc2689d9c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:572559dfd96f45bf354e791a7390c6d10e4d7d992e95fed3448e29ac2d83c534 -size 836425 +oid sha256:86ad6d7c4dec6ed569267f45af95ec05a20662aa3b741613a9c32ef455c6384c +size 846045 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index c8d29a5c4be..36875092aa5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ffe544c80a6abafe2562192f1eeac359ed7b9a49a813eafeced912fbba8c2307 -size 944357 +oid sha256:55c28199c08bbf6ecc0d9fe681b3af17dbc26660d699dd471a777a88c18fd539 +size 951511 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 2aace1f9318..2f0adfc3913 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ff143c76c7483dcc4bf1c275f2a061189491ed48370f457ed66ec2a0f34e9816 -size 862455 +oid sha256:2e119fd99197633e76f8bdb1cadaef321a7e17cba0ee2a96d6ce0a3f23519111 +size 862159 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp index 7aeb35c819d..e25f1c8540b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d71106072e8e3b1da7583c2b34285a74167e961fe8ee179b223307900a2f5c7d -size 915445 +oid sha256:0fc6ec77b5e5e2d9066f1b23d4759866acce2415c34fc2641164fd56c3403bed +size 919541 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp index 6f558e3761d..29c14e57d50 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8c8f3cd2ce63a6c691ae95c3bcfbe2228d0c244a5c2cd6224f276601edf4eeeb -size 826637 +oid sha256:657119b88227f64e9bfa9370e087f1cce1e87382f682d1287617e50eb49a9ceb +size 824517 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp index 6006a54c8ae..c89ad8cfb5f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0a760021a3c25b93621ca0c633c4371c290c24001de615dcf340929208f9e4f4 -size 775045 +oid sha256:b40ad049f30704a593923a891035276fde9fcaab92a620f428c6ad9dbe565600 +size 769421 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp index be1752d1162..8030f49bc58 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a442caa960859dfaa88a4a6e35b7cb6a10d6fb302c29da1cbb54dd2d322975ff -size 681747 +oid sha256:2abea230dea132ffa657886c72b79b5a9af4e059b4df02e63bcd8d94afb8fae1 +size 677801 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 4b99313a045..b45821fa82e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f13a4becfe28eb78b8b0dc34589195e72931c60b3ca8764179c86d00c3c42fc1 -size 869713 +oid sha256:d6fb8c3a2c1b25196e83b0bced513f1c009e2ee18ebe5bb8c7981f16687d849f +size 884513 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 5ed74b55506..da3b5e7a003 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:38096efb68602c6131f0510aa51b835b1bbc5a1cbcb6d4486b8d23c1479e754a -size 788009 +oid sha256:d96d682b46ef4f6862e28a39b54b4cb15ceda4af1bc50fdd938d3f6469e0973c +size 795705 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp index feb62b6b218..120b18bf0d0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2dcbe7aff6dbaa5bc11bd9c6b5425bce74ad9140adad1f271f7156accd8c5f96 -size 842825 +oid sha256:9f169bc221fbbc07cf3da0d3b02acb700e5e16b488301b4c6fb53c183a5e70f4 +size 854961 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp index 3fad452c007..ab356af5035 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8ceed1acf30155e172fb08dc85f77bb986fd54c2aa0c6cc93030aa6889a190ce -size 753967 +oid sha256:6633e5d4e2fefb526e1066203e21aa9eac54a66da7f049e87c7097d19ebcb69d +size 759839 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index ed06eed0337..e6809d20ac0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2c82b9a745a1c7f57b8538fc009a548a74b52157e5abdaaed619335744bd5795 -size 825927 +oid sha256:881d1cc5825e0d022e8ffda2a480338a2e1dc96638e6f193d1c887235e0251a9 +size 840925 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 32d7e480adb..842ae1adc0c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2d2c4633a142af366dcbbc08ab30f2c92643152d3603df1599a48af410ad6c3b -size 783845 +oid sha256:b501803f9841699c9df884aac325d5f36589f495ca7c3d10e209517312e23640 +size 798645 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3adaba7102e..f17c16e3ee2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8cf4f6fb572df39b30bc73a27b85e7679774e57849a6619a00b667b20b71f223 -size 801403 +oid 
sha256:ac5dd3beead5efc24e8bf2684501699a62a31f4339c934ca03fce15a92a9dd31 +size 816597 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 9d0fc71afa6..3b49898ae62 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f8f92b1a918f1fa08b136a85c0ae097fc607cec0ec53a994653f22039289af2a -size 770617 +oid sha256:c144adc5d0e41e4ba22078b7a830d5e09c515c714e8083f5e0981cfc99c021f1 +size 784923 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 61120b3c2a2..94083a9b853 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a6450a3d27d47a15530ea8959d0ec165038b5fa2fcddfc5c6c2cba1fcd3ef1eb -size 820827 +oid 
sha256:5a6bf172fe586249d602412f9be6e76ade0085217039a8723d4461ab4ccfaa2f +size 831975 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 110cbb58725..6fb61822fb0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e72751529649fb68590afe0dc4aad0d97e4ef71e88f5e27720f44fc62a0d7a70 -size 725259 +oid sha256:1f5c388e42056ffaa3ded27dde97ffabc91a20f50c82a2d0471f7b779bf1c130 +size 737987 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index ba19118e6ce..e1fbe6b07f4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7a4fdcbff0981102623a9446ca806ed2d77139476e0e651e5fda97f3b8443b1c -size 780617 +oid 
sha256:d67d76543be9670207463e2b47b017056aaefd8e2a911cac552705287b47ff45 +size 793839 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 02b36a6606f..b7ad9eec790 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8e1a98f6f5fbba79c4139ce15e6507a7b6306c6dc283e6b122ba36ade50b86ab -size 691465 +oid sha256:6dbd09f62d8103757ac8433ae329d1f9b1196d69b36fc870b9bb85927d995394 +size 703551 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index dc2bb047ce8..e0a07171aa4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ef9663645fcb43e04efd7b122231db211d27e3705a353cb859ea393ccc31cd0e -size 644435 +oid sha256:66bba5b1b318f171dd51677fdc9b2435ff233c50998764bf267cb46e223fcdd5 +size 657311 diff 
--git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 4a599526a94..3be14011547 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0a2e7ba79d1c28b49366fcc6b0cd7053764aad68193557276b818e2ca16b022f -size 592188 +oid sha256:be60d0e47ecfd7e9e7d3bfcbe7ed3083cc814d48d4f6bb245c3e86a9313af64d +size 603092 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index d87ff5d8a22..93d636b07f5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:081ebc4a3a2e0773afe69e8c0deb8703143cc9b9fa974e1982cbbf7877eaf462 -size 633773 +oid sha256:468681587d3de08ed897b87721c47abffed186d5d28230a3ae11615bec6b6c0d +size 647637 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 16b66e5fed4..eacd7c7ea3d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:df1ba10c0fce5d1639a0ce76e88bf0fc3e82de1409daf9b202c3c099ac2f2c0f -size 588434 +oid sha256:b9820c31f98a65a5c804541df660240289eb0e1338b23826bafdbfe466ae95f4 +size 598546 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index eeed9569be3..aa51ce727a1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:216e0f7a7779fd332cdaf0496442095f0a406adf747a28a160af62d0a4703d6f -size 647079 +oid sha256:1b3bdce4712f04ec75540b2a5e0598c9f498aca1db9b4759647f1bdad7049fc3 +size 662127 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 2f4e5454022..8f7de231ca7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d6af713c3b019c0f25e8e6fe0f50ddd7b53d4e606192e8c29d7a40a2f3911957 -size 557826 +oid sha256:b94aee962de0c3c14c009d31def46929bb4fef112bedaa376ad22787c9227892 +size 563598 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index ccaf839882f..351b6e0a500 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ab9913535f78c510539db51793e6f97d7622d53c8a339b574babecf224dd53f -size 583980 +oid sha256:d6ee370258d1ad589665efd97e85705ea948e833390800c8f5bb652ddd70f5d1 +size 598286 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 19a9eb2e2c5..4585a539739 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:07d55fd49f71f8f67e197e9c5e121f3b1c10bd2a49d9298332cf4b2d557a025c -size 508442 +oid sha256:71c6e70b8b0ede514d6422537e16946e1c5772d6ac8710bd0ecd2fda5f9531c7 +size 514312 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index bcc095026e6..eae342ef30e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:480507d5e4d6d2f0f19a4e2047918b712d518933f4ca02db455744b64a8aca6b -size 636543 +oid sha256:7ac6f1b2e9d1c47c3d21f4dcaa470a9a2035019c1a7bb7030b53a9be303bf768 +size 650159 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 4a48fed1427..59daa45c88f 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2e46f3c3c68b367a20705f4d7bef85ca75e0be87094fad1ed21e64a770c80ba8 -size 585036 +oid sha256:01bdf13acc88655ed6860be5c24ac95efc0bb452559d50d6a8240efe8b31ba8d +size 595938 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index ded7a5cb2e6..77165800f3f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3b2d71635402db2828c6dd2879a31630bd7747bab82da8dbe7aa057de4469445 -size 626621 +oid sha256:b340f3f0f0ea7a3fc175d80972ce6d75c1af0e8802583fcb1dbe793f7cb824bb +size 640483 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 150dca58cdb..9d5fb1cf22a 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:afdfded700c550d90f8a3cb98c3c178f41d2c85b12154b9f6a7f7d7ab1b6352f -size 581280 +oid sha256:80aba882038176a6e9c3ec1ae0b4006b291f2ecdbc75476f8c29ec954fc121e6 +size 591394 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 0d20e552979..58291a4be1a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2e6d5dd97e2b66e0fcc263f542d3a5a3aeee8c28c2c5fa5fde67ea8e266e6d01 -size 640715 +oid sha256:1abd3fe351d9acf0e62d22c0e12a4309b1f2429a9f5c5ee86739b0a8d0eec386 +size 654973 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 601f5cc448e..8e00087e7c1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:122591b5f1bbd469c6cdb65dd315dd9430193c84f48f88e5a1d78efef829ed1a -size 549884 +oid sha256:b87c1a3fab3a27b383e5f0362d8063292fe9e16597fe93f84fd0e17aabde3d8a +size 556446 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 83923ccecaf..0d6d7da2e6a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d355e113879a381eb1a7a57552031c2b4af355a28b7835784ee159dc54f6ce0b -size 576826 +oid sha256:1fbe200646629be8f4a74f86e363ff9517e63d1e29dfeb543726495371390cb6 +size 590244 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index fe1f21f7396..335482d10f4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:fb9391134290286b5b4441f3a5cca7c2395657375b8bfd5e575654e59e21b3f1 -size 501288 +oid sha256:a5add4fdb9317e0f939cd22098e2b20805f07396ccecd072e818867f629af665 +size 506370 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 75277171e06..c192602569c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:065985af1cb6c162ae53149779a017e931935684c1030a25e61461dd15f5152b -size 664993 +oid sha256:fb586b71934da7d9407c36bd224aaa9d44e3c9f901341bf92415df37049daf6a +size 677473 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 80d1fa445c4..5292dcf78e6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:4c6f28b1a12c1334c9002d26812fc64f16259812e3f14ad8cf6b6baebc818798 -size 613534 +oid sha256:9a4fab1c75bb373e53b754b95c0ecd391fa32c8e5e653b30ef83041a1c7d9e52 +size 622613 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 4da3125cd71..1fa6196a56c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b16975f1c12ecbe643a3d400fcf9cb3e3566847b9025022bf6dabcb254417e61 -size 653541 +oid sha256:a348da13baf26fe37f9b95d2c0bef88b27fde8072763dd74a0407d0f92903bbb +size 666219 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 47639d17173..de2111ef8b4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:c1d87482f2e7d373f4cbc612a60cf9decfc9f200650e60df626b7764ded93ba3 -size 606820 +oid sha256:8f0640bc8e706b7764d2f47dcf92a95fccd97c93231beebf7c8c02d31850d8b5 +size 616588 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 0068a56dd57..c33750e17c8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fb18c5dae2ad38d25ddf83812568084830eb37c566593226190f7a8e3a6a3a50 -size 665811 +oid sha256:cc7c951004933010e2901fda9ba20c3fc0ace98eb306c936b3701c77f9347fab +size 680217 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 69001a69e04..ad30c160ab8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:d6556402c60353af967e33eee6a450ba8b982145dddea0b6bc9d059144515ccc -size 569256 +oid sha256:8fd6431cf05f01ecdc80f87a739a645a0d14a4e0b4bb0caf0fa2e6b09d3c58d1 +size 576016 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 5af8f1b2d35..3660950c284 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ac98afd30942fca297510cb42485ee03a990c97894ff40484e12041498fd4ff2 -size 609470 +oid sha256:ce8662250b207ae409ee4209695d2d00e776e46fd800a3c5c32a04b2d8478375 +size 623087 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index b1e98c9a493..cf6bfadf0da 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:601f45672c7506340134a44b061e6539293a6631886a7d818b67e58b62e534c9 -size 520710 
+oid sha256:becf993ff00beec16106980478549da259c52864a082fcd9b7c5846bd2e5a82e +size 525990 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 24c42564f21..d28d9f84504 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:748b20535c5d462f0197c9183eb02794641d105d0d776d046880b4028a6fe2d3 -size 657889 +oid sha256:3db845f6d7f21cb7a85dd328ca7cc76cbefcbef889a561f9cd11708e13ab2699 +size 670369 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6a513c68cc8..9277540e5b8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:15bb56df3e94c42af68f1e3582aa033579a182aca7f9612eeb89c7f89e0dccb3 
-size 606382 +oid sha256:0cbc9e263fb81737bd9fefaaf43d75cdff0b6fb12f4e0723b578f989ef3992f6 +size 615458 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 2e9b20bdaf7..019c8cdc3f0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b6cbf8ec61d66ccb726e6a86b0025dfd55c9a208ff90dab7a5772c6226135e06 -size 646387 +oid sha256:37a442c04f6673c36ae946d25d00beba4e1f381d0eec6bbf9199f371b64373f2 +size 659067 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index d76de9e269a..f0cffb7b66f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cbe9a7e8bceebee6b9db2e29b740502c84a52e696504ca2f29a7127d68709272 -size 
599666 +oid sha256:f2dead896a43ebf5185beec420a31e105f7f01887b8f108a729d955466e6294f +size 609434 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 575ebe51a2a..a7939d3c93a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b7be1549b17dd9f9ca948c60030ac45df09287f30fd1eb6c6ded38dbb86ca457 -size 658657 +oid sha256:09c19a8b473727b4fad01d9d2742b8c6fbb4a954019eb8ff177dddd5cccd4aed +size 673063 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9a04300943c..6901128e460 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dad726d29c133298647778dea1cfd4e5012d0084c861ee4091bce8e7070ef387 -size 562104 +oid 
sha256:f68f29670b85b541afbf486afab27ae93411c6efc588a57276c587b40c10cdb4 +size 568912 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 793dc8b8e72..e0ee44f54b1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b645f17843bcf664adae89abbaff013a445560aab718796b4d0e4f29d06fd42a -size 602316 +oid sha256:0f7f4bd9ab83e2242508b31fb76c6971b5dee28a424a2b16bae7fe088a95daa8 +size 615932 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f86f986e319..5cb785aa24f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:00c18ecfe795e4d871dacdd3f032c81f57507a71079aa15f446a3947bb4ee821 -size 513558 +oid sha256:a64086dfc6033132da4016a39b7e7b04be33ca37e9eb2a97121f9eeb43e77d95 +size 519626 
diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 38c3ca8962d..ffa1aa2bafc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:026a4fbc62cd81220649ff50dbec33531e397aab6f41a1cdee25938c0091634d -size 743991 +oid sha256:4dbaf12309e608a92a2ef0d0e7e9b169508c675fd7983559df17ac265c725fff +size 756521 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 645a3cf130a..41322008f26 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:605a452a151765ca20eb5860365cddc96887a8c1e60b5985338a97b21482e811 -size 695889 +oid sha256:bf74bc864a570c01c5d30cd71ab4a461f15ad1b5673d040018eef78ffbf74883 +size 706495 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9fe7c5480f9..f147c98b113 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09363a4a690ab72e1f518b101f2f5d7640213f96eb8d01f5da8638ba5f914815 -size 728445 +oid sha256:c8f08b317a1d64e9fb5ae59de23b7ea577aba1d0d8a706d5cb14b2f55b50d05a +size 740333 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index a8ccb2507ca..06f99e640f5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:908e526f9c19a449f7d911a2af5a0b0c7cbfd220b7841bb73c3e4fee11009a48 -size 686805 +oid sha256:99b50959fcdb726b28d78f34440a9a7eb8ebc8b5c695dad82bb62b736cceecf7 +size 696819 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index b6fb89e0605..97286912223 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ab00a3a4dc87c09e8703968e2045f6be5a4b782f4254c546ff6ce22b08de2c9c -size 757093 +oid sha256:7203bbf53587318cbe5e515434e434938f9a73afc013903b6e4aa34bde4e244e +size 772139 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 1530245a163..5a4664f7f2d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1da708f9fd1034a45133ce63bba22d3a8f57b35ed56ef27dc935388c6df92a95 -size 667397 +oid sha256:5836f7ffd41f5e4ccbaa82e8ca9d45a97a761348303bc6589a509ee84b2ac816 +size 673465 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 8097dd172fa..385659f5b7e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:11d8aa1c25452922c030010e7f75d8f95c9c3f93594047cec24f58bfa9179945 -size 684423 +oid sha256:a256d02ebca8c8fa614cc3ae9321e9ea6662f34aebfbf37652ea4a75f490b4f9 +size 698977 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 1dde4db3bb2..11cc597a555 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:47c207b9d8dfdb51af00f98f8b14e88f32f6cc46856e9dc178d95a7ca96716ef -size 609180 +oid sha256:3147f51b70533e228ee9e7ed43f6e7700b5581578bc4f60795c5c6852b99d3dc +size 615200 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 815dee944f5..2171ca7edca 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:964ac87d2c01e2300dacd8af43fa7be7f5aa73b2a80066b632cef1749f5b90b1 -size 729733 +oid sha256:ded2ac04926f709177b60251c9152745c7e064e612c0dc40aea849e2d6df02d1 +size 743053 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 2382b74b074..1e09ed74f2e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5dc4fc45eaa77347e10cd20c20da04f1ee598b6b3f943ab86deede5d882d2b68 -size 682371 +oid sha256:fbe9376408325a2e1ac0c537234b2377fc55f6e508adac92e77042abc0aa71e4 +size 692189 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 645fc93e7ad..beb2813a187 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:782f80f3cf3fb80a42623453fca41193eb864e3a9c9332bbf2f8dc95c5b81e8d -size 714977 +oid sha256:d8945f593addae870f9d253c4b04be17efda54726ea335eb364e8ba369c5bef9 +size 726865 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f595a0e19d0..2caad3adae9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:18f134bb255732bd9e5e781476f5dc88bfebe22236a89d34847f4e3c1953ca52 -size 673287 +oid sha256:e294b13561ddcdab97bdba3a75ff48264a44599af5b56be5b0c7a2ab55d1a322 +size 683303 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 4ae0d842406..393b5f4d5ea 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c1ac1de656cd7b11e52ec70683471125b99df705478e75f7303ce56cb6c629c7 -size 743575 +oid sha256:041d3bf2ff032052e54c89d4b679aaa8753994be6adb2793b71cc338c224acde +size 758671 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f0e717f50b9..e419835b48c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:392b2a792d24951c4b017bb7b6ff270b4823463bcd6f3a92d53d196940389870 -size 653091 +oid sha256:0e6f0ed25299969d60ec77475e461a9be5aef7f63c1dcf50a40361e1510dcbef +size 659997 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 6074423a373..56e8111abf8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:34aae657dc0faba5b0c25b958931f2721e804764a59b9a2daf98c42aab4b3268 -size 670807 +oid sha256:93225a851dbfcf7060321a3ca0eeab5ecdf064bc36dfea00610a8a2faacb1ce7 +size 684817 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index cef056d01c4..f40de396e25 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8df8ba24abd29834ae2909ff5129db14bbd2d57a5e44b87db021fc9eefd8de3e -size 594924 +oid sha256:46ee68b23d5785981ec359eb3cf058903d755481e5348f846ae25518f84b8281 +size 601682 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index afa5f90a885..af052aa8f7c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a79c775bbbe4b28e6c8ef77606e5033b3e36e1c39b4b5023669419af4788e2d8 -size 764449 +oid 
sha256:ac4fbddf30d30036741c0f0f310ae088878865e896aa1c9717c77ddc941bcdb9 +size 776881 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 1afb6d841d4..b9ba2b77c5c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:36eba60fcd85d3acda6b43275e8e146c215997b7dda967f32bb17f89003fbc72 -size 715507 +oid sha256:94dc703791cebcd3a2d3fdfc3b7c09d8e572ded1ff6d0119aa94a22581ae9e42 +size 725423 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 143816af0e1..3cc5ab0e47c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ec7e26875926dc019ff2c1ad821a509e21b4b30f0e46ab63d040d15e7f923c65 -size 748113 +oid 
sha256:44f0f8885bdf0599ec8ab7c9b5b0b32335980c39d86d6a6e430b357769c8702a +size 760693 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 64736a7c04b..d4ff7139005 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b10f754bca37cea511a501b17c9e2da5f5452626ef7482e912c856029628323 -size 705635 +oid sha256:5eb511a5ca57ac7b0f29d293bfba44cfba48d1c569b33b30a09c9a008f494311 +size 715749 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index e1c0dd7b537..46653f00441 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8ebd24360b046b3c99845b893a58ef348fc977821d2c2ebc53e32187aeb16b21 -size 775035 +oid 
sha256:f639c6fea806d6f48ea1443f8b64b223b4fb1adb37f1b31828b4de526ebd1e50 +size 790229 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 04a8cafa0c7..6c43ace7426 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4aacab1422bd0cd85cb3ac519a2ff20139bca5008b2ee0ed854e75cc6cb9e6e0 -size 680455 +oid sha256:c7fe6becddb4a507cdd9408131902ce4db4c0840d25f239e98a139d7847dbf1b +size 681737 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 41f1a27ca70..6e63cf7561c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e8fa933fa7d486f1932330e253e804c72f29e403fef985a6784b1affd8246ef1 -size 710899 +oid sha256:daf96d1082bf83824aec2ff1d3decba78f3e799c41a17bfddd5538b71448ebf4 +size 
725107 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 7e67bf75ec4..94f4b4b749b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:281bd86e47ca1c6308caa1f4aa011f41822c4a56951a6f2565f558f38c100742 -size 617602 +oid sha256:0c9d2ecbc3193460ec9147de6ebe991a785d9ef92c40279efab0b45cb0b28700 +size 623375 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 182a1d26255..60a87638f4d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:66b4a735096959bc5a199f443ffc4bea7738d3fa667d8f66fe1abf8310ce95f2 -size 750141 +oid sha256:f29589e9ef997d80f0a91abd382cb625f509d98ffcdebdad11fe7b774c1d3df6 +size 763363 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 599a1c49a8c..c19de320e2d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:93fdc82dc585eb2a9e1cd2a24214afd8a7ff77d447ec81fc6389d22803de7d8a -size 701251 +oid sha256:ad227dc8b804ed92a0d6dbaa9a8dacb3b956b3dede2f0f574c8a0ac67dee4ccd +size 711907 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f4f82a54a2f..9277f515000 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:943c404253da8a6f132ba2f8479d0b3d51230ad49db300026a98a113aab707ce -size 734595 +oid sha256:96848aa2274680150ca40b1be64cb648c61c51afcad2f1b22623ef2231cc241c +size 746387 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 18a1455ce66..8cfed822cd1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6a4041cc04688b07d826ea02c50baa129c749dbe5d0224f85224d75cd21fb0c7 -size 692167 +oid sha256:5a3baa7f521c235fb991efd27867c766005df8520cdef4b92b45761f33813062 +size 701441 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 731b67f8ba3..98e2b293bd0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:76d00728043ca482d3f4cc3eb23312ce6cb4c7bf2454acfd4731a9707c346f01 -size 761517 +oid sha256:6a4afa78255aaf85f7a835280b0e5d9ced259b27c7cf8a5249e9ca22a305274e +size 776713 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index de23443c45b..f62a00e71ea 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7cb79f2e8cdc4a9dc5f8f37c83e875f6bd506b3b4b282e5fe40413416125da52 -size 666987 +oid sha256:2fc9c19e7c798a3180bc537e5b453d433d95df8ac08ca065571d28cdfc706c3e +size 668269 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index eadbdb9b708..d39bf8d00ee 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6efebab41bd48682bceb8bded2171bb7a3ef44acf167c82181e0952bd364dd22 -size 697383 +oid sha256:5f8e455b10a4e1ae218be0cc2b8e23ec4c120ff03566509a381fe9dc2b930621 +size 711591 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 9050db73ffa..da3d70c60ae 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c8f9c14af23de190d6e1be5cd15c6368d16c6d56490105f01cd5114cc44658c1 -size 604084 +oid sha256:f95fddb265072c09027697ed4245fa85f37109ec18b40263bb5f0f56f4fbb75f +size 609856 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index cddad98ffa2..923c3786177 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:940bdb4d2a1ffdc508a7f9fb165aff19f27647eb3e27a570ba53a977d958292e -size 623121 +oid sha256:ae7759a528b0a28b0f70f60fd8ceaf6478a55c7927142574a67c97747fa6b4b7 +size 635553 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index e4017f9f69f..3d1a311e608 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:672cd7286ecba15921b49adb5ec038e5b3e4b6855d1afea3fbacff57a8cbbbcc -size 592976 +oid sha256:6f0cf0f57afbb77f37a5b681cdb3e0b868b0d260382c240fc8a7f33557566e38 +size 602990 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index b467dd44753..2fdb0132015 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:73948982eeb2602734101601b5843e669924e76c66fc4f2e643583b56dd7fcc5 -size 617638 +oid sha256:dafd64e2d960da014ab928cb33c69e40406cad24ac414504eeb012f6df216e31 +size 629381 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 90196b995a7..1929822f33e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a421c490c89c102d7294697b4cd8bb0246f0307d0b07aa429963761d4ab85bf8 -size 587840 +oid sha256:3fe3c04867b16a657fcc95cd70d2a51045d494a132c9a47347a58dee328e9af8 +size 597656 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index d08238f1083..3bd7f688454 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0f7c4e7cccb0d0fd8581c45cc5458926844506773e5742499a34eaed2a5e7554 -size 619649 +oid sha256:eb5adfd40e8371e6c121c9db91b83f68869bc0e4508bda3ac5cf697a8e55ebfc +size 642589 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index bd154b4e1b7..ece46020ef0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7cfbcf0cad02511d53bd9b07ce69067f1dfc9486ce0c7100b8e4633308294cea -size 542926 +oid sha256:b10764b80ee672754419cbbcd528c742d40a7e418adbb19b485705edde87952d +size 548846 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 90cf55496a8..6d2a090065d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:efbed00d0501a97ff5c761568a49c9d6f824243330c44321af733123cfa267ef -size 582646 +oid sha256:e7ca24d84a2dcee90546da0af926bd7748a7848df85fb73f5a9d59244dcbd0e3 +size 597446 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 694a81b7db7..182b771c925 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:860280f9d8327b3204cdf1d93e550a313cef06c04e1fd2593905278af9091f31 -size 507848 +oid sha256:a1fd2da4d2f073b2eab95d39620aea19263698b51fab33a31cb889f471a5bc5d +size 513718 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index ebea16e914e..5dc0b2a4cf0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2b179fa9f7cbc3e7243b657c1e9ac52de0b40a464433834b7be395d76c7152cf -size 615968 +oid sha256:9787aad80f6da3a05f12435de7fedd0666139eca26a064319240bf279b84ec72 +size 628401 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 4b4e892ce78..9b9de4d5047 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp 
+++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8ad309788bd0d18848ca49ec49ad667be7b05c2ed05ea0b0ec5262d1d48317a0 -size 585034 +oid sha256:f53c0d67a9b9c9c38559f0b29799e06e5cbfde104e49f5020546c9127c10e307 +size 595886 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index aa9f65d638f..0cdca3df714 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e41519a59c986f003966e14445942f10a12480cdf238622c883642641514fa0e -size 610486 +oid sha256:0239b5566e04dcaefbbf14ca953ec253e0d02f27218cb744f707aed7dde6d135 +size 622227 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 89fb179db94..a39d6196d95 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:41ad4b1bacebcd221000707e268b3b66f6c0ead8c23c7643d58d46430a9e660d -size 579896 +oid sha256:b3fefc4b915cad18712127a4396d740b34d9b90857a4803b6b5e1cd86435c9b5 +size 590552 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 91601a3aead..a03b4b4c9ec 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b2a0174ec1de60fc6f250e3819951783ca08230052816cab6591666cf5b3e4f -size 612396 +oid sha256:ccbc73bd1dce98bf11edc8842a4a9168acd5b4aa09ac954fe1260859a3f942fa +size 635435 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index fb1170601c4..4ed24849d8d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:cda3ec923958943a03f71a3a9a0fccd24be2a9783ef1e0f6d945a754b0d7e846 -size 535822 +oid sha256:f4e985e08caa02b1d32ccf683758852ea7d3e93adce8126c1be000ad617314ea +size 541692 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 438cef4e500..336e7eec42f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:de07cc9ca662f8daece0dff35f5d61fc68fb2317896acb7e57035744e2de64bc -size 575394 +oid sha256:98a59b0ed017492b4dc6538a3b91ea969f0a8cf05ff39106e7eb7652b32e34e0 +size 590390 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 9f70496a6e3..40b9cd4ea3c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:58f1871efac47efee53625ab60ade6dd48fb69ba067b49f81fcdb80760519a6f -size 500694 +oid sha256:2c58ad40b7f90ad9bb66574bb751502a302054fcb4b9bb7c33cf055c7938edaf +size 506566 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index be6b35057c4..527a952f5f3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b48c6d3b9c4a86c454489a8cd410de32a44f029a53faf9573b15501dbff48227 -size 644369 +oid sha256:c2d338de0c250262c6be4bc698f9e44c10a682689c2bb321c5d8c209eda56e32 +size 656505 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index e1c67d01106..58c6aa02367 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:83210e57b7d40d9a8fe587bb10cca02795e98519c9c8e589592515364ad21825 -size 613532 +oid sha256:dd230bdad3906d96c77d9ed17642fe23434976125ea4680b433f60bade7e037f +size 623301 diff 
--git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 81f36b3962c..a090c3ebcf5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b1bf492cb6ad321969496a8e7395c95ec765d8758b6d7ccf77943a3df4b752c7 -size 636617 +oid sha256:5fcab9a5627b784d5ec54ba24a2bd8eda3736ea3f8181a67993ec40cf0316141 +size 648753 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 06dc37a4549..cbe51176362 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4ea8afaec71408b7b1c0d74ed3decf50836ed30444a316d950a3106284631a5c -size 606028 +oid sha256:385fc2973298eef1e1e838fc351324529633f614d5596ce59d692cc485b61ebb +size 615698 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index c4ef3626dff..4980919cd82 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bd36cc207acbfacc2b1d4dfad7b17a813f2dc84dd9e820165cf44f44e127d438 -size 645483 +oid sha256:ec6f604dd4d6bcf016a6b07e604e8fe543f9479f6acf56777f08f834cdbef004 +size 659889 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8c33b353a15..ba1106cf021 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:215676818f874a21a02e4abba118acf21aff2aa2c331219f5f2e6e657af7c9d3 -size 555294 +oid sha256:c07712904fa3f9d301bc9ada10be07b8c3c006c8a682d00fa4cc9878d75afb0d +size 562150 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 560f0046a22..2a61c4ce14b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:49ba7bafae6046c1e188d24f954535312b1f56f8a037bdf2666fcda6adc266a7 -size 607840 +oid sha256:fa5bee3542dfe634b663d39133f486823200812372496969e5e4c0197f29c240 +size 622245 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index efc95595144..f7092e7806c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e4a38cd580eb2a68a61f724f5a7dd9194a883caf216221c54f77e931ed9173df -size 519376 +oid sha256:e83992c6e43277a9af05ae8085b4d055b382aed948c32164cd9f7f1e2fb4ba19 +size 525444 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 20c3bf8b990..2aec5b2543f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:15f1305d1ee3459cfca5f421d2fc923246a661697d1faa8649a66d4e47b9f565 -size 637215 +oid sha256:776fe324085ed8de9713a4fcde458c3b015d0aff5fa393a76607c7372ba34296 +size 649351 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 500e7a1ae64..4e3152475f4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:28f5ed74ea5f46c6589b6ca1ab1f8152cf0225620cac4ecb437ccdbffec34932 -size 606380 +oid sha256:ffabc175d45610fe392651713c2738ffb0d32c00a6d2aea34cb4a8c541e30ebf +size 616148 diff 
--git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3de71b4b17b..99dde0ae576 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ac19958b5b7c8f0a808fee367e1e939e8398007911332fa0e7a5bde3c4a6173a -size 629465 +oid sha256:c3da4e3ed524faacbb234229f73504209f858ed706739b727bb769a04cf30152 +size 641601 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 2faed8b3e08..3de2e33ffe8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d4cdc5c285c4eda06cc9f3a4759e7606401fd6b649db42bfe1eae63cbb8b1b55 -size 598874 +oid sha256:cb91ccb7165c4544da0952781571489dc68bcf74147b01d3caca4e0540b254a7 +size 608544 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 556a4991194..b7728f20919 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c87e1bda10a4151bc2238a2ecb8bc8bd34a3a08ed57ee258768b4ac1200be577 -size 638331 +oid sha256:e2409c20275ebb571090b68fe639b63b1d5c2a17831ef90009e5516ff69f15b7 +size 651947 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8c9f7ba2ed6..3297f60fda5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c2c9ab15c13af1c5970ea7ad7ab5cf53c06c8e4cef5ee31df83417635bcaed2d -size 548930 +oid sha256:9baad3aeb0e8d3791be84852b19c81b3b5f7b1ebebbc8ba3efcc7a74db35c92c +size 554998 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 0db36b850dc..f2eab9cd116 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:025d4b6415d1fec4387d7200142309e0f9dd1c928be57ec4ef17e56fb31589d7 -size 600736 +oid sha256:715c1a61b762e914a73c66575d4aee0088e01d4b2942188bdd76b7d832116b63 +size 615140 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index cb1d60dafd2..3b09ac475ee 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6a3300531742f67db5587cad4580d8f3bc9cb28f5e911941f0673e0533eaacd0 -size 512224 +oid sha256:c7a4ecf2dc27e58d8e554d75284595a2fc319b79f8677472cd1cd81a27ecf60b +size 518292 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index cdcc1dee8d9..2a859bb3de2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4bff377e73a40d06660cf08cd685afc202a07a625089a6835ac36aa806ebe288 -size 752579 +oid sha256:1d5f582650876e15997f73757f1c8dcc2db9e09853769941cea56d35e62eb581 +size 779959 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 295f24a4350..b85762139f1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8e93121d7ae7faee69e54851dc08c01042ad8c29976548b8a58c34c508ecb663 -size 749909 +oid sha256:29fee2054864f7dabe663ffe8fbe776a473fe14f2d927ed4076079c0f863a686 +size 759873 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 41065d550e8..641731e5fd6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3fa0cef0bc54509a87703772d4e67c11e1eba084b25cb3cbb54e6e3ecc9a5ab7 -size 713683 +oid sha256:28e485fae85c97ca8248338910e0b0960a096fcd6ddfd1374472ef1c9069d58b +size 730407 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 55d13a09ca7..e6dc8d481a9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b305c79735452391a065966e291311a0146e721d91adac0e3d85f2406a4ea1bd -size 643277 +oid sha256:cfcb0551c779f170f0863466379882cd563b618621a67ba91e4aa0c63ab9844e +size 648013 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index cff88665806..b9f294b5f06 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:75a4d1f0f2b17f4db62fc331a23be05659176477edab535025349cf91daa24e1 -size 744635 +oid sha256:e10a0eb4506880903104563f541c0ee46fa9950923032e0615b0d7ac90c29e1c +size 772855 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 05b1d4d33f1..cbb52753853 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:acbbc5f1f65934661e44e8c5bacbd0b76873c4b80db90dc5e98d78c8cabb29df -size 742755 +oid sha256:f17d7fd9c908f58e921f8847ced17ad468b24e02e20dd5984273ec6d661a892b +size 752721 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 4751a2a8a1b..1573d6082ed 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9ead1f3b92b4a323bb4e227b869f023b7fe50282ec1363dcf69d3693a8f5d985 -size 706531 +oid sha256:ffe0c4faf416c8f8d03756a55182ab737a136207ccf7e5cfb7b31b70ee94a739 +size 723255 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index aa3b40dfe52..50bb59490f7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1f395df26e7b492f387127276028e47986de44dd595997952f6ecda25204a820 -size 636173 +oid sha256:7a1a13135fd3de96433aca94077e8962f59251e516813d662835ae34c0f4aff5 +size 640859 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index e483687de46..db3f64b851e 
100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e03e08cb620fdbd8e1f71a5263dd9878e304bc8b02c09f64fde8354cbd87911f -size 633335 +oid sha256:cd46c840787cecb18e09d972a21ab9f17c1bf59933e48715a942528cabf8b771 +size 647691 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 904807a7c4a..4e347e8c3a5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:78b300e4a2f60a090e3203fbbbfeb6f7607b83168c23e6fea45c53483e9000fb -size 581878 +oid sha256:8bde1f54b99ec21670da3b0d0d2b8d0fe90e478a4d338bb9441b418e87eadb74 +size 592188 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index dd7022fb223..afdc22b57e0 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cbd3557aab43514d8ad9d21d4f04d4d4c8de7e97e201707cceb39da163fceefe -size 623463 +oid sha256:ac167833a3adc3421a16558dd7671ac57b7f600fec15ed133bea90e7801c8a9a +size 636881 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index fe7b1ad515f..f9b5f8dac59 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:24922cb21cc0d36d88f46a26ccb16ea1296eecd1771ef9480856f4589630ab1c -size 577334 +oid sha256:d79ca6fca8528b59982bf4aeed6b1c0b7d574f24bd324492d1896850eaac75b0 +size 588038 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 8aec52761e2..24bab5ef84c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:96f25d3eade345650a8ab88f1e356a0eced934d4fdb179bacc0707c286644a1c -size 618861 +oid sha256:9744b5cacb4307a5519b121b9abe510bf716fb698bde372c81c0aaa3fe795329 +size 633119 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 7d26b8e57ac..d4404793bed 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1b879fc39cece54a143099ec5829a77b713dd6b1621c9263a81836537b8b41cf -size 533702 +oid sha256:06d4560fb2373272a3f89421d65057c58a96727dfeed18d9282b0427858f05b6 +size 539574 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index a2129917301..d3ec4907f30 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:4bc2eb1d1481209969cfa06765b5ffb46382b6723d1e616490faa126a21f081e -size 573076 +oid sha256:6ef0b351a7eb4e04cf37f8194cea1acb03bc9652387653e5a12587a64c261d34 +size 587532 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ab0a2dee1db..12bab640803 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:df24f8fe5373a0afcb0561b784857c6fc0827e09b7cd0fc9e595b1bf5ac8d8b6 -size 498132 +oid sha256:336080bc2b9d01e08579f338e7e51b376ed5fa2109f48d9a37197b6f4d39fdca +size 504002 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index abafd131a3f..6dcdb7ef187 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e6a2198a88122694f043ad4d12eb5c858a6102dcb87b9a5de30075740c204163 -size 626183 +oid sha256:8979dcf44b38e33ee5bd415dfb064b920eb35985f9a24d58255a2d79a6cc48df +size 640539 diff 
--git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index b5db11f3147..ca539c23785 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5fbe9b4e32ee80717bcb9f3ac3ac01c60136b05ceeb107c1f18c2fcaca83e055 -size 574724 +oid sha256:941896d14b2fd86b5dfef8f8b5f21e893ca0efd806ad9061877b8ab7baadd057 +size 585036 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c158f4db978..3288d46dce0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:560c9c53ef7400e2e374b669bc76a393218e69843041b8f4e28963bb87883fe2 -size 615520 +oid sha256:163d12b584bb7f18609870981fd0715c9f9e0c5b05f4cf41bc48de48e71b9e6d +size 629729 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f3d433382ed..0f2c20ec926 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:26986f5d3d10f874965e68e50706e70229796ac1eb9f235859a02c3ec7a69aeb -size 570180 +oid sha256:777bb512ff15898bbd1d638e90698e6317a9307d9b305b662a62b84db5d5366e +size 580886 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index dbc92515647..1342725c952 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:977cdad831d861197913ab5d1f9d744ce3d623e845cccb19fb59afd62a17bd47 -size 611706 +oid sha256:6c2ab207ba5d708d71c85ae4348e1798be0e26789ce30f2f8a265e5d8a62c8a5 +size 625965 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 2d26e49f267..c313420bf46 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:be480bc21f10e258cb3122042259e26f435915fd7aae8af2fe5c67fc1817eacd -size 526550 +oid sha256:0f0cec0571a5204b17fb46c14eb44ff20217bc56b699b676fe15c24a2455e73e +size 532470 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 6c94c02e3fe..b141025281e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1195686d46f41746171bfe12254aac690fd9c35fd6980a4ebdf50b6ea9a2b532 -size 565924 +oid sha256:a01f81e28bc9d5c131bbce3cbd618914a4e271094690ac9a609e929086200d33 +size 580328 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index a66e5618975..b4dd285ac11 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:70c77cd4d3330a892e69721a03e86dd915501b824e1fade735d6496a13469dc7 -size 490978 +oid sha256:bec49b63ed553d3e9e45fece3ce773d6f7c5c5393f323206e28461b748db5b89 +size 496848 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index d11e5dc6134..822f7b98f31 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:96c23a7970bca5b0524bc7cb7c19b1790e9f98bc6f370c1cc457274c235b41a7 -size 654681 +oid sha256:6f25cc062de26bc2d00d8ff2ddbf706cd21c7009f43e812cf59ba50519f7334f +size 667853 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 74e5dd9b7bc..00b592958e8 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:57cc0dba3564a194000b1c8a9d815ca3b8765e5692515b90d20d9b406756fc1e -size 602632 +oid sha256:f5c0401c9948540523ed61a4eb3fc218a3945906837c1ec82749996d9dddfa2f +size 600412 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index ecff6159c81..995e50bdd54 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ea45d3a4c8aab24425e0ffa99c2fc0ab6fc4d65cd663bdaf258df8256d40bfe -size 643231 +oid sha256:09bddfc7407cde32b5d2874064e29266ce096b63bae7bd23a2e23cfb72c5de35 +size 655515 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 7944199ade0..2140c9b9e42 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7d9763b1cd440ffe8ef5ae13fef694b83f0a534c90bc1390246c6505300792c5 -size 595670 +oid sha256:2937164539df66da64d3f5f3ea2057374b0d7446d58351b489ef44d88b449017 +size 606228 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 89c1edfa0ab..9985cfc8bba 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:de59f257349c3aa59df31ecf63584866079d3adfddb808462c337ec8de4f6d59 -size 636013 +oid sha256:8e0bd9fdf51f58b7bbf2d473403dc2bceb0d4fc759eb9e999419b5bd77b1a8cc +size 650419 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index d87a1ec3cda..4068f014ade 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c31c452886f194c12d5f6f96c9b57d9a7d9bfc1ce5ba1fe05637201dafece830 -size 546760 +oid sha256:cac1bdf6203634575214b116526eb95202d3d3b6aae73fd0f6b9282242b4a493 +size 552828 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 5c48405d5e9..170ae86d4b9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5f9ae576d7f2e1ce4ad5a53bdbba12750ece41ed8a911815611812790e4784ad -size 595606 +oid sha256:2b5628708d366294266e0d3671e43dde8101006303ba34a4bf23b7c2ff3c50ae +size 610802 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index d1c94d09876..15f7ee3c47a 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6e1e84fc07e761173b2c8bf9f9aa9ae6da2a02134161b5699a863ba3c288069b -size 510400 +oid sha256:7de1845049533cf30dfb96c22c9efa2b650e93a818cdc83deb5001d94acd45cd +size 516468 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 6c24eedf417..da30074c2b9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a1a5d8c4e8a5d89a2df8f2334d02118d3fda78b2641a9ce117712ccf11092ef9 -size 647529 +oid sha256:17311478ebc24f35a52800cc4249bfb892c30c28dc910df62b70a5a6bbabb9c0 +size 660701 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 24ff18b5bd5..f35711a2f13 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3367c1125805f4affbd8999be327266d4a1ad885dba97234c5fc0e68d24a217a -size 595478 +oid sha256:a8f7244a24eded74f3bb75c5e278681c6e926ab8a866116d9257c64289018d5c +size 593258 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9cec7a5f628..ab097792c21 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:029d2aee5019b303da85e65d38ebdb82a567e368753a78177bf373096fd897b8 -size 636077 +oid sha256:27ae90f02d6047148fdda0f85d805ebad4553bfbb5727a948f5a4658ebda2f6a +size 648361 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 24ea7bb2bb3..d8fe3b9cd1d 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7e7eece7ac5d6c3785d3c476190f1f872149b14bda264600d2e4a4898e7302e0 -size 588566 +oid sha256:00041d26f664ca34b5957341f92d5e7f0a0cbbfcff7715c2818bb2b3cd72f26c +size 599074 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 7d362229d7c..db07a466847 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8107cfb1b0f58dd5878d3cb31b2e23a738069ee98cce5a1f95f393fa5cc852ef -size 628861 +oid sha256:a69e72ac6fcec06562d15d362b37c61ae482f3fe24fdcf7be16c31f699d8a141 +size 643265 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 4e9575fe728..e3811ddfa33 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:564a17769d93596fb364f85b20b75e20372c780df9d6a7a1740cdcdb39340221 -size 539608 +oid sha256:73aba360bc50466a45e7129f75064fca2361cae29aaa708706af4702349f68e2 +size 545676 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index c04eefd3fc3..b4a723b4d05 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:010ce1297bc6d0355d811daacf84924a45c0b2ed8081280cbb63eba1ce397458 -size 588454 +oid sha256:76ed01cf78ce7b03373cbe4a7521b61f16ac161bb84a4b838af03994fd06ac58 +size 603648 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index e5433f5547e..e484c191501 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1f7a8ba649250cf9338f2598f83a7cac1218fa99b8645aaae7ce1a7205ce4388 -size 503246 +oid sha256:8b6b7854e060dc07d765dd9e03aa8cda15703bbb0ca782387866b1256e00f0f0 +size 509314 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 911140ae512..9ac2e6e7849 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4e5c15a6b413103d2c804c207fb4442499a23233e0b32b28950bd528d86b76af -size 733679 +oid sha256:0beb6fc881a2fd5149c7a12524e7691afbe08b7594b9df42ccb4d4a353ec3187 +size 746211 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 3e488423e5a..6e2aa456d94 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:32e7f561b4f47343fcc199798796afaf4845a00e613cac96015b7d741e95f270 -size 685577 +oid sha256:dffa6066833856c2c0c0b19e09c2dc59781cb074dc2d38b831f5b31bfd8aed31 +size 696135 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index abe5bf2911f..7a0916a03c1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a26cf9f6ec94f159b8d11659865332b5f26ebb46f0e101dea40b16cda7c1b804 -size 718085 +oid sha256:b705369842f01410f441681c2ec603fcec5303647ea6fe1b2550ba12f62c857f +size 730417 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 4916afbfd92..272e9931b6f 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dce901913bbdd0dacde1d08b4fb5a45ffa95e9e8c6283eced2897e40af65b12b -size 676495 +oid sha256:0a99ee965f3cfbd44f177723ac2785017e340cd6ccbcac0c6ed9fd17b6e0a01b +size 686065 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index ae410bfe60b..a5a69abd3a2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f96093e306278994018194817cad4dd7b571360f65abc839d5a29ad7b07b2382 -size 714419 +oid sha256:0241be1cc86b081ce2956e6ffedd48f52190de98ea8962945f966c7d168d671c +size 729515 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9aa15d3866d..0f57266273c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9fb6998e04530c09b2d621c31cb78711dcd35dfe86e2c30bff07e95c66768a70 -size 630397 +oid sha256:792a28fe37399884396ed750dc7bc822cacc140503785903c8138e0a3a782754 +size 636515 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index a7d350f21ed..0922f979b9b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8c154aede63ad4a602adef68488230a82c632e46bf078158d1d2f157749361a9 -size 665973 +oid sha256:399c1e157ccd9a6271294ae72f5246cad058ac862c52ad09bb981b317267666a +size 680033 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 21980d1a26d..4ec898b5978 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:d530a41c4d4e3797d224656b449bafe5581ba0742c052ee163b93d468db71c1e -size 590188 +oid sha256:13dd9b797e9b9e215ad58b237850001b101fc2c94ca54cbfeab98991d54d8c1a +size 597044 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c649bd49953..0468193abc6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:33b549b2fdcf419d1fe9b8f0536fed41ff13ccfbaebcc88af851c057cec1db56 -size 720163 +oid sha256:7c7927563d9cebaf47e9cc279095b69bc1db71fef6fa861e60afefa578954511 +size 732693 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 0a89b6da48a..a032a0aa325 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:909f721e5684c81853dc8424144ec11b06794540359455520d55c6f596320487 -size 672061 +oid 
sha256:1a0ee92c254a4518dbf1eb875cf7fbf5c39d64811b86f3262159b33a6744f0de +size 681877 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 26f6af187d2..de71c8ce234 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2fa4979abfb3f47d19a82851a3d20dede1ded2054bf9c7702fd6241c78c2d175 -size 703777 +oid sha256:86baf1d9bb0eacced514c1da21582272b23431a0d50181a8a0da130b4886d0c3 +size 716901 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index bdc2e238a34..f28af2eb82e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c641972bfa961cb7d90e26bb4518c964d6f055a1fce7a9a9bf24c93ce5970195 -size 662187 +oid sha256:c6bc9ba62b8cc16d51d2fcc0ab6c9d38e9b487a2f49b3b3918e45460088983b2 +size 672547 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 3aa6e8c7b23..48ac88d4e29 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3b8ecdb0184b6339351669b19e5f14b0ff8832524335f46b6194aa04effa538e -size 700951 +oid sha256:ffcd73be21ddf5bbb78396109c62d99dd436328450041c07409e262692967396 +size 715999 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 88e8b27e34b..c1081c1b213 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d80d99668579f4a44d5ad7d3f23218bd5fcbcae75fc4fe616f9ce071a90a4f17 -size 616138 +oid sha256:79e038f2a3df4799e171c8485661b997e3e4a8696da62b6ea4bc99696916a3d1 +size 622997 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index c55af8cd62d..ebf3486ca56 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:21e6e6a8cb45bc3f06edf7b4890f751e3a462c6c44ad4be85d72d11b9a248ca3 -size 652357 +oid sha256:476f26e27901f7bb8d294a3960c334611f64516d5c9f4953e3fa9fe1c1378ef8 +size 666663 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 55ee2f1b599..671b4e21b26 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:da464dcb170a75cdd9f6b8524af1ad78a5c1e55bdd181201bedb6492d479bd8d -size 576670 +oid sha256:c4f0ad2d516ac4a31ad4d993adb7d420a4c346fb61bc1aae60665e0b0ba5e1f3 +size 583576 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 
ede40d74cb6..90d829fc46f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e59137834a179bc621d65b8bec03098fe14cb55e8c3efa5ae0612e454e9027fe -size 754137 +oid sha256:cb2375beabb0a6c1af2fd0ab6a4eb98e4adf8c44552383443a2ddfc6acab3364 +size 767359 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index e56a5356a5b..b0ec89d3c45 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b0d7ddbf6a7ac2d00105a1c2525c319559953f48cc375fe9fd2d12565ccb1bc0 -size 705197 +oid sha256:31227408e0ec22b1b97e3e8bdea0242c611515547ced30533891f429ef9f5a0f +size 715113 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp 
index 59ec1eccd8d..26af5677701 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c357fe4a88647811029d5be25cc0f891e4ce9de42968ea6153323504ebd1b41d -size 737753 +oid sha256:ed0e8d2ea32cfe84e271ae6200431cbfc698ca26c443d3c53dba8a594f835f06 +size 749987 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 5c0a2999a5d..51e27570e8f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:104cd02d39eb67e331a77dc60252d0047c155f3e3935fdc67591dfe70471893c -size 694535 +oid sha256:0092c609e6e525a72639b3ac8969dbdf9823a1014e7f6a1bfcf6b3db57485337 +size 704205 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 
30d2c060482..6f30b3e6db2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:88cc1c6f18f980b26d23028892de570c9b97728ae2ccd995a96eeaf55153f24d -size 731573 +oid sha256:0649dab7d85eceb65edb1ae76f3540a776dad2891d5c246aa6bd70c8312d4809 +size 746767 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f9b74688e87..4807daa6ccc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0dde2f3b517809672ba49f02bdaca3fee2b8f276b8432543d2fac9f7659f336c -size 639509 +oid sha256:179a6bda5f00d0d1b8403de1a5ac38813d4871207da6ce5ee92925a5e221b8a5 +size 645577 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 450cb9685b1..8cb3da25d71 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ec2292a9db1dcda981a5dc05dc2db1da110676c063b53972fbba8fe87f28fbec -size 689291 +oid sha256:722a07d174eb689e5c961bdd6c694cfaa776a75b31b13408fb65a9f617536464 +size 703697 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 2cff4cc4f8c..c15828d0062 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b9637ba8f7d2cb738202e8d9d1a9f80902bdb65aa87006b2ff166b998a878b60 -size 599250 +oid sha256:41ecf995d437a23759f2a17b9185e1a26174be7674255b95e1d7fbcf2334a686 +size 605318 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 29f45cf20f9..5dcf12791f3 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8c4ceb6d7da7a67f7e4e40d1b00238ebcb935344d1c5ec008e5b62685bfb6e9f -size 740621 +oid sha256:3738cbbcc5854e68a6f950ac11a1650a92ed73ebe017eb7dc48a4002451c6515 +size 753053 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index aaa42bb02b1..bb1d8c91bcf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5282faa1f045566400aec296cf2202c59c6464145e471cbd51f8321a559851f3 -size 690891 +oid sha256:4df151c12e77c9e3daf1f89ab5ab15706eb9e4b96e15d06aa127325461725a38 +size 701595 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 97d1497fd08..28eecdbf371 
100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f881144b29b529121684708866067eb8c5474027c94cdced406b2e860f591bc5 -size 723447 +oid sha256:ee83f142f0940711ba7467c64897a3be0f455e5229d6ddfe9437f3aa519932f3 +size 736471 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 35a2bf118d0..7baf0891c67 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cb0bda5e984e64a125a0af38fbbf650dddeffbcff1e144c032ede3276f0f1377 -size 681017 +oid sha256:c8a1e74e84a4176e2676608d17edfd9b448fa8d3258d31f31965c8dc82b6da1e +size 690737 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index b45b2c7762c..aa3709f3f7a 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:de51ae39be050fd7b6809ac6ab5d60e621fc5761920f92049cba0f997262bbb5 -size 718105 +oid sha256:f7152b725433c96a5858856c475440c06a1c978465037cf09ed69ec15df50f8d +size 733299 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 6d4da246a3d..849da7b4d8f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:48a7f8b62313c0b37a94aee9cb1228c50a2e6e61a73f05dcaf0d70539141bed6 -size 625991 +oid sha256:6c1df12a1415b8c10a9dc71cac7b42ee5e6e193865f1446d12c0ef0e0b349033 +size 632059 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 0b3a6cb5467..2ce26928e39 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f3254fcd7f938c5a2dff9d382f80f8e12d37ed6cd16ec97640cc7912aa57e92b -size 675775 +oid sha256:e8949da078594c856a93d5a73254fbcba7724d821af487e26afd44e6cf220685 +size 690229 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6b46040f0c8..097a6fb3bfc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:26c8645068f185b863d08668a689dacf7f6dd1284a7cddf52b54514dc296a25f -size 585732 +oid sha256:b1a80f0c7331224a7070474c73dcc3d011a9f243081d70d7b30ccf608e06cb7f +size 591850 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 9e19ddda371..a2a05d2dade 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6c81f8c995922f8f18f1690af5225be3c822ad244914384207963637a05e193c -size 693573 +oid sha256:6ea96d0a805945e8305b8304e135773915d5666851699bbeca14ebee19033e5c +size 704377 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index cbd9af5d2d1..95014c6e010 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b29dcf1b0863a28fb6e9ce7c485b8bc4f16ce343db3a68086c03451902d1f0de -size 687155 +oid sha256:f4fc8066b5d92ae05ba3c9217916f9ab923304c1700a6b73fddbd8ae9f8b8afc +size 691791 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 339cc7e0270..d85c82d0cd8 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:581ab86f86ac99c93858e5489df27cd2a35b75502a035cdbb5431de3d78e051e -size 665335 +oid sha256:a588d44a8325e532ecc503ae52b278bd0cc4ee9e2a92efb6ef530356495a1d27 +size 680381 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 51e8f3d90eb..d85a8f92ad0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4a41d9aa812850466a454431a6e8012563fb52ae21a82a8a173bf50ebead0876 -size 581410 +oid sha256:d77df5d4bfba357f417c1694046a263ed79bd6fe201583dd028c4e856dc113c6 +size 589204 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index d0bbb3fb318..6241d55d6fa 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5bf17d24a11c655d34be79f913b41e90d96856e1f9ae7618dfe181d08a97e1e8 -size 686421 +oid sha256:d9479b4ac9c2765885fad879a63d1375baa306c7c2a28889fa96bde22adb6c01 +size 697225 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 2eb4dbe10ea..786b816c8e0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e290fc3607267c65d8e2230647d505217177f67df6b3fab3507bccd816e0d526 -size 680001 +oid sha256:f5026a3cb40e17c13506e40295511bf3b09323fad3b143e89ab855a5698e604d +size 684639 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index d713f31b1b7..33abcea5e7d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp 
@@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b2496347b3eeadd08ae1c914a8318ff0aca9529d944299567be88db008948f03 -size 657393 +oid sha256:a621faedec5a7f2b102073e11acb99d1748c0c12c86c71f145dd890708353031 +size 674017 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 44def618e95..d8419b86504 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2a8d3cbf18faa40a4487b8526cd57940fcd2d53643aa91c4dd80d39ac99534fb -size 574256 +oid sha256:c02e6c3463480c7154f26d349fbe3c0eece29493fc2212952eebd46e7c6288dc +size 582052 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 6c0837e1c24..57e417c10d4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3ea54e9bcc378b8a940a431945964131325c2a7540ceccbb40866cce78d6e8c3 -size 612218 +oid 
sha256:eed3965e632c5b01f391f713c7872125ddaf8b11ebf6e4bf2b9f7c7f3d2bcd4e +size 625391 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 47fbbb592ab..e4c2261822c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7a0bbd5dd39a48bec1e724dc13f88d0f3f27897cef9f94388ea6f919722ef63d -size 582074 +oid sha256:42b83878af54454754242abb30a8b2f56e1d3f8474f575425abdc417a2f9e41b +size 592088 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index b246f466957..85ea7369858 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:75216e6e039f8df46fddfbe7f2d269ab82a8168576855fbb66b6f509e791f394 -size 606538 +oid sha256:d4edf977d3a4b6dfa2d64d1408c5f9dfbfc1aecdef212d5a6e91035c2fe1023d +size 619021 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 97bdad97569..b04b9c058d0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1e8bf6994b83bde88bd16d323ee444c2f1a3e4f7f1eb5bfaa974291cfc61a036 -size 576740 +oid sha256:6b8cab1adbbd5c20b62fb81136abe2289d9f49240680c3a02680d89b88f57cd2 +size 587346 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 9c68bb994c5..f2b4b65f9ad 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e3a224a22289fc58100449046b72b95d0fba05d0762acc43d6c28c046e21bf1b -size 609928 +oid sha256:d1503e65a608938a28df30c723daa6ddedff81be5334d70872f85e3d80c34a6f +size 623397 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 6d023b12c85..32b14ec61f9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f10be31e7b063f0bb641e04c3692a9412146573e72b811c9a0b515619bb9542b -size 533158 +oid sha256:06dd7ab370a98435e8302a374242dd85989061d78cb36a6c7d3c94abcbc0a819 +size 539916 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index c34c87c854a..d478ddbbf59 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d3cbe89f2f33fa4e116a8a65ddab116250a2271413f32184e800508098d13616 -size 573716 +oid sha256:3fdaaa427568b4299a408637a08fd6dea0e09eeff3461939e9ff3a1322b3d566 +size 587826 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 389f41fddc0..c9b17fd9cbd 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:610507ba5742f090149e2dbae736b537d448808debe2001b773a21037da7bd27 -size 498870 +oid sha256:16d181fb143937562f3c54f0a724afb182399ae62a0cec4ddb189bfb69634b21 +size 504790 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index a588cc8f4f4..06758068fc9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ac3906f36a9dc755a5e1e0c403f23c53317a5063c21ce107e0c197e6a68db14 -size 605064 +oid sha256:cfafc0feaae806aa06cc268d0d7f2e6c924d745db4538768e7bacc873f87cea6 +size 618237 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 703db026c98..7da72f5c128 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp 
+++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9c0664d3884d60adcd28d638cc1c681abb9e6556b6f49d3c9eaea0135e622ac5 -size 574920 +oid sha256:09f8738bd8298857424dec4ba69b276728550ea30d65e891eadf405854b2549d +size 584934 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8cc05bc0fee..6b9551bae1b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f5f5f2533134032a0b2235fca6a642a29adc21e067965b90ba7e0c97f3cbd0d5 -size 599386 +oid sha256:ee582858760ced4c06d25006bd320edaae86b6221744a9c38bf7a68621a35205 +size 611866 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 7e2c8fd58da..e341f071483 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d1fd41639db42fc01d3542771f435cdd7030b7875fb7fdc0bd7cae29db44d4dd -size 568796 +oid sha256:22ca1155f4c7ba7e6af489bda3e0702da660e2201ea2cdd1c4fad5e7c0a97ddb +size 580192 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 58f0b5acb45..219dd396b2b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a6eb018bf12c1cf53789b18e5f66a620d4228a42798fe3e64e24238ea2d92d45 -size 602676 +oid sha256:70d9477ee26b35d57c176c646bf145a792cadb8e520e46839dea9c6cd1454781 +size 616342 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 41936bbca21..78b9646785e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:59cdae0eecfc17307685bb9a9fcd838f6755efc61e4ffb8e119b907c5f66a42e -size 526004 +oid sha256:7727e805b5aa9ccbef2e826ecdc0c290c229ab3981a81082fd20ab00e499e4fc +size 531974 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 1249fca7c58..01dd9cecdff 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1733dcb75f7c60901288edbb244a61650c587758c400b0faf200e8aea4fc7953 -size 566464 +oid sha256:2c9122f4122f294db2fb2c34dcc8b5e18b5c2f3653383fe539135687fb2daef0 +size 580770 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8392cbdfa8b..d67992d77c8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:02330d70493cf12805721e7d7a006136b75ef63db1a1a4af69e8ab0167742f1f -size 491766 +oid sha256:8c1be1fbfa944187fbd153671d1e701c1e055d4308e417175ee3bab4c87b082a +size 496846 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 45bba0a378a..a6cf17787b5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2e3137dabed3a009861720ea96c3efb40724470191deafb80708e4d310becd8e -size 633467 +oid sha256:39d952d9f5aea1e16a84605e8491ea78f055d094d6e33fe5c3edd208178fc0ef +size 646391 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 548b8a965e9..104040dc4d4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a5a083a3233e4b50b9cc4488492d1e3e94a0d60e943be98a73721b3faabba585 -size 601840 +oid sha256:002d1ee299798428ace3653b7195036b30f06c49d3e19b1de3231aa900fe8497 +size 601200 diff 
--git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index a1c79342c91..7348931ab8b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:82794efb0fc3210a20324352438e9169b8df2b1f13dc05e4dcfb9a492f576d2e -size 626307 +oid sha256:c4e0456a7ce459538c58632f5a9b493615cf88a18b1ce0b497faef75d7f595f5 +size 637653 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 05de9644414..a9f5bd61d6c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6ee8ab3c380126852b99d4aa49113e9d5bbe28621cbc242b91a7c4046d4a2dbf -size 594928 +oid sha256:3a30d4fa26ffce14e37bf7a81999a2182f39d1b8c20b1014b8a7c4b1892796d0 +size 605338 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 52a32ee864a..8705493ecab 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5b5835ab851e6b4430969e80f9d1a049c4a9e60a8144e1fd086fedd9fa632158 -size 635715 +oid sha256:59045342ea81e85a8e5ecb3e12886ed1769daf16e2bfdc9668e47f2dbcd9aa00 +size 650121 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 19b57894cbe..0962eacdcea 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ee3c55aaccf03b3a7fdca0945034f4569e963d770d0fb01162b4101ca6cd918b -size 546266 +oid sha256:27dced71f953ce5c2fffc6753f6e6a3510af4cee6c9617dd97f0f2fd234df297 +size 552432 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index eab32ca04ad..1003d8e18d8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aa5243d97949d39c23677ebd64bae461d932fe7618df3041457fcabc6e0ffede -size 596444 +oid sha256:ac39eca53e6a31a1370ccbeb536b13a2dd39ee840876b63fd80cf21d41d53aed +size 609220 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index dab1cf325a3..29090456455 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f99e75c774695e259475743feb89bfe8f784ccb0070b2eb6b2db81bc59ce9b9c -size 509658 +oid sha256:5ee4be833cedef6beec673eb3d9547568768835550fe4568b81a6b4584df32b7 +size 515726 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index b87ea290453..45bfe3d31d6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2c413c918785c8d858525bf4e08792ff84ac9f27779efa419af7fcda679ad998 -size 626313 +oid sha256:9e74387707521c818db33599e4e37a115cae34ff2a430f8132712f2cfd0d91c9 +size 639239 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 5c160087934..c385f3816a7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:998723401e5e28532dbe054e2cc1429371e2698e310132b4bbedcc9b654b1942 -size 594688 +oid sha256:63d351acbd29261b52a71aef646f1f917a88802781fe503bdee5fc8e7f45059f +size 594836 diff 
--git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 05f0072a021..9ab0367cd19 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:01f7e6c08ba05781b77e567ac6c394aec3d53f73906b948fc7b2408ba1e24bec -size 619153 +oid sha256:89b3e80b3a9700e7176b9e60743d755f2c738a2bf3fadda0836f43e9a663fa31 +size 630501 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 86381050251..b21e68b710c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:12db5ab547c4c7ab3c6530b8b4a69aac2f1529e5615c29f983ce5b0917fddce2 -size 587774 +oid sha256:cdcaa15f37bb06e6b2986cb3f99aca0f1e7835506a77ec004db01d7c991d2503 +size 598184 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 616b141e302..8928b9b3238 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:356eef98073eeea5e000d980cd3d96c7ce17104c0a09c95847abae723ed89f5d -size 628563 +oid sha256:e46596fe553752a2568a48f663904a430a68210b238271086390797985b21561 +size 642967 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 40d16fff7a2..1a328b30a69 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4721f6c7a9b59ad2b444e258fd6c85fbea82bdefd4048bdf13d152ae70a5b6c3 -size 539112 +oid sha256:22ed5d550085e1f1f7ce163031731281fd635374a01e8a62efb6d13e346873b2 +size 545278 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 46c6f882e76..85aab5b6eee 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f85efbf01476a7cc4ead02efddcabab59b150734e16b2f48f55b740bdd032fc1 -size 588500 +oid sha256:ac0b2576b154be0cdf16a3af174b1a911a692253fc84f91833a883d44f7ec793 +size 602068 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 3f5a96fca4f..43b52d4d6ab 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1982cc88b3a086c434e5029ae30c2b41523986f5cb717f6cb9038f02b1398981 -size 502504 +oid sha256:7a1d64c9dc8ee51f73167669643664c4eb4f16e4fefce46db1850044acd46dfc +size 508572 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 216c89c8305..1ba5b5c5073 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c7ff09874df249dfa472132c7c4140c99d66b1c04ece74338493b08170e69d73 -size 752479 +oid sha256:76ef7eef679a404edc926a8edc5d14deb8076970b32a7079ba40d3fbe6b1ca10 +size 779859 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 761172f7e10..2a60af97844 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:79a8cf0d3ca06edab6e548089685db8b27f59398dd52350137c2a55ecf7e3b71 -size 750599 +oid sha256:6fab111edc551f72aa889bf656975e95684fbfde94dc0a90306c0ec76d31b17d +size 760565 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 351d88374b6..e2799b56e3a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:19f07e90af76959f8dd6c303329dfe5445bde50afd5886fefea6b8153c5905e9 -size 720245 +oid sha256:f0aa049a6bdd7b7b4db5df183c8a35a207b029bc9f21dab0365f85469fea10e9 +size 738745 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index c85df41289b..cfefe354d44 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2f7f283c8d44e7482308466785cc30c4c5060c350548eaddc3b26dd476203734 -size 651763 +oid sha256:9cc61bb057ffe94edb524a19ef05d86e27700e6c6e36c4889c6df8f4262dac11 +size 657633 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index b2c8b246f22..42d8ac3ee91 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a8aa1390538d61dbee08b1a8237d1e244457ec99300825f6b2e12fe4f9bb22a8 -size 745327 +oid sha256:dc2653f023055e31f46263716bb9c5e2f3150ad94c1881c9d5635829d3c404cc +size 772755 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 91c81e55dda..748fda98112 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ee133a25e73fa0ebb1100afb910c099f41b05d680e3ff54d00956a2b9136ba74 -size 742657 +oid sha256:1cf1916fa26051e8e348ef9856ce43783c2b18b2459b0989f18084184d40e8d9 +size 753461 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index f4c688707f1..50966a18083 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f458ecc489296bf1def102e85afa71dffcbecb21ca0320c199111ee616a857ff -size 713141 +oid sha256:60c28a7d21ee6bb5892d45dd578d7c646df6e4001639116dd8420b42687eaa32 +size 731591 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 7a30b8a76d8..4955a9a7f85 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cbcc04d385e45d550ccfc8039086bae9d5177a1a01f49e799468b63fc2a87e8e -size 644659 +oid sha256:928638384ce82d2a162541f631e7c577a29fc14574984e5c67671f7173b37a3a +size 650479 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 914ed0db923..dec4235452c 
100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e9649fb3fddfafad4d931011e46eae94ced75ee506caa2c636cd1f83b096c252 -size 634027 +oid sha256:9f862c0940c0dab9f074b594b73710dbc89d3016eb91fdd05edbfa00ca17ea32 +size 648383 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index d98e4ae3dbd..7e0f4841b7c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3bb50c0f7dda21d8111c1c80fa2fd41c38add350433603f0dc1007b8f3625c2d -size 581780 +oid sha256:53ec29099db23b62e98b849072089b370274a70090543cc581792ab676707b7d +size 592880 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index fe799f3a7af..6beecb8af3f 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b0c6d829266639789f736cd1bb59c90fdfa04746518ef4b80a9cb9a5a878a045 -size 623365 +oid sha256:53e7f6a921c458e6152b0239b77a301b1a8896b6fe8455e3270b49769db49d5d +size 636783 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 50ed4a51e72..9a220920781 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:71541d5b23ee7fda60b59a910ec0a517fdb09eefb46dc1772a82438f7a3eabe5 -size 577234 +oid sha256:9d8eac42be38651356696a6861f856a9a0c324112a9211933984bf3b7afd13ce +size 588730 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index c7c18886bfb..58a8e73de4d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9cd97cf81bc7d1bd3921c8e4e308c65ff70fcbe9bf97d59b8f7b1b163d479e5b -size 627247 +oid sha256:fd9c78356d8a90b8c232fcb69b97501629710df2f2cd9541583314380131e282 +size 641603 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 184e464f259..acf94a31078 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:39b4d416ffde554bc7f73f967332c985f3698917f7503b81052d804e630111aa -size 541004 +oid sha256:77f588f65a8001f882ace6c9f565d923c57300dba4f2376df2f6ac06a93a3610 +size 547664 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 2addb51eb18..fd695f5e489 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:98cb2614f701dc3848fec3f25703d5d60872a989727f7f6049ffaf1e66515a52 -size 573570 +oid sha256:970a8b575ebed57e4c8966de94c8a82fb428951f7da8a4c0489ed0fd39c5365c +size 587580 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 83a85a417e8..e18652c4f86 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c165f50b11c8a09aafb5b0ade299f856d05600f180c26af35763a8f7247b09a3 -size 499612 +oid sha256:b1ef76181585dbef96224d114fce456b030c8fcd7ba6ffee7c95dc2526eb7df0 +size 505482 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index dd146c3538b..9e6037fdba6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:919aa5ba2bfeede5e15e874109fc3a6fe88ff01d43bb08d4adcba54101b15bf4 -size 626873 +oid sha256:533001cc3cc2ba57926bf5cf9b05d8ffcfe789b7f5a123b372ac43295c69b284 +size 641229 diff 
--git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ce78301dd1e..e5c601e971a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3f40e00e49eb4612da2fd00024208cf49d123ce2c979a99e7133dcb8ac91f624 -size 574626 +oid sha256:062c83db8b1652db7b07fc74242c6cdda8e4fc7e9ff68ae8cce06d8196c7f164 +size 584936 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 36bef0c4dee..fbf873958b6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:70465e5dec2c86b1e00202e6c1d4e1bc859882276cdf58cc5b4c003987a070f9 -size 616210 +oid sha256:2f42611edcc8f2aae9f7716faada5ffbfc4df51f80f6d92c74608b45edea04e7 +size 629629 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index fb096d5e37c..7d6490944d0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:88645c96cf8d080a1f121f2622ad84fe8ec1049472c9f76d81d64745e711ad15 -size 570870 +oid sha256:d68db6629602059d0c6aaafa47dcb5d3662ae37a83ecaeec5b96a507f3c31208 +size 581576 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 235fff5052e..ec938873d65 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:38277585d229c2fdb3957bf55468191eb15426585a3a19f5cfe2a10909ba0471 -size 620095 +oid sha256:8f7fefa1f4cc02192eaf2f5aed598df404f57fc4f98ae051dd01668d60f99fb6 +size 634451 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c1a19765465..6d4a617402f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b0a370756ff8e4e6b2c1f8f4e65d4beb0765e640b0d512809adaaf8ba52d3be7 -size 533850 +oid sha256:c3e9a1a2a97eb2d777dca06d7969351190d3bd94a9e63191176d47cadc4776a1 +size 540510 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 4648c572ab0..1cd69ac96f2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:df755f729d7780207c6a8fa4dc19513a346000049de59993ca8caa32e2386146 -size 565628 +oid sha256:93d53aaa4e4f4f1ce285a010a00d0f1b6e5977f057739aefc4f05a1addc9a4f9 +size 579540 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 0342bdb4c79..d620bbcc924 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ac17f9ff450c981fb5fc8eaaa3cb511938723538a10b29cb1a265b15da367eff -size 492458 +oid sha256:02e9a0f04f83103b1ea0b0433b8c1299b2ce7e097aa2c8ef84de669e5a478ae5 +size 498378 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 99e0c37de80..63beac3fdff 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:94c7652641a897d16f700f1f1040ad1b12508dbca9cc9fc2c2fda9baf7208579 -size 654583 +oid sha256:fbb2f66c249c791ab47bdecfcf9b95649873ebb813aa051f532605092d969e1b +size 667755 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 02df3bc9ef1..e365c55ca35 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d22efa01a4f0122cc6122851be4eda0c6d693cf58576a191dfe0f13d62169dec -size 602534 +oid sha256:908dd368eec41d73cef233b7bfdcf011211c5c082eff6f4e1bf06e47449dc1f8 +size 601102 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 3a80175e01d..82f74fa994f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7ee1c84a0c68d99cbb08a812497dc3c79d50be0692ad7c020c42035776b02baa -size 643131 +oid sha256:3f004030f23a291321d886350218421859bacdea71b40b5525b43bb4a5d4665c +size 656205 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 92244812295..e156a432f23 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0a315bd92482b9ce9bfc3902df356677db4ab84bb0a896026fd4b303e9c791aa -size 596410 +oid sha256:aec90a02665a9de8c01109f0534e75c7725854ac663eb4cdc2c54946995bf1f6 +size 606128 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 442da31f976..be3ea0d4fe3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1a21430c1116bfd57671656be2a810ad42a4908e946d69d3a2311bbc296f5f99 -size 644401 +oid sha256:5a5ffb5710d59134484d221d0862fac38204aa033a90c5cdc267abe7ebd44f7f +size 658905 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index cc0226c033e..b3ac1301636 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:abf2e89ef2ac9b51e8422e33fc4d78e95e1527224f38533cc3c03aa23e66e038 -size 554062 +oid sha256:acea6f68a163fd61968202440cd32c122df2cc5b1a50bffb7514f790995522e7 +size 560920 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 2f5e0524053..8e83bec7233 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c7c8ce6344df5a4bd27bc53452959dea9aa20a8fece6b57a971c0e9a72a5995d -size 597136 +oid sha256:98b7bf787a6d9d279d4e4e4361857dd36e4ab95eeadf424d9dac50dfb82108cf +size 612330 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 7504b97c51a..c47fe22ff39 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:667795e2a13c4c85411f554ba69869779439b209f9f9467b587a33b1bc57ca68 -size 511880 +oid sha256:534774568517f0c1567c90ef98724118933aca87bd1fe0294702c479f989a3aa +size 517948 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 7c6f254cdc2..64ca4c0b4a7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8d4ee6f61aaabaa1b6225931c3259371109bbbe5c8499c9881675343942b8b63 -size 647429 +oid sha256:56faf188a6ea4c749b9e184d07e30935451fb5c84ec5647511a4c20974b251d3 +size 660601 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index c4f277fb870..c501a14f903 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d73675e264ac7adc751efbf2a5c81e561bf66da4281e33b52fdec7a113fcf27e -size 595380 +oid sha256:8aa7579c8afc69edd75ffac0895e6180870da641f3d6bc9afcde89abdf652c2c +size 593950 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c31b199326c..ccb3f0562a2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:33d87a9a2e4024ac8232fa3ce82d1ea71ac4d324068607b2a5970ed3641ce302 -size 635979 +oid sha256:893c07da00f363641ada1108d353b6beafce9d4caa20788cb6b1e07bf4a43f6d +size 649051 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8379cd1b872..461b7e5d9cd 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:65c7587295734d4ed2110442a3287a9dd76b721136926b31a4f31b752d3d0fe9 -size 589256 +oid sha256:018ab4ad934677f9ab82b4fd12400e41f94f157f521cc3073be3ebcacbd4e791 +size 598976 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index fbdde704f64..e2515641e44 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:87e18f18fbfc26a6caa5ab31c164bc34f50dcfb122801a6087de386b95bda1a6 -size 638037 +oid sha256:56525a077ad234abdfdb61c2fedd5e681c9818eeef1a879fb2be3d79f5c7a242 +size 651751 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index a7e7e53109c..90cc48f4b8d 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ba2f305055f0b774e8256e3f2f20a9a291817ef92b757e0a5758319fed61979a -size 546908 +oid sha256:8a0534ddea17f33d39695299dc0d31795b0601ca07d7fe1bdac4e951f4b0189b +size 553766 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 9b6f23e1249..e2416c3a898 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:05229f4870791127394e0f865361dd62710fd722cdf9f1b6eefa9eaa94270d5c -size 589982 +oid sha256:175900a93ca8efc274fa98e0b2f045a742b9874d262f828be1f47646cbbff8e5 +size 605178 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f1e9982020a..dadad9f720e 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7b004a20ee0a6b8f6f858e2db5e07d018aed72ec65022c51294c1e62020f88ca -size 504726 +oid sha256:e67a72543c62de3b56529a6f68d223b5f8e6862d000d0705569110a88fd540d2 +size 510794 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index ec80452cf9e..177f0d8e780 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dd734fdbe8b14f69d4258301c5e89eb624accc791e9d5cd72e8b44d543dfea3d -size 733581 +oid sha256:5a8a3498b8b632c41289e8453bb7d50a3d60d4c97d1a72c828c70550703d5ffa +size 746901 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index fef92fd9afa..51bbad8a105 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:536c9ef895e39269c86e6e9ec135e74f5dcd70a1fc036c5934a35032f09c4774 -size 685479 +oid sha256:3fe9608498be4688b41cad5fad35cfe84de25292e749cd3532ddc861dc31fa56 +size 696037 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 20154911048..2523b925974 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9a0520cd489f64267e193e6b4f58823713d04e600d2e75f26f378d4422c76441 -size 717985 +oid sha256:387647d4517d3808db07ea7e4ee13568aa4fea7af7bfd0e838da06f256b23427 +size 730319 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index bb5b7df05d4..1f8098190b9 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fc58ff7767a5113711debd1507a9496c728bba89dd811ee9f2b5084f8d55533b -size 676395 +oid sha256:289b10ef33d9dd00713be33fc788159d256a88c9f9ec8e07416bda886517bd62 +size 686755 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 4cfc94527bf..b980a2f461c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6cf54b82837e0d563c25bd99c73f6af20142a0930a1989f71e89a8c1e0d8846e -size 724829 +oid sha256:58cefbe300c9b312194867d540193e84dfb987b725496f1af463172caba28081 +size 739875 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index dc55df02e1d..f916b62e21b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8f5cb09c40448791be75107b1e81a940d458966986c9a426fa4afac9b15e3c23 -size 637747 +oid sha256:2d20c89c16da67d638c294edd7ceee3531f13bc8a64d2e2f92d6fdc836fee455 +size 643915 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index d179cabefd9..4f0c7ba8641 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:acd4fb676c83379c1ff20d7c823a209585064465ee8fd4a6b6172cf3951b52ea -size 666663 +oid sha256:97fd32ebffde99ee39aeb7c6664c715e655f83c7254f71b955975a5a83211bb6 +size 680477 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index e44c19ef227..297c2bc622a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:d1f3987c14e1f84271dccca88dc975bcdce155db18d7c021feb8e27943cc4091 -size 592358 +oid sha256:dd54ba7e2d621431b6920573ba68387d50e4c3162c391bc4053cad759f513880 +size 599264 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index cd772de368d..86e5379a025 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0ef2939e0ed6d000bc00db55e646b39fdb6fdecda852c62a219cdfecad9e6c58 -size 720063 +oid sha256:6fc3e68f0b3914e8213d04242409173e14769910673b85f8f89b77a1a7d0d290 +size 733383 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 129fdb2c61f..207a008e877 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2862300c2debddf774cc197dac42a0ae96b0f8c1bee30f95dbbed50815b74979 -size 671961 +oid 
sha256:32173a19f17c1ad708cabde596e02dddd1e32dec4f730d3b876f4a0ab05c7921 +size 682569 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 2ee8bbcd2c4..db0b40eee6f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f96adcb83cfd5bb06b660ce6e025239bba621a3ce52cc51f0ba77828cec0d453 -size 704469 +oid sha256:07f28a0093c2c74c543ef31993e2bba59b258d3ece5ea9db9605679d8793bd86 +size 716801 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index a3edd664fe8..8fa4087e491 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6390850e222f1698c222417df4a1eb10472bd867448a06efa6d0d240311223b6 -size 662879 +oid sha256:de2432c84b8e18ab679fa888e197548fa03cee1b144fc1c5341d113c68969ffd +size 672499 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 71ce5fbaa3a..ad88a796ff8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ca03b8dabe7abf1d643f1bad15f5115ec1294da1dbad79ae363fe1614ea7db7f -size 711311 +oid sha256:51c6df98f6f023c16ab20d0d58e4e295a31f752154afe32cda6f3f25d44ddd58 +size 726407 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 5b992dd292f..03f7d831304 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:78204717ba892ff2d5eb1c8ae7fee08fdc4b95a46bb9423aa995d41919a4adfc -size 623441 +oid sha256:342a51fce2c42f115011c4c8f53b4779f5fcea99d630457763e34ace23accfdc +size 630447 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 848df37e23e..aa039b85063 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:465ce5514521b8a0b2bba2697c7512cd4568c6259bb40115b2910df0693dffe9 -size 653097 +oid sha256:22f41cde63d2b6ac14f4f727187ea5f11a6160a4f22812ed6f3865ef3d5658eb +size 667057 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 95192001add..8961c790cd1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a73a22275a4bdfe9fc49a20ac011ff2d4d84422841846874d34c9bc752c909dc -size 578840 +oid sha256:53da6dcdc1b4af50a7654521bbd37aabe7515ea45e6a610cdfc4ba04065a8e75 +size 585748 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 
42bbf6125ec..d75ca1838b9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:76700e5d2fe9180774205c646905f966f10b8822e06c170b3b87d3b109b96e3c -size 754039 +oid sha256:face4844f4efb67d79e016f1705ce853e155d59c25f347241cc42462fed15bf6 +size 767261 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index d45b45c83a0..d2265aad0bc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bdf8f9ff0da9245c2d59bc6a3aa935f71e38d75393f9811c80b2783f7c9c1d1d -size 705099 +oid sha256:1b85ce7a0ff3863490454d544765b06c1885a50b645c848de91788e372323432 +size 715015 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp 
index 88f0361140c..d7cbfefed9f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:52df44daca8edf29e5b5505f1c4c01f51c196ddc01d013a6f9667c90ce9b8574 -size 737655 +oid sha256:aa326d2fc36e565343e9e9db57ac64c6b05967e40cf098e0ac6d154071231767 +size 750679 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index d7974eef4ca..f820e28d137 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7087d5b4bf8937ef8684fa48c6757483f589b1487b3cb197011888e84e3642f7 -size 695225 +oid sha256:0cb825581958ab716769d3bad5c8b25c83ba28e4d184a8b8a0db849a80e7a83e +size 704895 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 
4b0e79ea852..455607b67ab 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cf3be02fe403a1f843b4f3fd9571f1c4d1410b0a8e651acdc08f78b6e8ebbf84 -size 742771 +oid sha256:76e2404cc95fc736181eeb3683e205a1352804194ead8a7cc27ee3494d893b91 +size 756387 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index cdccba4de52..ac6d30f013c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0786d9fe25b6c3e6e1e540e48b6773399834637a40bb6fe5d89e580993eaba6a -size 646809 +oid sha256:f6aec7233bc2abd06860591c07bc580797bea73de6d49e9b544079dd0e80a625 +size 652977 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 7d063d8d34d..6c29b596707 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ebef528fa4dcc66f63d98095c6106571c0992700840594dee18f4404ebba8c65 -size 690031 +oid sha256:148b70904aabc3b6f4952e73316408b95c61581b7b33d000e04603edf1c81369 +size 705127 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index add2a8016a3..38ffd5950cf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0c64eb28f24da4579c86826f84dfca3dd4a7fcfbd0f35843f9da4d9e65b1a958 -size 600630 +oid sha256:fd79367fd066a27d4651f5749091fa790488374e9e5a28e7ebb7f06cf8de87ad +size 606698 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 10431fa9f33..da6d2b20518 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d48cc229983ba2db415be393bb82fc577f480bd0db46df6dfd02d62c273b51a4 -size 740521 +oid sha256:8ccd6555a7512369b8b2a015990a3e80f9d8feeec5a7fa75b5b1ee9f6277eeb5 +size 753743 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index a73ca7453f2..3214c390069 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:27dd2b6389532dc509e1ca2eed08946861e394fddb1767717d6596a406efd42a -size 691581 +oid sha256:67f2b9ef930bb75e168410bcc3148559ce57df2c48c3028050b9ebbf4890b2a1 +size 701497 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9bb219b3151..ab87e079588 
100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0d1acc7c40dc2fbbb35db58321482a2afd1a6c0c7ab08abd422598001864450c -size 724137 +oid sha256:da6d213a6a99406a1330f3a6e9ab9f3b7c48dc65563e7db5aa416182aaa8addb +size 736371 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8bf3837a51f..615970181e1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5dd03b5049dc3aae22a567b66bc0a0d30a0b3e7177c89657cebf37e948fe181f -size 681709 +oid sha256:74bbdb8edfec11a97d7f44917cd41d0ad018205e414dc2cb2d0715c7bee48009 +size 691427 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index da8ac06b88b..3fd48d0d44e 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d9f6c004e87ca9ea15ddbe1409ddbaeead0bf3653efe6d1c75d839df407c6c7e -size 729253 +oid sha256:f9b3034f4da94171528f3bebeb45979fb1d119d16d886880c7332c79cd0300bd +size 742869 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 6fbd0a0a7b2..5d8be2e28e7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7b8e31481cf5a3870d6ea5c930f989480ed45ab3bc20fc2232971b1bd7bb04fc -size 633293 +oid sha256:7cc80c6101eaefab39b61961ef19bb322e1e6a279ce9d05d601f568beed1feca +size 639509 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 5446fb8e4d4..7f52ea7b502 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:47c819e455f99e11ad95c0efffb991808f1ecfd23ca9198230a70767ac659125 -size 676515 +oid sha256:3ac2859a82fb9c926a553dfd2d529204b3952d7c2687480f74fa6c5a24f0bc4c +size 691611 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f61b0e922e0..194f860e785 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ee3c8f7abd06c4e937012cf8e93a13a8b91446a405b195fe341301672604f76b -size 587952 +oid sha256:998da24ee58522401e8230aaf1c6006e20d5a245216407c4fdca6c5f47c10da2 +size 594020 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 5eed2311f15..9629b0d4dc9 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e22baf8f6196193247cc8a66c21ae010f702dc6e2f9cdba3b56c2a0659c8674a -size 694265 +oid sha256:82d4cf87b9c9e3a0379f1e1e9e78c15a966ac8dde95b8438a3011fbbf7016537 +size 704279 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index c7570ce0109..c0d31f45d18 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:017024dd01a8d514280a79b3ef40e3f708c56b23cb2ca30de54065a0f3b44982 -size 687845 +oid sha256:ea388eb5492f8d50f2e8f19675d5b3e9d3a1d07c9a0f348ae94107f4bc148ffe +size 691693 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 2e11f2a88d6..b6008c65bba 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:998e9704b5e3e1bcd890cdf073fc9a5fdd0a60d73b9b3be011fc7ab9edb6acda -size 663461 +oid sha256:402dccf9fe50ee38ed0c5553241ae23347b60e0bdfea02d18092997821d67055 +size 678113 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 0516cd89052..fe6a6ba4785 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:336ed15d3058c6bb5c7cd9e0d4ed9b8f61a7b5c6e1eb529f8ac097e33293bd84 -size 585604 +oid sha256:1227e970ba3d448aa6fcbb1024578246c3e30a4eca5761565a1c6ff959307269 +size 593398 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index ba5a0015503..d796a5a6817 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b2dce2ece5628b437938dcf50c6bcd63e45db141e28fe4146c46111031507323 -size 687111 +oid sha256:cc7da912d509e15dd8854b6021cbff9e2dd9a4a04b4f55f837f02adc5e128a6a +size 697125 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 503b66375a2..8e0652fbba8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:890d3398876e622a186adf239e5ed2d657d61cdf9cbf7442019381e234ea9da3 -size 679903 +oid sha256:6648baade9be2c22abaa6701c4f1ae8ff284ae3ca836b4d091fbee2d6894a950 +size 684539 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 00a337df067..0dd97b82a8a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp 
@@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c578bcfed14b88fff371df6d6f09c526ea3d4a7ceddc509db580d1bcd2eee8cf -size 656307 +oid sha256:46b6203d016d94a0b7850c69701eabc4a05b70e527d4f520f14d1c741d835b4a +size 670959 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index dc46263dbf4..bbcd193b27f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:48a9c08f9f8eaa76758f464c45a79073ff92eb998f67c9bd39f189cea6379261 -size 578450 +oid sha256:827824d270d5f9595b61ded6d44d79ab1cb3dcd40576e4cb0bde2cedd1805f09 +size 586244 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 235bac27ffc..089acc2d6b2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d1e721fa4ea6d116b7bae86b8f3a6a35f516050a3d0fe0d0bf290d6ca06f4947 -size 612908 +oid 
sha256:e3779f179ef30fffba1298948322a41d115baac6998e1150b1a2f8a965833be1 +size 625293 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 455eb516e56..2222f92aad9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2f476bb75ab610278edf256fb9bb6b735ad263303adf09205868f19813c2fdf2 -size 581974 +oid sha256:f6da0ab2141b21c8597ea6d594be271e2751491c16ae38739a4f81d2f843e042 +size 592778 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 0409beec6ad..49073413227 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:237a6a7a438e5b1fc92da69df1501930483291a6dc04c3955fec3f52b61e755e -size 606440 +oid sha256:37f475d6378b24251c75255df2b6fd99653184a03a2ff5e4238a239a5615bec4 +size 618923 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 37ff8dce0de..7b035371259 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5190c2a3b1e80cd59d6393ad505180b6790dd0a5a1f43f31e5cc365c8d6b5552 -size 577430 +oid sha256:31c8e919706807866c7d4559f4995ec5afb02a6e1e8e99d044d7a5795cc06017 +size 587248 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 9a45cb6a8db..4954512ea17 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:755e1fee3f7fc51039f54b71d4677bfd01df42db09f7cbb52ccb12be27a0f824 -size 609830 +oid sha256:af3943ce98ffc0ae82dcb6d289c8c6d232f1947c114c7d12c2c5ad3ccb486a6e +size 624039 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 1ed058b7259..07d8d8c57c7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b42c22d7eb87898115e227f982c02335a66eaa2dac9b58d02d8758076e968948 -size 533848 +oid sha256:012e45548b2cb04c21a05c99e6650b2dbe73091893e3a32802f40c1dbbd2681e +size 540608 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 92b1e531060..98689fcf42b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6e05c52f1eb517fc32528f000cd7608521d798a72d529f0ce060e17e6d30ad31 -size 575246 +oid sha256:a25b52edfc513f63ffd1e65495bee61bd1985b4e5274ebefdfa25a5044949232 +size 588664 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 41fe1b172b2..404724ab6ac 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ad4b854c867894d6ce59224d6b6955b068c35ffc25fd1c2a737c890e6cb4ce02 -size 499658 +oid sha256:36fc76bf48ec959104ed4bbb039839e046a443f7cc6594d8d4c96910ebf8adae +size 505530 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 1cf0d323dd2..263cf6803f7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c180ca61585eaaa9240feebf68234517a9353555125dab6da3c3d8a179d0cb6e -size 605756 +oid sha256:73f9683303261370594c12068ab1bed462efd7a733c70b92a7ac8194aaef443b +size 618139 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ef883d222a0..6b78e242033 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp 
+++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8352c83d8c7a680dcc2bc90e424dbd917d70b5f4b90c541c0024cb70e790e0bc -size 574822 +oid sha256:d30a8b767a2a281b0b04261f3e04d07da6a181bab66b8f86f6d9adcaba9663d8 +size 585626 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index a124f5e22ba..d3c3b07ddaf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:25e7f3b052685221ca1906af4b435d805a1b8ae1d84dde270188b54a4f152da4 -size 599286 +oid sha256:afc8337d74208f95d5656c5ed45935687adeadab9f4b6400d5ef76ba59db18f2 +size 611768 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 3175ac85b3c..71cba1f5263 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f3f2edb5cc46cfad172f586a8e585545ef8ab29c56e08519d4c07e3756adac45 -size 569488 +oid sha256:963348a5cd17451ba0569f98b2b81d6767c6059811723199a0ca175c49f40a19 +size 580094 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index d2a291099d2..16115f30d77 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:978440465f4d80594574d5b6e48ce5114acf65393dcf65033c25be1fcf83aa1a -size 602578 +oid sha256:12f84ecf0c8dd95099f4edf1198e93d271d9f3008c8f8a7e09d37c06157d9c37 +size 616194 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 4302f62284a..06175cb2cda 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:a822feb64afa664d8ac8abf3e82170ec107b5e7fd08247226daca4109a0a3c77 -size 526696 +oid sha256:910da163b2201b880d5380a99ff4fc5cbf737e49c9534177875e88e090919c04 +size 532664 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 03b66bbab75..fad2514ef48 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2bb5d00dd47119e01d26eaed05f4baf10d2eb451224cf6d33ca544e96f2be6e9 -size 567994 +oid sha256:1f7788799bb931a4033e33f02a322af4d5285f97b6f97317c2b093c6080da315 +size 581610 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index e2385fbe0b4..1922daf096e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d3f3e557a3ccb9bab31b867f1d0cb84b278b3466d5e8786b632b95170ea1a7b4 -size 492506 +oid sha256:69a0ed6b4ae10382c0f6c5aa7303bca5f7d3848ffe9b4b46f9a71d475ee359f6 +size 498376 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 1cbaf3c67d5..ca76e7db724 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cb140105a755ecd8d83ea3df89c3f62288b51b1a5eca9eea4eac0b6cb6c3c9c1 -size 634157 +oid sha256:1bee69565b8198b39f4b9f90228839c183c6ae22516e31a5817c1d07c1414181 +size 646293 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 85ba9c4a82f..9ed9a42935c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:414bb2c684f99a864f8207497001a25b855d8c7b6ff2edd663a8c2e2c76aaad2 -size 601742 +oid sha256:fb1f6694dad4d55ccd3856055f74f9464f4da85cba22ce97edf5a8fece4cfb4a +size 601890 diff 
--git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 7aa8ea602df..4068ae607de 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c3069770d4b08bda622b8171058dfa22be1b6a03be939babf0337c5b5daf4420 -size 626209 +oid sha256:6bcb1de20ea98d596ca26ace3a54fb15c5e33d76cf6603001d23082513c19314 +size 638345 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index eeb9fe1d514..470dd0b4407 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aa75f88ef23d5f584a54b1d6c9b423c2775a3d4f2320791915a4a4cb6b35185d -size 595618 +oid sha256:18eb7c8cc73323aaa2824807eb328a8b4e1d4af0b3e7cf86701c8f61abdd8832 +size 605238 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index ee1265d74d7..6058e35b77a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a3dd53901a9e8b7d63af0a2a55cd0103c5186eeec227c3f4c89c5da470f34c6a -size 635667 +oid sha256:bd3e47e1199a90aea0f77d65b9798a83b4ec1f601a9ceb46096c212486a31ed4 +size 650071 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 541cd981949..530fc1cbe10 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0ec31edc7b877e06faaaee7cf56da95d0726e14b9768e7122679a6952ca23709 -size 546166 +oid sha256:306db80a84162f868832cbe0bbab5b95d055931fca3b5a6abffecee4c46c87ca +size 553122 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 764531fc3f1..87bcea5206e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bdd8b5bc6eb21a10b30450a7d7928a925fe9d44e0b60beaf788fb48fc91dd8fb -size 597232 +oid sha256:706b1f784a101f7a3a49687b4cea972d19a16180e73338d66a1b9a847d84cbba +size 611540 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index c8982741540..566c1ec2d0a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0a0b2c848fded77c9370c3f551679a814c8b17361c352fcee7b5111d1d674dc3 -size 511188 +oid sha256:a951ad5b5aa3f73823c8683181951c3e2c2f682835316dc19059512a8ca9d2f5 +size 517256 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8deb4822669..cdb1d581791 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d5bc59377ea2a34f038c266720af43de35be9f4a2840e8ef3f423ba71c6373bc -size 627003 +oid sha256:661d81a89d351812814dcea3b79385edc032bdcdfc81a3a8e480e02a9a60de8f +size 639139 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index e367386d9ae..343bcf9408b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9105466c354c982cf4fcccecbf96acf7810dd1ebbd11373f2ef27306cba4f7d2 -size 594588 +oid sha256:007af2cabb0dcd627c41189140ecffa69ac82b88546e4e8491998609e7713de4 +size 594736 diff 
--git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 495a03328e8..c45be5b0437 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:26d289b0cf450736000734027ad7a6e7901fef50cff7115825d862e4b10e4e10 -size 619055 +oid sha256:798c0ceadca26614b56c5f0f681bb1bfa5e44f1abbf760f35d80b9056543d95e +size 631191 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f8352ca4ef7..c7d2a52dde2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:584d8bd2bd84bd7ec1633ba8ff93a69ac1e0d85c8d679849c9a72a2961aefe27 -size 588466 +oid sha256:6ec7998a85380a04f966d6acc6c455e5013c7b65b7bc6bdc0d9e4c66314e9cff +size 598086 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 0ad38131571..68abba21da8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ce320921b1d41aa10460ef7ab9b0fa6eb1a764255dfa64a3d22e041497a2b9c0 -size 628513 +oid sha256:7465e455d920807cba786b15b0a0d51462c32bfb7cccdd7affb3ca9f1c46b06f +size 642919 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 872e80c4d85..d3d4bf5e1bf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6645e815404438df97165c0094a717e1cc6936be39c0702d32024f2fc7659573 -size 539014 +oid sha256:1620992584e5db3f255383f708f001884880d9ffe4576ba36578be0b0afc01fd +size 546018 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 6f9926ae1f0..9a37de9e140 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3691c26dc89a958cbbac990b0a38bd9b4105ab46b3d3ffb879ea73e29d87f25e -size 590080 +oid sha256:9db588bed7ccc7c1dcaabbe917824aa9944fa077884454a0371358a5ed328a51 +size 603596 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ad8f07b108b..28959c8c534 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2b9d06960cc6af6f14f1936b23d95bc767802c9b75fe59b682a6bb0f67d768a5 -size 504034 +oid sha256:b187a2dae2b5dcc7535353dc1a3358fdae748d4d9c6e6ac875f98f48f2d6bb19 +size 510102 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 6a3f1f47fab..a2a898e5caf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:153488c4d7f94ace27eec671fcaafb1314df65773471f61d0a689441af2ee41d -size 783363 +oid sha256:39ca7e21f70912a8e2789110d09ba61e207f75d3115a9d7f8ad3cb753f9f9cfc +size 831215 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 0d144c846f2..31001c965d4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cbf6a5f2a20b406a5d61b504c445e0bcf0e3f3b7a46db5cc58acbc3c60823739 -size 782321 +oid sha256:b7213fb15b3dd14b0a6b2f7f629287b83e6cadae2da980a400558348f0f241c5 +size 813745 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 84381d98763..279b9306ecd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a8426e402c2dc2ae58986679905b184b079a66db37874dde20144e223c143a67 -size 749549 +oid sha256:d118d9dcb337f3b2920b2654d9343b4ff6288483ef774dede5295bb82a04f0b6 +size 793949 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 8c1b3ef6080..27b8bc59afa 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:07f3c1a93761c761caa9a786defca0fabfb4af70e4145908aab6c3b4a699a17d -size 680523 +oid sha256:21570bbe9a2f6e2f32d054a4b85602e2073b026a1a7bfc5a22e9d0c6d7512dc6 +size 708199 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 137efb9068a..91a1dc11216 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:70dafb7de7d14d4405c3e4417d62ccb6b223ef953a3dabdabdad549fefe202dd -size 769105 +oid sha256:2b69104ff4560a7d0cb1aa823b166b387cbda32fb81d2c9df7e0bffac407f53d +size 816959 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 660825d9778..ff57810e1a1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d4bd925a854ca26e630e4df3b1b9ea1f8933dc1f586bdc87f80b1996ddeefbdc -size 768013 +oid sha256:5ba790304b7740f1eadb500f15f4b4a9ba70310172db9c964f626a5abcc50452 +size 799439 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 9ae077d7607..2057cff970c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:71f95d63ea8fe26ace3b1835c69f5391a28431dac451af043999090f6a30046d -size 735243 +oid sha256:26c7619df8e0fae1fa59ea38611bfeb79485e088006f7354772a6d9569d3fadb +size 775005 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 4886bf5f4d6..5c088b56254 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ca5c11bc44917923a2992af25335cc185de23d8653478ad95a8afff9c6d9f1d0 -size 666217 +oid sha256:175c3447e8d795af48bddcf673f75f43bdf82da2f2ef186ea7f3a46ff4360003 +size 693893 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 27a3ed68f1b..870bc7b71a1 
100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5afdbf309de89b5f5627fa3742d3ec8285a90d02066fff4a4c98e021a2d78e45 -size 652527 +oid sha256:273c1db85e6b54f36934aae48d83505ebd25f06c1b1b6f71c73e1f7ef69cfe0d +size 685135 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index a4b161060ea..aa4c8938c90 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f0b8c842537eb09a187fcee49a75de89a6fdf8128ad9840381f27923f28ccfaa -size 607680 +oid sha256:6430a9350541ed2a4a5e9b57c0a4860e6659aa13a3c5d016d9caacfe16c3ff75 +size 628105 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index db7e0f0fe62..1d03c4129e5 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8b780b7d919bfa6360bce379da52a44b0cb0ba34ef048fbabb8a419699fa7829 -size 641667 +oid sha256:ddb8952d77e694953298ad9d9849b95e1a935d5fe27ee94bb413a2c9a06a4f6e +size 673537 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 4642e9a1e0e..dbf38880f82 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5b00723d8a0fd3cc8a6329694baf749339ae28946cbb05b666e318854baf8730 -size 602938 +oid sha256:f4607354446fc670c019ebd68fa56260d414d493d35f23023884de48bf679a27 +size 622771 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 459664bd6d8..59f0f71f7df 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a30226eb6d28a08f52124b97a829c98c4dd9dfc1dc07c88c85281465709d0e5b -size 641259 +oid sha256:c07aab3b54c11a4fe71bb209b1575951333c265852b654833757ed5d576fd8b1 +size 679443 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 6d82b18a35f..82864eced1c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c6ba07f8efe8288919a8ff0a1df2a76874e1227ec244a67ed9d873386663855e -size 555508 +oid sha256:d5c653ea1bc65df50597c28d97a2f62c75ddf20a14d21cc6ddf26f51273da383 +size 584862 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index d991f33fd7a..159a2d6aaeb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:e2feaa142d841339a8cea794c80f02521e355f550aee23431d6d41f1262eb660 -size 596116 +oid sha256:402a34874071784705f9fbdae3f2d6fff64f17ed4e4429a126e0ed9db4cac26a +size 622017 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 879ddab5cfc..53fc873b6b6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d65edf4b534a3364e468c04fe5527d5aede810e2ca12093c6ed835dd0bc7b8ec -size 521762 +oid sha256:3a82a693cd81a0c72c127902e1d064ac3311c0a202c7e4bcb8d7c376dab614fe +size 540016 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index bf3b490a820..f2c9c60cf1b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0d85012b0c98ae497041f6e961ef6683eda5e5c0015426453afd3f9cb420292b -size 638121 +oid sha256:27650cf9c91da950b7268be7661c8b50eb06ac0110c97f1ddcfa124d2ed4bad7 +size 670829 diff 
--git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 5ef1f8a5b81..9d7ff6a0758 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e2543b5cd1861721bb2cb998e6f00d86a8e65272468c3649ac59ddc925c7b073 -size 593274 +oid sha256:17c896b82cc9d58577441740e61b58d0cd77004eb12111b6fc97be8e236321ed +size 613796 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 05bc99a2554..101819c6ef6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:97ee7a4dce8d0219c09b0105df7fa5aeab1b741b0c7d266f64e897a26e79d7fa -size 626473 +oid sha256:c7930a14fcd108f6b7ce8708f3a192c42d2c4158b5d6fd1e3ad5f45e3a72b041 +size 659229 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index f7f6574d908..d4a891def17 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c1df5109c66dc79490b6edb1fae6e4418a1190e302aef764753a74a49f738a37 -size 587742 +oid sha256:421b984c960e7249a2866f8e530805b604bc9b0370eaf470a87256f38c78a6db +size 609252 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 8cc26e5e681..6343b107670 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c1061fcab2c4660fa98a6a50e0d42822430df4997a7ec3ff249bee04c1486660 -size 626655 +oid sha256:de23726404029ddd116bd7bd6bcec8d09ae0ef373390516e54d6ca07f60f3751 +size 663557 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 84aee7cf911..b2c1f0e20c7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d79c3646fd663937dcd8c15537c5637460b50ff49feee443c71d99cfb03a58c4 -size 541250 +oid sha256:492c1a949cfe2553e235686129b01dbc7ab73e344d07170b61c4d69cd120bc50 +size 570604 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 7ef713a5fe0..a70912b3959 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d0e9d0577c8a5813adbc7000815186f40124ce64a4c949a4ef7f6ef0ff68ee8f -size 581858 +oid sha256:a845de9dc1327cd41293a357433d1971883c4624ca95d87e4e3b3e126f282ab2 +size 607610 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 2a07440f887..1ffa9664b17 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:12753a852cfa2a2f18146f3cecf97cbd24d7684f339b9357b86a4dfcefb9e675 -size 507504 +oid sha256:2cb0d70a1824bce5058fe8f4779d0459d4163ada5228458983c3109bf95b31ce +size 526548 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index afa54555614..f716277111f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9886366e3553ecb0057ab9f74a3dc9b55e0694d76766f1a67b0ed5d13eaeafb1 -size 671455 +oid sha256:dcda9f2e31e45b19ff1c45ec1ca3349b013ac0ad9da106d491c699042e480002 +size 705693 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 2ddf8b09420..84953501ca2 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:75003b453f2c275aeb1a24c46f437516c8c2133204f375e229e6c328ccc91441 -size 617678 +oid sha256:851af201d34e318e54fa41c10739794b6ac464207bb0bc646f19bc88eeddc46c +size 635539 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c29422333ea..4a8668ce709 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:346cc3d64a0355695f54a1359eba89e438ec3d2173c5d6528a829ab1c7fff9b5 -size 661335 +oid sha256:a7090f5d816c59c9574d71a4b617e5f994d63bde9a9467a490cd2f9ceb12992e +size 694143 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 86911fb92d1..f7c69747267 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6a0e78e040cb4ff41d00082fb9c91b0a1a57b98738514637125223b3fe43fc9e -size 621817 +oid sha256:75d44a85da9637ead4a4df1a5ec9e5643b8ccd0026f5b6efeed437512489065e +size 642537 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 5ad0c84794d..9866eea9f75 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d1681bf41c9232817362f86d38b455388ff46c965390a7c66469fc09767397ee -size 658707 +oid sha256:22fb39284c12c9e18c6be7db4d3b7cf9ffa144afbeed9f4b0bf2ecd69fdf8688 +size 697137 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 86ec52f2859..c6d7394889a 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6fa79b0bbfc751e29ba8b316a7366b1a3dc61fab828f7442008dc4db48dd98b6 -size 560820 +oid sha256:2bc7684badf81ccd39aadeed48322ffd69fe265252858f528c6e2e401d6451df +size 590026 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 7d199d16023..eb07238ac66 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b8b3a9ef828fcad6309857c5bbbf10b25ff57484e4c98c8874b3b09e923ddca5 -size 619189 +oid sha256:14b3a40f28811cfc751fa2b39d49251a1deb0db9cb22d4184d8a8920bd6fca7e +size 648543 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 859edcad8ef..32a405cc2c3 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b9385cadc73e35126713cb8b12d4393642ffb9c4ee21ca5f85cbfb181d5aeee8 -size 525348 +oid sha256:6f60a248d1a7b3e1e808c2d6031e3c73aeb8de5ae94187722faccce12d997177 +size 544638 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 672d74a7f2e..8f41e4f281a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f034ea409983a61c066ab588d4462670400b61d2f8eb377cb7a805ba49eac8c5 -size 657937 +oid sha256:f137574d33638b94f0c192e0c30cd8772b41f45909ac56b3c9cc849dec2b8934 +size 691435 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index c5dd5c151a8..f7c051a716b 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2d4aa82e9096ad03bbdcd4bd9b11e43d33744e0142d5cf54624051da1c288ac7 -size 604162 +oid sha256:9bd1074e8ff162af8b48b360bd1f128b470538f9cc58c549fac63618b9667522 +size 622021 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c9c7307a45a..70ca0bfc3e8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bddd0d0ea9c7359de4ec65755bc9ccc0d465f4f1ff69d92278e6e152836fea7b -size 647079 +oid sha256:944ea5b903b3d68b37a2179f0e3260645416ac15624ce225a92257b5b9c5fa71 +size 679835 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index aa1139dc95a..c1737446566 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:32742cfa09f808b274e0c4451a4fb527c14b1446fab9eee41700bd57bb276716 -size 607510 +oid sha256:b847596c9322769d6a6e0f9ce370602052fcb8fe5b8c59d48e8d1f945cadaac0 +size 628231 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 99f48b99d04..b4e2ff3200b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:75a7663cd122afeda18fa68c9c8cf49498cd3d08866597b151216f28496182cc -size 643611 +oid sha256:145021a7b74dea7389e6b4538b9866f2fbceaa4143e670d9b902ed4cd776652d +size 681253 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c7f4d9fd271..aec687ecf07 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8a120715cae5f42d27c1052181e27f97aa0467cb289ccf80d89c3e0a05d87a30 -size 546514 +oid sha256:f0e1ec666f010256f8b6c37a9091467979fbd03d9467b71dfc4194a4db8526ab +size 575720 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index d40d56c0a16..b4f653d0659 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2dd0f1a8b2294bffd439b4a27f7b6b072f3afc2f48a42d629849331236e09705 -size 604930 +oid sha256:4a7ab3f0452a5808f80ac60338fbf8d3a11d2bb116df5f0757a1593624e3d79b +size 633151 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index b376dd88c5a..11e11dc929a 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ab283777d01fd89907f57e6bbee65eb283f5a9684aa2f47d2870ef385435fe4a -size 511090 +oid sha256:aadd8c26e00768270a533c18b17171cdd5af449447813b4d88887ce214d038a0 +size 530330 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 6157b61486e..84ac9f73cde 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eb834ecf4b0e9c2a63da49f5df331ffb38b5ee72198f518c565d0d75ed7c0ad5 -size 794607 +oid sha256:03adc2b1e654065c71c8c8ccbc817bc714148cc59df0828d132ea2aea7c8d484 +size 824059 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index b180ddb7fb5..b26f16fc74e 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5153270d1245c5311875455d5f4380d924f3b9154097d68616b27bbf6387fa4e -size 753461 +oid sha256:99106dab4c18e07d595118731395dd660ff6141607765866b5f2eb6f4ade43bc +size 771813 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 91bf8bf1c04..6f1c61ab517 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a599153baf9fe85b4c9361fcc21e3f95b8eb65a3da7dce425dbac562db07a8c8 -size 778025 +oid sha256:ad7fd074b7a4b33195a03f4289a4f7f295766c2ec6a9867d93d400a71252e5af +size 807477 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index c818a473b82..1466c22e244 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f4d061e226da740235466f11680a4545d4f96e1b402b18cb0d152c560c4343f0 -size 742601 +oid sha256:08afe8d5ec4bd269d9c2e7cb781aab70b85b971034610a0776c7495ee37d61c2 +size 760953 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 8565cac518a..83aa69ebafa 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f80051cfe2791971ffd5466b53ffd02d7129ce8453ab386a5061e9b016c755cf -size 758375 +oid sha256:b9ef67d3154a358ae897ad3569f8e16e4d477f84ce3f950a845ca909bc92f516 +size 796659 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 4572a2cd708..4027f0d9f57 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:213f9243e057ed39b2e847cc206d622f6dd46191b0243cad4ba59d51f44aa2a5 -size 693297 +oid sha256:599b0dccb91a4118c6cdc56341e4b30ae53625fb959b5fc80623b4a5aac0aea7 +size 720875 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 679ff55d49b..78ccd8ca023 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6c2e8478485e38d6963ace7586b8ec6ad2c2f47c6676d4904590aecc47f42c5d -size 707857 +oid sha256:72d13ac9940c3d666268cfc8385bb507dd9e46be9fb0a28fdbf7cd40084de4d9 +size 734249 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 449dde0911f..8269dfe180a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:3495b2388eeada880c4930742712b80d6977edf83fc2f707e23f0983eb1042bd -size 655949 +oid sha256:47e67c33dea4ba90994bc1889ea4c86f89cc9eb22beb38660f4e36d164ecd2f9 +size 673611 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index fb1c9df2667..507786fdf23 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3b1520d31812f7d0d24adbff717815588be47dc5f427aee1e794ee588210c480 -size 765401 +oid sha256:e57a49fb3445b472a4971361aa15fe3b20ff2effa97122a5eeb66d716ef6b9a3 +size 797073 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 2181dcbbf57..d941602e9dd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1d244df89b8ad7f75d5a5f4efa5c4fa82e476294c0766903599921ea8e11e103 -size 724305 +oid 
sha256:fd86844e34d406f834473ae53dad8a24178c7cb7ebdb740ebb4896eee019ebe0 +size 744827 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 7eecf1edcd0..25cfdd1d5e1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8b9549a0481c4aad583370826c8f20ce71c576053f2f8eb41192b5c5660fd45f -size 749609 +oid sha256:f3261b719938da55bb4b1258822521bccb2d2306a78c42bf147142d8861b57fc +size 780491 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 25aeeee40bf..d7151c5c806 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c1db77d4f27afc6af37c78dfefc8c2b7251e5bd30142d8c5c661f974022adf22 -size 714235 +oid sha256:e2f3075aacceb2fe92fca7b116f3cacfc26be992519496c675fe7b073b5257a2 +size 733919 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 38a7152ed89..8241ab4679a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7f358b1aa2aa0ffc265fa19774df63b7a7d8f80663a2cd1f0b56a7c04c80a507 -size 730601 +oid sha256:9389537824e4d079817241833925fc22af9bec34a742567ebc1a5d4348c5dc10 +size 768883 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 7d8b2aa4a4f..46ce7e6856a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e157b0cf38356227be0064498920fa364977413bd6825f37b76db2a819cd61fb -size 664733 +oid sha256:0d21d66688324b70e3d3086d6ac2d5a750d45baa794ffa483e4016ff7396f6c4 +size 693889 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 01138d2e5fa..05945ff7878 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:15b38383f3f7a7de4e2c30dc31dd484abaac1cad10839203ba3f9415483b4071 -size 680773 +oid sha256:7f28e28822325e8f67b8f7d4dfcfe5c2ba1248d1cfd5d64e3d9d415ca40e6245 +size 706525 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 4adc96e4fd2..8f831ca16f1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:00c7890d165ca6b3860458359fa8b044b8c553c7b4d0cc746cf9c47e90cc9d5d -size 627385 +oid sha256:899cba92f8126c6ad43f34928bb1fbfc1cd1b74e7f6906b6ad2c8f8ef64ffc93 +size 646625 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 
a924ff4b3d5..455f06dc305 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b181683cd816b5c06ebbd41bb8d34cee5b4375807bd9f0e58e013da6e7a026c2 -size 812893 +oid sha256:4251779e93776cde25d305057b9c433320f7836254a60bfa6047327414467bbe +size 844517 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 51e97a73a0f..5b8f9bb48ab 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1b526785767ca661535f4469b4539e2d925b364638cf1f3cd02e86485c0cd98c -size 771747 +oid sha256:e25042c698a05583b42a7dd23a0239568b34234a8d01d4876eb58ac380fac95b +size 790643 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp 
index fe40ea17a60..059eed78638 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8c8198ef13222c2854896544f3c0eef9f85489ef5e09d73f66491f307637716b -size 797101 +oid sha256:416fa702f4925944fda71369351d35c2af5b622825572f8c32efd8683fb731b5 +size 828723 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 9a7b17d889f..a1b7b7178ee 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7a341c4bbcce42219c93d825e2d999a0125f6258bcada29652d589e4f3cdeb02 -size 761629 +oid sha256:5b7d239d308dc0031a10860006bb2ffcb4992dc4b9d85faca1a2e61ca7ee8c45 +size 780573 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 
cc4f1f7e515..c1e35beb275 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:42ef65dd9932946b280c7cbefb118a7cf12abf8592194963b4378d3af1318eda -size 774739 +oid sha256:a7e40c43e146bf98c7338343662f230b60c462ae60f7e7ac1191deac9e7664af +size 813959 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index bee0b63b140..541567e4967 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e04b6ddd7e63dc7cf62527b538cb93a153396f5aafa5dc39afcdd1a69db2a2dc -size 675669 +oid sha256:dbe6a303831da20d8ce926eb6ed0df131d8a8dab850fdfccbf532ef80608e353 +size 703839 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 0b745107c3e..a8aac8313da 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f0fd6552c0b6efb324fee795da2be9f8200331394c42e57099b61e1fed5b2cac -size 731423 +oid sha256:ae5614c49e317bbd52fdb08ab50db02f7ba6afa5360ab2da7faafda10e0a93c6 +size 761171 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ea49c54cb1a..d9f061f4457 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:869a201069b4bf9833493ce386a8eacb4f9068f83dfb9aff99b6fe6c3d4ddab3 -size 637385 +oid sha256:09cfb9de5b89322421b4ce28caad0f9a7547c89490b4421f838e7a20b1f56566 +size 656033 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index aa0ad9c97f7..7b95faf1b53 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3e71f3d17ccc86ef6b67e84aad35c14bf1bab369bc2b1a5f1a9d0eee37a79a66 -size 785069 +oid sha256:38a876e914a124cf4914a806de32cec7ec7522f32c82317b3ea4c3e97f530c32 +size 817531 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 6e4e6f39596..b61904227f5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6c064132cf13e27ea3760200bdc4b836936c78b21b910a3918cbda29f5a69867 -size 743135 +oid sha256:937de4c976edf5a55ac338b58b430dd8374fe3c3a197442520eef35107a1faba +size 763657 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index fae0853e89b..b039c48264a 
100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5358752ae1851f185828c35496c8bfefa63481df2110ad8c2afd3e581f03c825 -size 769277 +oid sha256:4bdda64421b8649d472094edc8c9b2014a51fc0c6cf29f7e1ccd815aa3614701 +size 800949 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index cba6bfd1706..a4b5af57ab9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e56556d3c35cc5614dd9f1e796bde8b4e31ddabb3e9a57e6e0efbb078c61739c -size 733853 +oid sha256:8d225289025a244cc6e01bf0679f0870ac93e05647acd4930835a98ead113947 +size 753587 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index e7a703c31eb..389828fcce7 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4b21ac5bb1b047f27a94b457fcc1530d59772d1507b9309f4cf620db0ff6adf0 -size 746965 +oid sha256:88a1260c5ad4de5b91b13fb0bee6c2d1df330123b9300ee7e444cc584f273ca2 +size 785345 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f49fe0381d8..a350f7be75b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cdee59a8537598edaf6ab46d9f156110cf86c334bbe17f35083ae731565702de -size 647845 +oid sha256:82f0c36a2d4f7f73c0de3260c7594bc8a4b20da90bd367851929c1374384c2db +size 676853 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index a49881555d3..f434fee9c48 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f7d7ce7c2eeba0a23d5671f53263e6352af0d1bec7af84d6973fc5a39886a706 -size 703647 +oid sha256:02e9e83e3a7a7e9476b4f40d4cfdfd1469f379e29a93c37a8d016ee1f2fa6a18 +size 732607 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 24d4db1780b..acae6215191 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:97bb7f92ce483acc13b341ed08ad66862b19ff33bce17f9377398c29c90f28dc -size 609610 +oid sha256:879ca8d430280e06a24d7c16fa65149b267e92ef47c6d6857895a8ca3f63ef7a +size 629047 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index e5f5603b68e..4e614cbce9d 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d391d9b311a5bcdfe99c7527991b38bc34749725db238759094be9e680306503 -size 702701 +oid sha256:f762485f2e98b8c7f93ab5291033450df7954286cd6cb24b7b568963d253fc4a +size 730179 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index ad5e1f366c0..37d88dcbcbb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:74c458affebc4b2ee74487905609af0cde99b05ffa6ba751bdff1b8d0ab6a29d -size 685921 +oid sha256:27351a8b811e1706038c8b9da3802fda80fe23041c34ad34719545afab2a5f48 +size 721737 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index 3a5c8960d81..97577a6025b 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b7ebe25dcd9bbe7acf6cd3d299d30bd80bab61c8a83c96e38673803cc9cc25d6 -size 670269 +oid sha256:f04b570bee24dec0facda12a5288a8f7fefefc5176707b436b67c419bb44045b +size 703025 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index d273880df4e..7646a49fca7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:001c0b594a0382bf3c5be7d40ccebfedf7dc236490b45e8093cfcfbc800790c6 -size 592856 +oid sha256:3d3be9cc0c407d4740273f977079d64132616d2271ddc343e898736a90863cf1 +size 619003 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 693e8f926ca..60a6591c326 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:637603ddad91d0b53c2681da6e423c99a452829711d2779aa450ae280b10f799 -size 695547 +oid sha256:d59aee6a4138cb9b4dd3f307f78c83e42ebcea2ae8092230ffed08de3c923694 +size 723025 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 5205ff7d99f..ce041f26862 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d600fa5054ad22d37440dd317496afa6e4b2294bf11c5f926f0954558f6c844e -size 678767 +oid sha256:66dd7c52b6f2906e200916c75ef7a3a1154fea65b08b4a7d56216722b6fdb038 +size 714583 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp index ea98beaf80f..4a2c87b45ba 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin.cpp 
@@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a3a7c1937f1832665864d9d0cde3f9828127651fb887c3551b477dc647c6db3a -size 663115 +oid sha256:69beccebd55c1dd8cc68b23066e5dc1fd6fb0b147ef8d4fa7128d516514faa27 +size 695873 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp index 30900a27e1e..9a89f4586c4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8ee0ca37ae1c73e230f5299e0fc69ff6923122cf6e7abc363c20882acf68bdd2 -size 585702 +oid sha256:7af8a03cfc3fb02449b760996bcbfda3d2eba5f859ed857f54ee62bfede20c79 +size 611848 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 33ecd5648c9..c8554bee843 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cd6fd795f331de65d613cf1b2dbd7f72815a4543adf49bc046a9849f9872c2e6 -size 607580 +oid 
sha256:9263164a6c6322e4c529419223d0985698ad2fbd39270163ef28978b41b7fd15 +size 638069 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index fa42613cfa8..1d00605b50a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8fc8d5ef23f2a03faeff03b43fc697b517ae5053ef7efb0d844daac2c96ac8cb -size 582616 +oid sha256:9c16ec7e6a3e8ffa805483fd235b5d71ede6c416b12691cc71a3f3bba62822b9 +size 603040 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 307a77479e5..5f6b44e63d9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ad12f6d2f410f705970915d080d9060df91c2962e0c261904296cfd241c0b4b6 -size 599830 +oid sha256:3344376f219df977e7d4b139e081525f6569a30f72270e801a3db2e36dcd9e4e +size 629923 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index c2f2e4a271a..54d114b2028 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f46e8d97baa9327872761cd2dbd06a24d1961d4f9b0489b5693d34b4724376c0 -size 576296 +oid sha256:3f0369649ce3188bceca50b906a638e45e76994e459da4383a31ad20a5e49082 +size 597608 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 5a359e464c5..8c32f99a4fc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:49610e47aa191d66240ab42b89b1c5bff762af6845953ef15fa51bff6a67b031 -size 599864 +oid sha256:353efafb109961700e5e9cb6a2d22b547ef968fbf5446747cf6098a0b3352714 +size 635337 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index faf74d4ee97..967f67abb70 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:23efe0749463e937dd8b3e388d25c5dc2b8e8cc7914543c20ba8581ada79781c -size 522896 +oid sha256:ae89e0ae6c3245e81feb6d29c5d2e359d68974603d508e203b52933f56b9f20a +size 551362 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 683a72e91d1..7ec4bcca4fc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c68c0bf7ceee892bed0d43fb2e12d9dc53469c04c72252ffe6b9b4e8d533d5ef -size 573026 +oid sha256:ad046f211ef820aeeb1e210e809a630a98d17fe2c98029cd4a67dad26c3c1de2 +size 598334 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index aec456d0402..ef715e7051e 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:434c2f8662262ee1a4d62d90e31cb631e792f2e99625ef49043cc30f035f229a -size 496106 +oid sha256:abf8da332db69791c70808d1393ffad7df24c1ecac7f4736d51707a72ea146a4 +size 515198 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index c2ae0b19629..6557b8c20aa 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6e468119e17f25826ee7d7178044fe2123431f93bb3edd0d5a84072c86f7029b -size 600428 +oid sha256:69fa0d42b4602f1c03788198ef2641356af49be0696dd1dc4df5c843c4a9cc16 +size 630917 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 866e947a1d0..49406ead397 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp 
+++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b1b9a34b8d48ffa1caecd3c9c11e2efc0cffaf7ae6f5616cb13d02787ace5068 -size 575462 +oid sha256:3b0785a41d755f30f590db953d1821fdd5062bade1e059804dba53291aa80132 +size 595886 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 7a394d2a9c7..c1a90f07eb4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a4c03b394f63f1efd69ee83441487ddc55f5734fb52864908034743bdb091446 -size 592676 +oid sha256:c107cf4dc7cef45a95d707ad1dbd8e637c9f094765c2a5d661fc7d731dd4bb6c +size 622771 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 642a8a90ce1..e53b6aabc4f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:17b6510afff20a753824564cd0bea884fcf29a866722f34e65ad57a666a6db6d -size 569142 +oid sha256:3a852ba835ce9fdbc7646c1f7c2ac0af7534d8c752098d4a902758fdff4e4deb +size 590454 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index d5c9826216c..7cb3c8f47f2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c5a27c50516f8b6c9cf6d9c7eb5395b6b4f99c38b9f71b60ef093bece480eca3 -size 592760 +oid sha256:f36db9ca488072f01040ef43d3b6ef2cec4361e414649a0f7cd0d97386b89b2d +size 628281 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 09654573551..63a1abb3635 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:73648faa9e5e6062f5f84b14da5f9b97ed76dc3a2826f3a25003eac0b6a2814c -size 515744 +oid sha256:2fd83d4c741f318a246f1cf5b79dae438d715745b198654bc92e4a4c7f0b794b +size 544208 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index a1c63bd9ab9..33ec6e63686 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:876289c3eb1ce92322c542ae4b61f5bd23ab7fa455a6c5e8c08b9e4010e32e82 -size 565774 +oid sha256:b9491dd50b5a3676134726e84637cc91ad90ce70c9d5c3a27930a67ce3e7c8da +size 591278 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 070f5a3b55b..9210ea3cbc7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:998bd96ecedf20915dacd7c4bd26989064553fd8f45686d2140bc66f51b62edb -size 489002 +oid sha256:fa7c0adaced79286aaadca2a7f2d2138979679c82e17be57ba27f7db78e98ea7 +size 508046 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 561fb05c4b2..a6a3b71b301 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b87d1d069354d2943511885a7e06bf94f2f7a39177193ee05169d5f8e3e19229 -size 628237 +oid sha256:4998e877c0731e2b075f253e8b13fbeef2998338b310a4b145622051dfafbc21 +size 658133 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 39d002172d3..e7b1cbf1fa3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:46079ca1a10584efa539bd2745543a4dcf38a6cb76ea8bb5498d96c2f4af95c4 -size 593602 +oid sha256:8e403d75546020a2194f83437565e06db127292dfadfe4fbf09f2d05285b2c3e +size 610770 diff 
--git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index b0fa7f7d3ca..66958df7bf3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:27771b189f4d73d8e411f57ae19d7d62345a1799405e0c61f214b1d59996c3c7 -size 619647 +oid sha256:3999a747347f3ac02543597fba16d91848eb941383e16089aabbd7d991e5a1ee +size 651319 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 77ba52ffb08..748f082aabe 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d44590f11602b7a2cc8c8f4e92643234b4caec223b4bea983e15225da6f9a87e -size 596062 +oid sha256:a4bcbafaaa55c5e0fc42ee8371a60b8383067d9f0a9b4163ce53837883ef2af0 +size 616586 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 61be3300803..1ea96c631ef 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:95f5d1330a8f7da35d677f07e5e597d728bed5280cbf630e42f52a6b8ea98aa5 -size 624123 +oid sha256:eebce5d3de3a6972b5a0350e2298fed51a599964cd1498ac9c19b82adc31ccac +size 661813 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index e14fc55b263..8f4726ea46a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:805a8abcada9bba84b4772d43de55836b4214868b78095b6a18b2a130c2e9f02 -size 534474 +oid sha256:c26f5a849021c6235f0e4e1345bdd8753f349fd90d91d3261b31af6c67f890cd +size 563582 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 4edb9213631..0fd90b9a86f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:74a43e1b9938ffa8d238e125cb22fe2ab5fe4cb8428706bd7ab08d3675cbf5d3 -size 594322 +oid sha256:66bfc13700a02e3cf66ab792f164e7940df1415fbcf73e0b67c84ad9871173db +size 621013 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 0b87b0b8c07..67ab4535f9b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f69cb599e87754c1c53edef4dca5b7b62786832889b0ad6421b450f4dce67eb0 -size 507586 +oid sha256:cef365f8c372da522ff55cea827e268d8630be58abc793ef9a297ea170f69155 +size 526036 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f5f992e9118..424694cacb2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6f1e826fe2525ccd700595a3cbfb137f5bf4c29c81fa06f5e242f84bde657651 -size 620295 +oid sha256:3d098053cc11b86ab733c5370ce27984a05b0cb1dbb4024e692fd89465188ad6 +size 650783 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 69ec42201ab..4afc5b8c8bb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2300f8b7ce1c44aeee3c70beebdac6a49266e82ebf4ff8b44e420e5d3791a5e2 -size 586448 +oid sha256:10ebd868178b322e91fa03213491b3be03711156a6b0a89439e4abbbf103ed8c +size 603616 diff 
--git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 75a71e6cd51..b69fae792b6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fbb260ab3f3a6e9db3171e70e3c0513ecbcc756c666b2dfdc9b607c01718bb98 -size 612492 +oid sha256:2b202dd2605dda5b6982b376983d711b6a9a1ffc3308f90f9af8d0781cdd8c67 +size 644165 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 9f721f47506..395d5680ec2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:da32bb75a26a78bf8d0f29cdb84132229aaf342c46c7f0fe92d2227080908759 -size 588910 +oid sha256:17d376cb3ae94e6d937d58b4211bbf237f99b95115cbd588abf257e06d6b056b +size 609432 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 17c24eb686a..24323f7efb4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b074a17667bd978dff535e7666d282efe4a9be7a93cb2722ab54278ee32a4f97 -size 616968 +oid sha256:e6eb03856f84b6a5fbb4d392962bb07931acd0c124e4fd1d51b796a408ed2efc +size 654659 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index a464f009ad7..33a2a856f33 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a38b3c4fe694233cdf46eb54666f256172da411941b0e99974ee494facee8353 -size 528110 +oid sha256:1e82604d1bad4dfc44f7379326a2fdb715e3fcb837e676f0bd1dec9f663f0892 +size 556428 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index de669e48cc6..9a31f5a74da 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7df6433a45b99df899b8fe8850c5869e5a0d7ff286c3e29c24d9e0fa5e0579a9 -size 587168 +oid sha256:426c4d56a2fb187bf7e8f1be6815f476ecc7cb6c4578a7b8ec7b98b660c8ae5e +size 613858 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 7fdfe5128a9..b7bbde3021c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0253ecb387c003a77b5053e302012511e6c7c70f73a3d42837a04853fcd954e6 -size 500432 +oid sha256:a90005945aca4f8032d5148aff450a44e32fb56efbf2ae726ceec57c219d1372 +size 519672 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index b856401bae3..30d8c45ad97 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:54dba7c56b415d44e884d78b5b773b0f1ce7f839b4f8cd57e918d4e2c240136a -size 619895 +oid sha256:57f08fdc1d0f49b664bb9ad8eec24efaeff89178fad98c4d26d25406a669dbce +size 622903 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index b32cd226b61..421ecf2fa92 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:66e46048017ba75f5d1f23af1dd53ee8ac7ec86e748fd2e9eb1de29f96184b70 -size 548006 +oid sha256:6899fb1bde47963bcca2a63d51e93ef693bf974f1f323761ae3589840b45eefd +size 551658 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 8e1b219eb11..49eb7f97e1d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fb3c27a6c1aa570407d32857493390fe63d920772268b992d2ca360d1b2fe2b1 -size 604304 +oid sha256:9a41877e98b254dc2ea5c668d65373e40a6fffa014cb1751636f5c9d38353c83 +size 607314 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 1be5302edef..d7809633804 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c980cee30d95042b3d490da20fd50dd1b02436832bd97a19ac39b2e2c2bc1fe9 -size 536956 +oid sha256:c93272f4d005b8e34bb8d88ccb77d6a0bd2bc545809cf7bdc165144521e63fc9 +size 540606 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index ffc1ec890a3..9e1cbd6afe4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:937a03c87fe7baa9235ff642fe6a0ee14b3a584761a7488ef8c99330c3069333 -size 604184 +oid sha256:5510d966fa6b1dce103427fd25aabc9560f097495d0ec6e728ceee3fb258c550 +size 474142 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 247de4ba84b..8d9fc4f3c54 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6ddeb4b5d86a24d57b3d86dde2919eb4f8a34f391646a416b14e24b86b33cfd7 -size 563950 +oid sha256:a5280da5d680ff7f72a57ad9edd79700979b245902c63447adfd6c9739334a80 +size 439680 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 865e25edb0f..af92a423917 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9cedfee66142c0dadeec96d10f8fe2893f5e6ce76797859609738fa281b6a4cb -size 592658 +oid sha256:6c3c28f4cc5a13b48cbe77fb8852f69ec648c4353d6bcc09edfbb11212127c5e +size 463406 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index d1ed83dfff0..12dcceb8b94 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:97dd51bd0c1674a4a9b5f19a08e3165764b735911dba8019798a2b8ee634c6d8 -size 558740 +oid sha256:3130f72c95fb9c8fcba05d12368726cab2eac5c0e20b284155ed6c30602aac90 +size 436072 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 6054be7f4ed..176a2e4f3ee 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ab412bd3ec7b868548b9cd906706d02ce02d60b16cbf529c4bb8fcd085d311e7 -size 617228 +oid sha256:83b6c4da3b96564cefee87a283a98d8b4f99b621c54a213ce84189f0a5bdab93 +size 619941 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 948b3e8f739..791a2d0fbc5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:50be99ebc76dd03b8f30f7dcc4d7e547fcd34b0ab9acf5ce404e263881c57850 -size 548104 +oid sha256:cbffb226a07a771f8a46c83d0b6fe2ea8865751679cb000165928581cc22b075 +size 555652 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 3d18f67a34d..5840fd8b759 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7a2ce44731c463488e43058f70759330a12028392e2d5972bdee2751eb751c99 -size 565462 +oid sha256:d31c23e80e493b75a4dd2dd390d4b21019f41504287c5fab410fecd015b90fd2 +size 440056 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index e21c35489e2..ccc5b673fc4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2a60c4272eda6c5fd3d383a65d480194fc2071ff61adef206895ff8eb91824ea -size 510496 +oid sha256:0465527be87dfb022ea9bf2662e044579c7a15001aebc5d6f7cf5e0608c3bc69 +size 381292 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 5578fdf9e50..a98576d8939 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:86b0bca23deb03a53d18edad6ead38de49bab0a2d4daa0d74d5d02141aa67997 -size 536328 +oid sha256:fc1d3ce6cc01e65740119b3002fddd274955d43eb7a947602f3c0a46576704e2 +size 419828 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 63565ce696c..77792fe7cbc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:12056a25a8ccbcdda219490c4837b07e61bbc57f9f9eb673a588f42ef1fed27f -size 482178 +oid sha256:e31d01149d4098c016d6f1aada01c7987d3a1a608c35c7961a662a98450a9f55 +size 361854 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 0b1191c4b18..1821661f8b1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1e785786a0aa479b87c6546b45c8eed4dd76eda6e199d192bf320e33a1569a8a -size 592344 +oid sha256:ea17a95af4a087ba1ec9a31ff8a1523e30d58db2fdd3e183bfdbc5588b75512d +size 463090 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 8bdf69f05b5..1128f698e64 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:42e901675d4a0475ea658d1e8d962e7d8ec84fd790029e62958f92c09ffbc002 -size 552110 +oid sha256:0a55a4d75d14da3bd866d8a421d28a1bbd86d627b0672a18dcda995e70c3646d +size 428630 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 1d93387f311..6bbe51f02f5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:200fa26119f4c38f7b34e5d36e163e9e929b6902e5c5ed37a5a0a869bb4f11ee -size 580818 +oid sha256:8454b66e6ecaa4c48d0218fe89caf824a2be2452eabff51d170a5f5703bc267f +size 452354 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index facbff601e7..eaea4985ae0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8108cbdd9e822f8e1ba2873435e9ff9458fb849ae2f013d1e9f4b11297e2bb48 -size 546900 +oid sha256:0999d54f8ff881ddef254c6411e1b1182f5af45110636d8ad165f6d65ce76022 +size 425022 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index f8bc23d2226..ef35454a1b4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dd6fc958718fdbcca1c7c7fe4427dabba748409a793e4bfaa601323d2b7625cc -size 604302 +oid sha256:ad0cfccc3eee42ecf5dc55cb478901a5e650298b210e796be89480d8f45f36e7 +size 608100 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 040acf2f14b..5dee8faba51 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:70793286ebab5ffaf27af46baa3550bec66103801ef1b6fb74b5356151882f2a -size 537052 +oid sha256:1656619999a40062a98d99c18d219748d407d6730af7f0d8cabcc49045af1d90 +size 544600 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 55e2969e5f5..9a4edbbe1d2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d0ece8b4952e05534b8b06642924737d702d9ab6c2c1a5f339da8791ccce6635 -size 553622 +oid sha256:597ee48cd3b1b1c566c65d0389938b04b33f51e6865e9e6b170b4aa19cce15a9 +size 429796 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 8749282c36c..587dc6827f9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3808aec3bedc525eaea8f6a29e3d6c4872a9f3aa67b950ff3a25e8133d430f46 -size 499446 +oid sha256:db14f4f758405f8f496f9e74a435875a692b8a075c042bb0af328f2d7e90499d +size 371032 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 0da445cde33..f2efd73cac8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5b95b00702b383ed4135d3874099772c0e25428c704d77c9f4006fa482f24283 -size 524488 +oid sha256:8af72234370321683ae1a15279b2700140cd75c189ec53659aff85821c288cb8 +size 408778 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index cc3e210e96d..53062b08661 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ab4113d900fcc55dc2df3c542c4505604c000edffe10ca2a28e5d557eceb05b4 -size 470338 +oid sha256:7f719717c5bc420cf92836841404b4c4486921734bc6739550041bcbfb8d4219 +size 350802 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index f0a5e761c41..b2e0dd389dc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8dfb63c376aa21d551d86a4d5d100233d778de49c06db5292b495a2263653db2 -size 619191 +oid sha256:528db8aa61f4e5cc7ad3376dcc857f53500d32bde28ff9405438bd091ee997b0 +size 493908 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index baa348edc56..04213d35310 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:f81ce128edf499823ff921bcb1bc03efea5a3d09be29238d25ffda1ff2a36104 -size 576664 +oid sha256:f762a75aa1446ed2857c9162d246b0e1868b14728a6dbe49c5642004a210a204 +size 459448 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index e8180de2cff..ee6a8dd2bed 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ab1e6275782d2f364687c064513aedad21868ffa1b58b97c5b488880b663814e -size 607666 +oid sha256:992440467d06091d909ac2017140424c3a0ef94ab5c11618223167e333a3b547 +size 483172 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ef45d246476..6f502a2e8b7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bfb98d7346662903c3bdf98f0437b2a05c12b652112e308354d325b29f3407e0 -size 571452 +oid sha256:c28cad80c2d7c71cf73b159abd8b77ddd04d26a7d29886d6f42f7a902f3f5035 +size 454262 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index a010d96c2e5..650e1f26452 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:35f49cbdf022effd37e7971520aff327a87d37c660001132d803f7ec8088c5a2 -size 659491 +oid sha256:b19613b037e3c36a2b8e733b3fc233be2ee4f5256a79220ef1e8a552c7ca62df +size 662501 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index c39a78ef111..0bea092a7ac 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c438974a5c48edd1cde46677cb91c61b9d8f8ff6984e5d4986aa5b4ad3bb63e1 -size 588294 +oid sha256:1f7ce873cf61132501bc9d9dcd5d411db607830db401cd06526adaa49170cb21 +size 594658 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 64b958ee004..131239fcd0c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8da818853b16dd3506f45fd51be035bf277707c2bcf1c66ac622c4b45c0009ef -size 579606 +oid sha256:027dcae51dfadd8d26959fecedd98cbaf76f5707fa37f88141748b3d492e706e +size 464462 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 
a4faaea5c2c..f7879e8971c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:659d8b87be662901aa42ee39f2606e61595f0be7d1ef243294dc7b328bde112a -size 516796 +oid sha256:936720e36c58f85167579057e753df5cdf0e3594210ad3fa69c26aa03a465c66 +size 394622 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 31ceb8ddfed..c5499ad59f1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f2d702dfcd25f87ca6e628f1e1606af711bc01cd7897cee171d3e6be33786092 -size 549708 +oid sha256:359c182629dde2f03c819f87c06a3e79c341b307b4cd32f10d1a45b63b692406 +size 440286 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 5f88dde3104..515d7f67e6e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:15b891fa92775111932f932f38ffea317b33764a68bc6d5df4d3ccbdc7a5aac4 -size 486108 +oid sha256:b6398f354426a0a63130a5c70ad4a269ab43984244d818a78c042a19076bc7ad +size 373604 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 00fc95d60ce..cc5b6a50663 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:704783ebee20928571a6b0dbd63bb47b2be19746bd5d2c05f787fbb0c21ca890 -size 608140 +oid sha256:058304ae26d40bb05a4b16a924f97edaf7b85807d184e52af3e8ee0a0b0bf2a0 +size 482858 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 31f10477984..7bbe85db603 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0154b76c0538aa27b89da0131b4c0b7a2275a0641ef9e8fb0dec9299305c47a1 -size 565612 +oid sha256:0d3cdea40ff91bcee3b9fe2c826cb01047a424582301f0949f681a8f5b84d159 +size 448396 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 109be274fef..423b9a9507a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:607e13e1e7becef3e8438b4a6dd484352ec55d6d54bead244ae0d276e8751fe2 -size 596614 +oid sha256:5322ac84aa82b01cf5a119a1df73daf0c4b55b58f0b9e07508d35f9ac4ded6b1 +size 472122 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 7ad3263a459..fa593aba1f0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:343c92efc40a3307a341560b215e76b6698a51348ff47332a980ee4f106761d3 -size 560402 +oid sha256:0c5aa9f901ef5f28dc7715b73b8514752620757c28febace55462514a6b00245 +size 443210 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 734d38608a9..017c90aa3c7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ 
-1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e43895d2b000b8de5ed5e150ce916a555a1df967fbf24b2e1876e772ec755ca3 -size 643903 +oid sha256:40348ed9cb2f393749c346aa1e7f3f437ea2093a82b335b7d23411dcfb221ee2 +size 646911 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 5d00cc40b53..a11dc45f3b3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7d79d33348be6c8e0723b9db70b9faf13eb8601699599a3bb84c8cec04c24798 -size 577244 +oid sha256:5989a9efa6d30e5639829b4d5664dc0108e8ed68dea1088be5a936febf4b256d +size 583608 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 513578b0ce6..218e853e774 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4b52bdc81f67462e518eef543d5f409e2f12b455480993ae282d825ebe61c73c -size 567766 +oid sha256:ef12f5aa4ac74b4c16d7f576e551de4887c24f9fb07149dce86ffb27cc1fea04 +size 451042 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 2e95ad78bc0..cd440cc208a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:779a7ee25339298fe1cd8b64aeb40427a4330ca41c435fd27bcf13126cac7f1f -size 505746 +oid sha256:f41d2a2e3a6efe9c1f143b5ae1bbb741333407130e9840f4abd87025627dba6c +size 383572 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 6af83bc0727..a6a247cf8df 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dffc647ecc2f93f8f9331db08b4b1c177831eb84e673dcc3d9e41ae5a1842d19 -size 537868 +oid sha256:4bf9344d8d4e356fc956ede67f607d992a043a3af784eedde1ed8b8add501d51 +size 427656 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 0909ae8220b..36ef7fcb64c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6ec3c2ec38b6d55936ccaa1b1356d99fcf2b1d8c1ec8b4a9b21d401fa2cbebea -size 475058 +oid sha256:ca5f27956766420d58fe0d8357be06d4ab90c30578b2c10e7205ac8dcccba89a +size 362554 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 
00000000000..b51190dec80 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b886263552b01eca22446083100dbe4fb8c7c22f413a5583ce46ecc72963efc7 +size 649347 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 00000000000..518367a090d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:614591ee54ad1964984daf36942b05a497850f57ebb1c70af98481a59c140cb0 +size 560636 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 00000000000..b837b72874e --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5980372ce13b8682d11afbd423606d5c314f84ef73f1b61a91228c27136f08f +size 626455 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 00000000000..7ac919ad7da --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a7774dd8fef394ff2b2cdb813c6c7cd8d31915601490aee5e8422539a6619d8 +size 539324 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..051db4ddeed --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:170b9f436d5f035b80d03a3a1ebbf74e0b26776b6e8e034635ad113972766ded +size 587806 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..46c38b93b49 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:0b5316e5a4fe8568a136f7d661d053f679833b9a6f5dfd5c3fbb835cc86d11d0 +size 558080 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..d36ac8baddc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c048382c54eea6e5a23b6b7f32045c1b56c5e030223991fc60d22e422d9a81cc +size 574702 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..8f8856b6fe7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3ac93a7017356bb25e26bf4636101f6b66ed3c49362619dd0ed65892363eee8 +size 549736 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 00000000000..ca235f4f7cc --- /dev/null +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3094fd70c6480ec5211f5e95ca90f2ca4cd382315ec75b779559f0fed3083252 +size 652501 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 00000000000..aff53f00caa --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46f59ba04761b1500b0c7c1f24d14013e3f28c3bf5bfaffa2d079f04760ba63a +size 596524 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..30688661ed2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21e1650420f961527554e708004f1cc2f38386a9ed4637b8bfd46827caba9fd5 +size 547406 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..eadbb384e19 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aafa0a87d6332dcd34376b031e09cd3ea221f0cc496852bdb83513d72d1e5b08 +size 490220 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..cccb02c2586 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b381e28f1828676792289de189f7a4665971e6e8bdc389b68db3d1bbc01529c7 +size 526388 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..279d439f9d3 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f25a350c347903fd08bb727f90421e012a74b510c8560e3763d8f216063201d5 +size 469992 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..51e87524021 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69be664c151d01b901526ef6778385980bbdd308912fa52f5e0d27b4a66ee41f +size 566494 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..ab23fc608be --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6b899f05555a58be489e70cbf8df1eb13bd99fb8a563e594bb6322e7da15ef8 +size 536768 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..2dd3f88e88e --- /dev/null +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6549754e3537a2b7a6983233da209edbc260b43a3277483859f348d047c6ef04 +size 553390 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..aed4ae77711 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dfe61cb641f84f5b79d45816cb5d38fd2df947259c7b3d2979524e0097e2484 +size 529214 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 00000000000..34dadbde06f --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a424745891d74ddd37958fb5b0c08006ccb1c3a68782f3c2da5a05467c9135b6 +size 629611 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 00000000000..3801535e0cc --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eef93912f87e5df1f346e7de31ae15bb88971ea936b6e6e38572493bbfb9307a +size 575212 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..82a5ca23c83 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2add5e033cde7d0f305377eccd927474d874e1531ea95beab38e1f3798c20797 +size 526094 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..9031e3d78c2 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f73b30e54cb6541e11a135be4ffad28583734574cbb7fb55e79a11dfe81bae0e +size 468908 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..333352b2620 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6687bf47177ce1344f3d5799e013faaacc4d0c1e647a87b0a727f6a8527c7d83 +size 504286 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..7242abe8458 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a5917519fb23d1484927c57ea2a6e0d0c1d5788fa910a442b44596fde2fd6ed +size 448680 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..d4ec36dda47 --- /dev/null +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32ce943ecf523ea41d2c8022a8a7d698c5bd3873c1a4f14773ba0ff9a5ac8cf6 +size 607572 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..f58e53bd8d7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:616c5405f624008e488caacfedd13771f180d0b5cb6cfcd5d91647cda24fbac5 +size 577848 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..4f3e3ac397c --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f2be3a913d49da48cc183263b12b0c54e77ca0b7a30a0f27fc078d364ac4e31 +size 593680 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..11d62ff6cdd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d1e13054afbf7583d5873db85599db6430ff12b1e2bd1395960021824a11229 +size 568714 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 00000000000..0e17e1a79b1 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f339a89e9f9fd6c3baa9329cf72946aba2e85fb1e805a712e68bc05ab4a52ae +size 691311 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 00000000000..2b8d9549c9c --- /dev/null +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cab5fcd991598582e172e90bb1bc230fe77851ffffa0526f6e9c17f325e8070 +size 598902 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..1e7d8415844 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d569215909446326fc010b33ccf4b14eb018862a3a34e026850e0ddb8a3b74e +size 571810 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..53e6e4639a8 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ba10fb68440f1a5b13b92785ac66d260f87c24e948981de3344fc8696951521 +size 498024 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..7f923a6c174 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e95ecba444f850248c8e38f82853b89f47995f19b5805404f714a4e32f4d2230 +size 546846 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..06936f28865 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60bd7ece9198a84ea65c4954615ade8e5e1acc2a710d79cd41a1cf285fe7f3fd +size 476218 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..04edb18e008 --- /dev/null +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c5baa3c99323f93dcabb6a6923005b5b821e83135a4222235ff7ee86bd8e05a +size 586260 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..f084204806d --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:649141a4265b31c72ee3d79c89f3cafbb0448275638db7628cbf9665e52d9701 +size 556536 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..5e962ea5d80 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa0d06d0cf177bb69f758966414215f45c2f989ea930e538f352a0beed73d739 +size 573156 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..ba0e51631fd --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:369b41a6a173128181e8ee96228dc8b68d69fc804ee74274ddf06853d11e0cb9 +size 547402 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp new file mode 100644 index 00000000000..ac0d96fed33 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03585a8765082efe51f7b25829ee4c85444749a09673026f9c053ca0f02cab35 +size 667631 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp new file mode 100644 index 00000000000..e5e7a447f24 --- /dev/null +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eacf2d6e85f70f5b32bec7e0038d7ae6b7c19b47a2db4a2ce134aa3876b49d72 +size 578378 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..5b56d74b6a4 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f53be443d24bd1bc78a2c4eb58261ead9aabad3a1a79b6561b1606435a37745 +size 548130 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..6d769492b49 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69814c363d23db97cca4cadc9a11cef98fb4ff435bfc6c208ce7cd298bc85fda +size 477502 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..671180af5b7 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aa59a2ad05d4e7a168c6a45ed5665bc90f0efc16880f2bc6431e7a1d175f267 +size 523166 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp new file mode 100644 index 00000000000..4c8703f44e9 --- /dev/null +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73d106ae6f0bc9137ca4756367f76c7846f37b0ec9a715b34a425e32d6603cdc +size 455694 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 2c226dc48e6..96549bada91 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:24bb470c1bd89e024f9e72286e451c2f39c4e1b4c4fb96e163563949cb422f3d -size 546358 +oid sha256:23b74693a7b56b218b730ff7726c383c8da45ebc794e8fccb43d44897712d56a +size 551736 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 8b775fc4267..6eb5658290d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6623a609dc3f33d88d7974ac0823a222787f0decbe7b7670e35a963cf286b774 -size 486064 +oid sha256:7534867604f3977f3894277644c11ec148dbefd5ca421f73261e7a1bf11a13dc +size 492206 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 7dce4dc1ca9..35a1759c50b 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d644d5958a68ba109adc12e86664b2dc599b6609287a19a93dfbf0f260e9622b -size 532150 +oid sha256:a71eff43c66babb4e1249bd1a12ec7e8041566849729697243437a6ee2b2a636 +size 537528 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 46f6f539b85..d4efa43c667 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4e6f73841d6bf26d9a7f9c21ca447442a2f46bf1766b1037c454cc36a9d4088b -size 475014 +oid sha256:77639568a90959bb175499f8f9e9a41eb350f8aeff058c0e4e40cec36e5e2c87 +size 481946 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 7d5fec2844d..8feb854b9a1 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:70e6d4e44638743f081219da530427d2ac1d196994da454a308285dc95f8c528 -size 519820 +oid sha256:0344db02b9cfad07be9472ed312d9658d63d8ede5a7804873ba554bda1d25aa6 +size 447522 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ecc7068901d..27e21258cdf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:956507e4cd01a628d4c3fe1cf1ffd9a8f0828147fcd2caa86d6248c3a0ec7df2 -size 495448 +oid sha256:a4c49ce3aa2076daaf374fb9ba8438e36a3de761dcb5010fc045e32a4d48e30b +size 429414 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 5cd31377721..ce37c397940 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dd9fe8d0c1ca8e270fe392cfc410f40e1c8ff38f4885f6c13b780e1292ea4e44 -size 521812 +oid sha256:8b7bf42e0b4316b873c5d65beecdf6d55e26467379725956ed9baea858784e09 +size 441498 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 78028fc6775..906919d1814 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:225eefb54cbfff8bef8f3da5b857164fc44c7905478370b4fb39808dcef471a6 -size 498228 +oid sha256:7c5ef3b3574f5fca65f2981cdbbae9c556340de35f1821777c52eb1ed690b95b +size 424228 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 7034dc34581..ab704440514 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6bd6f054ddf85f3fb670185d0cd6f970688ab0946604dedccf0e0236087affba -size 543988 +oid sha256:56418655671f5fde7794dab2e2cc012403e76a37817c2cca07b7e73ac098ebe3 +size 548576 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 92f30d67f4a..53bee45237d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:741381edb9f058cc70fe4cf6dbb35bf2f25a8cd03225a681059c02eb89baa8ab -size 486950 +oid sha256:c072e9d5e9c06f46b0a5ce69be1042511ecb5efa117e321dbdb3e3cca627a58e +size 493858 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 3eeffd3b28d..053acdc65f8 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:435249cd88be73a3971a6d684fd213e174f6ccc17d1b8541facdd3d2fece5131 -size 504876 +oid sha256:8a8398c32f1cb4ef4e2315334d336dab4867719922910dfc886f2776e2d2a2f6 +size 429200 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 2407ac1b294..3b10ae69624 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:70f103c66e3e5a23ec269ecf0426e9af1621618f0b4b980c079890d049564b7d -size 449886 +oid sha256:69a2cac813663c627be6e2c309c56c377012b40f8c0d936063b300d4c9ab0e54 +size 372014 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 321bb039c47..a34e45bb516 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d573bbacf4ce1a6e099747d2ba84213a078fe232753e5a31d91e8b6f654878c7 -size 464544 +oid sha256:8709ef48384434890b1f836258cc86a3e485335e2189aa69728a195477241781 +size 408774 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 0d04f51acb4..25ca61a6fec 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a756d07060f0291279be9a8f6bffe74b6601e41ceb5513d36ca5e1a4acf4df48 -size 422332 +oid sha256:7417b3d0cf0d9f77abb8e0e7238d554a3ad407c8dd63bd028f137b52be6f40b9 +size 351588 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 6ef4c56e73e..b7a9c555c1a 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1cd00942c3c3ffa725dfdd3f304473c0a067b2e60ecfb270a4e6540e0bfd54a1 -size 508770 +oid sha256:c8d325ae432a93237e7c5426baa96096908d9974e89787f73252c2f250121d77 +size 436472 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ad0a56808f8..8cb1dab9969 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8dc3cae2aa20949a65d5defe0a6be371a335b8058dd20272d00c6cf8fddc05da -size 484396 +oid sha256:5159a1ec198ecd5fd718c4a46ea94e7114cf586bc645a0adf0572c0a658c8d5b +size 418364 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index bd8acb04c22..2a190b26ccf 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:36b37dc888ecfe150cce7e63b90469ebecb54648a7c016da4bac8d27d2f28392 -size 509972 +oid sha256:8cc26ecc4219085c1696cc2b56055b2e52cd015b874e5ca6e531962233392d04 +size 430446 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index d46833e5041..3af33f80a4a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:65367d23e8f0c397f3372ed789b6ef3b3272935c86bf3ef6a57bf06e5d204e31 -size 486388 +oid sha256:a863fc5e2e65d51a50a608e59c8954b12085e917b0974c90fa36a44eb68d0290 +size 413178 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index cb8bf046665..9ff716b3f3f 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8ce5daa22cf3476c79c82ee45b9d2e834e0ef3c5f174d0416f63edb6111ffe20 -size 532148 +oid sha256:08176763e6570fabd766bf891370864eadd5eb792ce50cbebd115dad57be977a +size 536736 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index c849a50c514..bf3a7da9e24 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:75cc614ae2b76626cf70ef95724996b322d4b242f337941304f7a3eb23c8a852 -size 475900 +oid sha256:7bd6d7343fa6380f324ff57c45db12b18af8c60eff4d4221c5f5f5ed5b8ed3e9 +size 482806 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index fcb99f1d651..5938116afcd 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a4b1b77ad557d8ec64f6f89df68d5a676bdfb20399ddc25aff469333e41357bc -size 493036 +oid sha256:b723d8a7087244914050f796f68d2d365966c4e44ef883703b4b0ee3c425373f +size 417360 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 917b373614a..c730b02cff5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e998f4d9ad02188ba042acc16c94d43f5575c1e2fdbfff8486127770a6561465 -size 438836 +oid sha256:b532bdb072b8ae82469bb22ecef182c7cafb12a1ca847687986974d566bcbc8a +size 360964 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 1b9bb2f41c8..30b6fcd4cc1 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:03f907657af335e4733b1b0114ad6c0dedaf73d409ac7cc03067cccc7cfa657e -size 453494 +oid sha256:7e41f1732d83f9daa8a5018ec5304682223b735a964d38e335427b174e72bea2 +size 396934 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index ec3da0c5ed3..476b01bf659 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:86bffcf7c804035176652513e24374d8b1d5a975aa5649d4267ab00be1a450e5 -size 411282 +oid sha256:fc75ae4586f4a5d3b35cdcb6753da3f25a40a2dcd896a73c9bb2cad1f703edd7 +size 340538 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 12d56fd3892..8ece40382e4 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6db8a0c51f8f6a7d82b3ebb79b199a3d618a44d12228fdb752342b561c1e2502 -size 534852 +oid sha256:e029765554afa36a417517b87ce2d0469bb802fc993fe667e7ef474cd5ce3bd8 +size 468078 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 3b6ea9ad9cf..c730d9d907c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9081dc0155b095e121c128b00b0602b9aef7ccfa1fbb419c1f4b50c4b02795a0 -size 510478 +oid sha256:b8ead6f7711a85233251a912da952130b7daed6d2765a01231ee3328a390652f +size 448392 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 24d48abdd7f..b7c0fbd0182 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:95176cdb06d22f695dde6ebe6e7bfebb71ef3a6e471e6e2a8341c9addd74da0e -size 536844 +oid sha256:597b1914ed801211cc26d5b69d0b4eae6cd01c09e19333c4605cd1442f8cdcca +size 462054 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 18c40bdede6..e4412cc69bd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c0d9d8c77191c7866e365284b711eb9c24a58e903fe98ec090e917fb234a6fd7 -size 511682 +oid sha256:480a593fa3b0e4c8a1ac5b1617375381a729435215acae21135a09e475d1a86d +size 443206 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 2706c20de24..90e175dd900 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dd9a1c47c6487f153cf9093c4bd4a4cbd9dcf42f3923a515ebdbdfeb91b4d999 -size 585956 +oid sha256:1df2d660d14929c3d814a1713ba3a3c0f73c6f82da8734d4f6c352dc07394e08 +size 590544 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 0851dbacf32..a13b72b6623 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0763a7c662a477a1e98461833598a037548e4ddaac49130fe9b973f047f21845 -size 524872 +oid sha256:3b96d4dc68b7f5c38bc9095ccec1fb4ae0e820f66b8ca3c998abd4c99b45c1b5 +size 531804 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 53f3983e8f9..6b2f58c5a0b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8cf54ea4c6cbc510d51ac9a0f5f40088c8954cb3c2ce45745e9e98aebe816b50 -size 519834 +oid sha256:dc108c676e353473f5020038d34f2d738ab77ca99212e2f2ff6737d693bfbf48 +size 453604 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index da6382a7716..db1b972a834 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f188a4fef05305cedf6b10b1c963b379e64f4241b0b24f3972033d2c31e0b4c8 -size 453794 +oid sha256:eef3cc4a895b07fa25d485a25b64fe9200f08a43509aad6d7cd50cffe877e977 
+size 382976 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 3f91067e2da..3073982e4f6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9741297eb68e66ce977c86ee60a53cab1e6b4c34020cd87f76e8f97a2512281a -size 489146 +oid sha256:cdbf75725eb09812cd5e9ab158b01a9f6253cc2371fa1483f74c69a38abdfd90 +size 428442 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index dc079435f87..47df8909417 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a75b25072d9ce368087b8afd0cf8925ace0841a3b8570f6c7f3da27608c37a90 -size 425450 +oid 
sha256:1b548bd3db135d40680107376f61e3708112ba5143c3ba293ca8843c5ca3854f +size 361760 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 909ad8ae9a7..6e9aff04a0c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:89af3c95f44c160d3523a49a6ff08a3276ed371faa8b053017902261fcb75377 -size 524590 +oid sha256:e4cce15a7af1768d3057e107ef3565cfbbaab787416fc6a61897568303a959aa +size 457028 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index e5bbd5fd137..5927156f557 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:906e70d64949ee77571d3e1015c0bb813654018974bf78190c9490f0428af1cf -size 499428 +oid sha256:73e2f3eb4bda6c61a37696e4ad8a4aa46ab44c2c09097e578480d6906cb38987 +size 437342 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 40c27bbaca8..4c719521ba4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5b4f88492191dd0aa6391ac37e5ce9f88cb9b59132d6f3bdfbbe4364299d5a35 -size 525792 +oid sha256:398c5bc760fc54905b3d1f8eb36e24e57a355f4aeb0bfb283877939ca68b2a26 +size 451004 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index c2010b8a821..65ac85dcd94 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:52452b87baa2fb3a3755e70860f4448a81b4a0dcd76567dc65bc817bc4f20f9c -size 500630 +oid sha256:82e3b8ac5cdfdf060d697850d0f62bdea69452dfe707b06ce9aaa05a4fdc536a +size 432156 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 28f1b392844..755af78334e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d8d3a3b84c8569e2d13ae6097bc8331ab83d268fc77cf17fd309d3daebbffef9 -size 571748 +oid sha256:787b0d587cf05259368beb7d8f746a92a58eae352a1a2f9367851075c4d261df +size 577124 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 760d2c561fa..9e9b2088920 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:298d80727f7bb9759445ebbb421945c98e9890e596626bbbb39f935d1f4207d9 -size 513822 +oid sha256:a844dc2b5084e59ef9179fa80e8ff8115a33f1786c1e668d3a7e6145ec20c350 +size 520754 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp index 16449fcb478..0593e596523 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f6d0c36af070b5ec461c43a9cfcb04a2251cef55097044671dec1351adf21f69 -size 507994 +oid sha256:56028e80dd5b0b5846436cefeb6994b2b0b2aae0282ed8ed4f6afb719e54dc6a +size 440186 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp index 9428d7526e4..ca3e00ea686 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:40e73af9a638193d275bac4cf02643db59937f10a74ce2cf22bb31429d7a0439 -size 442742 +oid sha256:1df31f5e78a316107dd4c16a3e6c35972c45f3c7c51ec64640b5b17b92380472 +size 371924 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp index 6aac526fb46..19a8f1133e8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0f56d9e9e369272779ec775663d331e41a6e85c186c047c72c85d8a6bce97703 -size 477306 +oid sha256:3ae50e9c8c139b298f353ac3840d24b5de9357d2a380031ee7f9ec97a7618dab +size 415812 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp index 
c3d5940f15a..257beda06ec 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a3ae3b10a871456e3b2a24fec8dbb3b72dff7dff9d8cf6289b02b62a9b103aa4 -size 414398 +oid sha256:6de41dd54fbfa28fde081eb1e90c5ac43ff047cda7dd677f2a97c7aa8f1c9533 +size 350710 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 8c06eb50782..8091f587813 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:03dedc215216da4578b54aaf20dec3730b4cb6d5a68ff686b0b98edd53972775 -size 709959 +oid sha256:760aeb74af8f5327e7fe13f6c5b0d3cdcba0d94d0757461148cc6732978b61be +size 690669 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index b12185bfb77..3ae272f6b4e 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4dfe7260742eaff8fe9b9ea1352befd00e80ac1e99a007bed85c74090a9a740c -size 616956 +oid sha256:04e2aa637de34880eddb4f60e4347066a9f91bbd51952151c9f2faf4e8979a56 +size 607928 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 1a2df36dee3..77c5553985b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:77903cc809cef0cccf42201aa86d2e6e82af7b5d6b08fad76c1259bf2fb552ff -size 711239 +oid sha256:7895671443e70b528e2d7c16ccc65333acd36a1a586cbf348dd878fb564d4c57 +size 696143 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index e2a3c8893ef..058a4050f86 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:91511ba01f3b21ef54b230087377964512fa89d6537cde711b63f1da7d7f123e -size 623171 +oid sha256:9cbe88af6fbe07c80d964df3403862a115533b13a07d58758691f5b94f946fe7 +size 614536 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 27eb64b9078..e576c442abb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d0911d0cf4eee496bdeb0d28abb6e1aa69d4f9698e5eeed714cd9652bb32683c -size 776985 +oid sha256:e7bb68922112e3197fcdaa4fac3c6148b8c4b983a277b80f195492ed2dd00038 +size 758535 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 3c958202f8b..476ece3ca49 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:83c15bf94f5ac7a6c1fb37775f258274a18d937aec2ccfbf326282c17c9cc2fa -size 686203 +oid sha256:112848d77de06e8edfddd0d98497faa23d036795ea59d2267c1c88dc05508b12 +size 677373 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 92d9f2fed62..d8113cc0269 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:83b8275cbb42bdedbd552f6d3b7dee02ab6c5ea6d206553fe4e376fe2c425fd1 -size 808677 +oid sha256:c2b2129cae8284776a140e9e956bda11bb6957c8a2ac0e707392ee612e1f2c4c +size 789781 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 0f0c5f1c893..31cc083acc2 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:debc82a053449b4bb7c15974ad8d13ac6acd1a309181c237ce56bc32eedaefb3 -size 711581 +oid sha256:3f087df0555f4b763bb1539af051d970f77cb95f4b6e0ed1450eb62bb923d6dd +size 698705 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index e32cf9bc49b..8f1cbe8ecd7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2290ddbef127038401a77fad187d3721774835768de03963884e6f1c1051838f -size 794369 +oid sha256:7396b7753afb23490adaccdd49a67d0e496c7f7215a26893ec4d6b1b6dd111ec +size 775525 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 51981f3ac29..4ff10d5527b 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d77dc9ee403f6180d2860bb740d1b237a12cdba6698b208dbd8668bd934e409f -size 696041 +oid sha256:838378f94b638f0878e7d17a82728d272ff5ff734d43957d903b77c1de33ecd0 +size 685237 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index dfa7319ffb2..4c86e9d368b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:51421c5e22a82335c6bc918373c72e66f04a4b0d0e1bf6cacadecd96fb78fa87 -size 808723 +oid sha256:09b75b3c08d97e1f802656768bff69bfebd985fe3499426b80986831ad5bb9d1 +size 792395 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 4676eb5880b..63e6c954da5 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3df9750b5e5135992ddaefcbb70f68ccc0b95068159a02904cd1a34e0bca92a7 -size 716561 +oid sha256:ab7bf81cc5cf20b9eeef41c6aa15365283763de4b87d521ae8f9bb358e2dcfb3 +size 704623 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 3a21c370ef6..7f1f6d24b03 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:37df93189edae0b41806eedd1de601e06ab3fd9c636fa35fb9f2fbf14460cdda -size 794417 +oid sha256:88c4fa0a3a81c4d6de84f0e08d6d332dd1e58fa6c5abbea7cdd1cbe088b1b745 +size 778087 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 9fc6f55466b..3fde51a988e 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2b88de47951b644fdb0310797c53647d31067d787ba741fea70995f4b89724e4 -size 702303 +oid sha256:7df2d6718b23a3cfd40d05ca683afd563009bc907f22dae4e2439ea6ac7757bd +size 690315 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index e7fe6e21bde..d0fd459a7d2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7ef2db599ed8f0f264a5e21df041aaf58312fbb114036b787fbd21fd9d75579f -size 876097 +oid sha256:5a945e6393abbd1634ff65f635a78d2cf396fce390736f32c42e4b5126475813 +size 860065 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index a7201872f43..175cfffe621 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b395e26891d4b1028a962365f8609406db737e37858ee859cf7ba69bd754c06b -size 784527 +oid sha256:80b383314ebbfcb97ce9801b4a372412fe63b0ccb2dbe1f9714125c47d1f17b0 +size 773131 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index baf4a4fc275..49d5ac354a0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b2ba790b40e5478a115465c9c5c468eafd551e09e89225c8913753ca54a64c53 -size 861791 +oid sha256:7ac4af2e21236c243cf8c5e631d105b4b3f6cba9a7509f7bb7539e62f6c162e0 +size 845757 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 
78fe68a4ba5..a9a3b2668c0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09184999e2200ba85388359c3dec057f6c7c6319505b8e1d8a1c2beef5a231c3 -size 770221 +oid sha256:827c85e660f0aa839db967aca6baafbc7dfffeb18e1df0c03fb5ce96334c1b15 +size 758825 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 1a1a9116734..f425d54a1b2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6426941d2a8d006db08436ad2b0d71c7bd48681961b2c9adad77eeb12ca0b65b -size 664867 +oid sha256:f1523537bd4c7d6e8fb04c1390fc25f7c86bc964b897a27db3bcebd90db4fb76 +size 648835 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 251c62b88e3..5de40da7807 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e45b046baf7a3fe2ee89c8281e1e8dc6862d9e92fcea2ee39ce65eb5da9c1bcf -size 567524 +oid sha256:48a1b5a24df0cd0c32f7646a0fb1e168dafe740f6b9acde6fec1915e5f8f3859 +size 550898 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 961991ea1e1..54b8f9a0e82 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:032700c96c2a5eeca40f35c8074b6122ad40fc8170b7441cf07271ac86e7cef0 -size 664077 +oid sha256:6725aa77ce6f588d40386eab638d7f9bf8e18e73c9149df98e4b75d70aa28187 +size 646317 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index f025f6927a2..9befb8e0ddc 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c55e9398ea3866cedabfaa5dff95e52670c1b7dad4c6e1029ecadf632636bca5 -size 583704 +oid sha256:658b9cc00c6ac1cda9c00f632a4fb7db6c0acad64db5730e7cb0a7ccd228134e +size 565500 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index b097d4483ae..d5bfe3292c6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:65d1104c88de039e2f8941bad42ffdeb556e53d4c9cf1c7b428c11f43ee31a3f -size 736975 +oid sha256:e6f746cf81d0715f7738024229c92180036367bc53fd96b1cc45bfef89b8ec3a +size 716799 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index b744a2c4443..f58b9aac3f8 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7013bb7545f3c65261dd179a5c2245668e635b5bc514c396f3fd124860d457a0 -size 635835 +oid sha256:781c53c7f91a63533360591e11d9a1aeb1caeebbc6a79bfe41224532a82c2380 +size 619159 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index a7f47d61267..53d4d4ece81 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b2c96cd02daccbba25e82309b97ed363815f4f0851bca377d7c05adfb1b736fd -size 783665 +oid sha256:3ebc9cd77edb7c5b532e2db8da5a63bac9d5fa5796cce9f0f30ba55692f87bf3 +size 766941 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index f32036cc10d..e420433421c 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c5be78c5afa22757e8402f8aa5e39b17b5dc23c74889d69c5d99ddc2b86fb170 -size 677689 +oid sha256:ee326d6e8e08d743cdff15923d0f0a0ac51c95dc588697f8b17b7b8329103c87 +size 660273 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 349e7c95e71..4635075940e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4a69409f40a294577b99b5455b1c1b5def828e9157da4f3554952b78d8f67d70 -size 755889 +oid sha256:fd35259f4ee4125b10f062ff35cac8a1d6270075a8ab61294f448ec1967040db +size 739165 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 388519c6faf..3f42dee42d3 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f9c6c562fa1aee305a2ecb9c9ef8c108941d1d5f8aac05be84fcb2d0a20df459 -size 649913 +oid sha256:da65eed78c92dfc879ca0b7524a136b7a3343cb779edfd75fb1dee6ed0c4fd2e +size 633239 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 3d8e32c9e3b..31086c364d3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:924b56e952db2d170225f84fcae83e3a7d3256c52ce61e4950a231e7a6d298ca -size 778779 +oid sha256:880aae9e5324a5f1cdcafec4b2aff33d6c95ef4055ed385bfbcf37fb45224538 +size 760821 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index c1ee401ccfa..f49b2cf7528 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9f88ec8b9ba2ead6e33b6fa4625e5dcd396ee1bff79769b4ec9ef86380ebec13 -size 693819 +oid sha256:74e050ec152c0167cc81ab665ac0b9390b62cfbff4c0d9eb87e03281cafeab1b +size 676157 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 9cb893da8b3..2532dd58a76 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:565976c658d6917febfb3136201cd80260734aefa3ea3f2dc09686cdd400ac78 -size 751003 +oid sha256:35cb6010fd5a87e9260d709499948ad59b6c295823aca78e47516c3f168ce887 +size 733047 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 57f9f16e565..4d7ad6d3491 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5d5f895d5c092c4c03eeaa77ae84564b73bfbb211254438452a57ce54debd154 -size 666783 +oid sha256:a17bc9eca373d739e71b81a41a6ef48980756805345702bb0a36c6a603d4e7d6 +size 648333 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 20e2d4e016c..2cf60a905ae 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:43967a88dc76e3d3483124be5c60cbb7bcfde0ccc29ea3d1180a350d32217196 -size 847829 +oid sha256:37d0571f474f9b8dc064d284b269d357ced8beb81865c84b92e395f9944b06cf +size 829577 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 6b15ce0f638..af1f3a6288a 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7ba63b5301c8f36241231be7140a0f04d1dfcd7f6bb19b854b7fa0a94b5790cc -size 744123 +oid sha256:e3388ec061db35cd16c6c1086462fa75e1d57550cd48e87a6955f28953b6cf39 +size 727645 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 5eba52d5fdf..0722cb8f9ad 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:32def4bf9eaba2624bbf80134df0f29ffa2672c310a31ed562ac11f16ddc199c -size 820005 +oid sha256:6701949d86e0a23b031afd9f6eeb51ab125a3cd12152cd31dceded9a8ad81839 +size 801801 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 
9f88d5b0742..76482c35c87 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5eabe5fbbf6b638eda75ecb06658b67d3a984c9339e2491f2abfd5887ad56afb -size 716299 +oid sha256:210aeefa3a21cebc7d96eb869ba7030ea2ad11c996fa59d54e36cc1f9df0764a +size 700661 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 303d7152a4a..6f5ddbae417 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c66c24ed4495e8e8f708c6a5419af6e5de24abad28a8415fbf465f3482b16710 -size 634573 +oid sha256:6fe6bef3d7cc4e53f4b9a134ea8793f8cd73a8a9b2ca0b2d18c4780284602c9b +size 651543 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index ddf02657fe5..c61d5990941 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ad94b35e3ac66c0da25d725afa272d4f90f579d3b34f4e4f872357c41a120c54 -size 549414 +oid sha256:ac45d3f89921a7aad886934b2b9671aec5c2200d779dfc3f91c21fea2c7fe107 +size 566386 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index dcaba059579..cdfe69de5b5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ab37aeee391b31ff6479860dbb9f32f30e23e3f839b5313d9a5b5cf0a9104e76 -size 634571 +oid sha256:9abaf201ca7a82097699811e12c834a09da296b03b6c3091ac2f14fbc26c83f1 +size 651937 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 4b0b04c9ff0..2cfd933e075 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e187628583a166dab765fa8dfad5f6441b14077927f4751e0696cf31b2ef6dcc -size 555974 +oid sha256:0433a3cdc2cf628d0b892b56a0067b4976f53cecbd027f18a54b50f37c1a348f +size 572944 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 01ec8b2950c..dbefa6bdf56 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6410f564c4d72d6d3c0fd2cdc4efe849388ad8a35e8ed13ec1fa905af2ab2f29 -size 701599 +oid sha256:90d1242e7bd4bab15e4fae01ba5f0c2b3fa531a66a171b5cd6f289b42c93e010 +size 718571 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index ca02171d668..a445163a6dc 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b0b1329e0cc50ad7525fbe8f427385d5eab216ba022c337dedbd57543a7e11f4 -size 618317 +oid sha256:fc8fc8dc3c5100f230e76ff97e5cd66472815497a28cb0677155c108cafa47bc +size 636077 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 7d1f2f6568a..4f697ee6b10 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c662e5e02034ef31fc78342ea3daa45bd20a768dd6381508bb8b9b1b16c738d9 -size 721501 +oid sha256:9d85f5ae4e892f9bc76dcc1761049ae062005f3cee93bd84ab1e956b6a17fb76 +size 738569 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 27af353835b..37f362f960c 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0242659f947bb5571afed276488884b8b217138bd9fe9274c6a0afd263acbc81 -size 629387 +oid sha256:97e0833d6bed69554dddfbac73ad2cd8e9cb4f444b381b4e3fbaf6b7d1551cea +size 646357 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index ddc4128e79a..b87f8948517 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aca1099cf2d963480e6b56c19952d99e748514b340826338ad32ead3c2e7908d -size 714445 +oid sha256:c1a9d85c1906adee80fa45bc1887a4684fba945cca9a77b8640cc7e094ccea51 +size 731417 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 382d13dbe0d..ff48d5c5e51 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3711be295215a1b8343a8496dd3a5a139e40ddd9211d1004d057e921a212e13b -size 622233 +oid sha256:ab483c3aa078d4c39dbbfe7d976feb7217d20649d2aad4546270a4d2c4ae9d30 +size 639205 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index bc8aa54c91d..1ef5471aaa9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:842658f64bcd09425c2026d4c3cb5dc8958b3e2fca78a2bb80066657027ed457 -size 720611 +oid sha256:ab3a7922a0b114770819833a48c0f21b91e2a8d687f7b2fa3a9486a0a5ad6835 +size 735953 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 3f03af0ac2f..cf90a20d7f6 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:377a21f1bcca40d870a5fb7d9f446d297cecb43b8f90081cc4e2ddc8b94f6d5b -size 636095 +oid sha256:f0bf98b6dffaaa094b0ad68cef8db80f4e3e51924309abc4f28d6b341ad69e4f +size 653065 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 29bf5570460..1299797bcbd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e1ae83840ee5b740a8f899fe56dc7b69ccd2698fa5ecfc525f104590cad2cb06 -size 713457 +oid sha256:72759a74d8df5e22e856c3867f32e4a87b23499e0436e11886474b1d53318669 +size 728947 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index a8675c8b928..32d9d7783f6 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:98bfbec70e0f0c12528bcdfc969a2fefd7b48b073b29ceea35a3139b189668dc -size 628941 +oid sha256:64dd93ba55dbd642d30a94b51e5b2dcbc5242e295f2cc56ee3617ab514d43fac +size 645911 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index d66c765d8a8..58dbc34229b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:de538709b1f6141b97dd77b181ab3a87c7112ef34fbd9a0bf2a156cd9c9897c3 -size 791289 +oid sha256:c63a37f832ef1a613cba017b5871744647d318e291b19ad55d5ac478beeee897 +size 808359 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 6caa4d51d97..d27f7beb0c7 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:15eaafd19597b4c6a4355c95aff962c7a2639a4e9b38ffd03c4ea0706e9fb068 -size 700509 +oid sha256:ab62fe832e5fe573d11e7f09eaf660faf490238beeaa558710b9b170cc01be33 +size 717677 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 3cc1e807ddf..cf57533a37a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:066614c1d70b60c0fc9c89c4cd96b8b66371d51d510c0224e9eac876fb396c80 -size 784235 +oid sha256:80ea545fc61d5eb9fb3b616d56926e32c88cd5442b387af0b5c41da19b9e7e42 +size 801205 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 
1030c44e382..2551fd80c7b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4ef07a304f9729c84ca5e3e7e4c65952cc8e0feb9def9fe4149e0a3febee70fb -size 693355 +oid sha256:c3a3e1943f3cee108357c6cb4594f4bf075bbd464c28136f53b5a07b80cce6c2 +size 710523 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index d91b4c7d3c1..9ebeaa26297 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:625ae6314b16bcf55fc91e87a78e023bd416e15b719bf2bb2bf46ba7f7fc59ac -size 745197 +oid sha256:4d3933489a4ac46f2d3b943aedf956ec9c68fa3dafb2686be9d8bb6fb4bc3f47 +size 724575 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 4358464713a..429da4c6d1e 100644 
--- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bf2e0ce74fb45c9191cea18029febf3cea7a381db31b0054566fb850cd25c95e -size 648593 +oid sha256:bb2c4ce522c129d9477967626e1974867697498b03998421fd906f686c465cb5 +size 634583 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 1e0c5c6c60c..46b95d4c410 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:57e6a4ed6094812e18cebe7c82607435e7f9e84ba8be8fc671944431abe636db -size 745835 +oid sha256:eacbc8bfd650940b63fffb81fbe3017c9ca6d2c538b8b6dee7e16c8902de5444 +size 729951 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 3db9eb7d6ea..9eced4f7816 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8e1850abfe1fd9ee43d3a4ee405ae72a62dbcafe6eae9da9f11e4520752167c8 -size 661171 +oid sha256:2f12105522d1f1f2225fa80a52281dd4468a2836ab02c593fbb23c0ef4aa859a +size 647359 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 1d1d83b9e57..fc6c76ea4dc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1a7669e214826569b22935bdf8cd6dfc493ee614a71b7eb6886cb588175ce576 -size 852499 +oid sha256:449e7098f2383c33d770503d77eb9f2f5bf5df0345abf3a242616de7e0dade68 +size 833851 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 5960497a70e..699e15b30d7 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1d969840f429dc2e45808e9faf2751fd8600c8a6d12c3cca84b5e00ca72ccad3 -size 758511 +oid sha256:b1b403f8bc308b222cd4d5580eab2ce5178c826a17737be4431c576d054f52c1 +size 738679 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index f362e4fec59..30368085481 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4717c642523b3dc56275ce05cfdc9db014ab8b7a96ee0419c78378c9bdcde58e -size 832667 +oid sha256:6ca9ceef8f90c5f3ce920138070cd3f9ebb5c0489258584122e64b9e2ffb3a29 +size 811601 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 91eacd913c4..06d3da4c3d5 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:59108562fd4f3da192a179e3d177a152944250e51d34b7db166e42e5bf80a00b -size 737099 +oid sha256:9cdb950d9aef55d740cab7fb820e62ca07566d4d7fc313abe3b09a9f2dbc0d7f +size 714555 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index f107d113432..7c72e494478 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c0161a5e66a899a78d13db7c41cb6b819eb0c0791a02f146ffe94939a6d433fe -size 851213 +oid sha256:a34c7c0682edfa319d5ad4c24c0d33e1359bda50adaa6f8c26da5feaaf959527 +size 838041 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 7112b74a659..956acb39433 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:20942854c7f9cf81f90367aa2dd3b44cd5c752dab85cd5dd501db23a195f700d -size 778193 +oid sha256:9170e919c3c939d8220beead1890af480a5c3f19c3dd83f2f1ba0529ed87ab56 +size 761517 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 477c6cdea49..9c03a4ed033 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dd6781f1b721092a8849c1ca4f050635678a34365c2c79d93e1251916bd4e615 -size 831381 +oid sha256:1af2652cfd0a09423cb3a72b4eb09c1087cf1cdaadd977bfaf1edf9485108dc8 +size 813325 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index c4bc683590a..aab2a248576 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1ca520a031122ee99299c4771d6af2c5b6b898136fdb4e821e098a855f9517be -size 751109 +oid sha256:5a473e3320b511bfe30759957adc06756e18118b10390608b17cfca062bdafc0 +size 731621 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 04792381ccd..87ef5c7fbfb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:68086d0406055986c0f5ce359dbf6882e546c1e5709b4bd4f725f7542514770a -size 745201 +oid sha256:48585587a1cac1c577d9d5d748ec5df508c3adb0d88d55fa38cb41a063ccfa50 +size 725369 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index b630ab73106..8d217e1fb03 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a5568b5f641f487397e965c6fea441de3d8c2453a777d6aa8eb3babac5891612 -size 649387 +oid sha256:c65e460582d1dfbc6983d5396b0dde71a8cf69011692aee8455e558ba91fe414 +size 635377 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 73a20cf0d03..b0341aee51b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:291c9fafa0ddbc6755b883a221f6e85630f6e10084354d61687fa8ae6336dbc2 -size 746629 +oid sha256:0e99198155dbef5faa7db3ef3002e329a4116aa632092ba189c9016905ca2c8d +size 730743 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 19385ce3320..eb84303dc76 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c1c4d3e7211d8c50d5e95567f3e479243f18b0e04d5e8d5d80d6a0dddd6d2c51 -size 661175 +oid sha256:bcdfda93e90d03da19358d1079a2379e76b46f3dc4f0d2b63a64b14d124d604a +size 647363 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 59ec849f2f9..dd186f497f0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:154f6f8889882da3fe28719594e9ba74db14fef18d0d1270f4c0d8cc4e353f2b -size 659927 +oid sha256:83011a074d1a072e864a0f3dcca2f7ff78e1e3ddf9ac5bc4d5a5b3e83dc32f7d +size 630425 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 50e5e63ee88..1121b0ee225 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f85a43fd8afc1e9a847b1c869b6e7b5b35564c7899b582c0283a4e2f8a7097da -size 582414 +oid sha256:fa8a68756b62dd1f9878242c1cb036e19140c1bd2fa3dc60a6667e3db93fac0d +size 553012 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 0eac745afa8..6b162fbcebc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3f6eab5a344c4b4f9a64e3ac5e7ab66f3623971b3d50262893e39c5aa7f815b7 -size 664019 +oid sha256:776b7f54efeead87ac15e1b6d6ab6095206fe563a4cb5171048ff122948be00e +size 634469 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index b10ec53dad9..8e364e5f77b 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a6b90dd40a1ebfe7a518cb9b3f990a961d29bc394aff5c55ec66e7cbd68259a8 -size 588234 +oid sha256:b08aa8716c2644eb0b9b8939a5b49f7f663d9554bd3df9b95896f19a908c7c65 +size 558042 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index f81c2c90748..0db7f065b86 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:224ba05790496f9749d76ddf3b75581583140315980a222269338eb260809f94 -size 727003 +oid sha256:bf374222cfbf80fffd88c742d1fbed5f6214a34a854cf9d6544d4e556a399714 +size 697451 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 9ff3e44f2a8..c188d85e536 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:230e3cc235aab21c8f3a2dc39bc62e377ba5b00e997a736127859c4328bcd154 -size 651465 +oid sha256:19c849ee0972453fae85fef1bb92b1f715aa88121a2cbe152fecaf747bfa174f +size 622507 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 75680ecba9d..fe17243ddcc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:013705cfbff57e211d57d218ef66d3012e6ea33cef01831461b62f7ac8f707cf -size 746163 +oid sha256:217258a853126e3d498a76bdd90f616d837725530d4dcad8e0e863a3312cdaec +size 716613 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 4ccc114e912..2aac0adc1db 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3ce1129a69c6439a75fb9250fb9bb1e327d7a4144a724d85011ebecc7da8c9cd -size 664409 +oid sha256:e601600d2720a3ab7fc5df47ca4be918a4ea3c59a7d878bb0db6840bc2ad183d +size 635401 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 6d6f2785b6b..e6a6f98770c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0b7430a7443595bf315d3d0b0e71f00754ac28c9a180c56325cce2a5a2a03003 -size 739009 +oid sha256:41645dc2932cffb899622fb279ccff25c1500332285147599607d4d9769ad427 +size 709459 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index b71f9660a6e..0ae4f366684 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dc9c823fd777e703e5b15cb00cc1715ab0b6db86cc037f251cf36097a9614b3d -size 657257 +oid sha256:6bd240d63d05285d4703ad170fa0ee6d3e6cd57a1712a4d40d32362e9cae2b6b +size 628249 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 853eb61b782..eaafbdef14e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:60effac5eebae90c99f61fd4872c2d70bfbbed206442d609ebec1ce0235f7e1c -size 748479 +oid sha256:01dbc73ff357ebba21bc72ce82bd31b50ddf5d9f36c969c95fc9da6f73ee135a +size 718929 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 0f9f092a3ef..52146c48012 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9a040b2b4ed59e891afa05d20ee709a1c8236d99df9223f25ab5566659f3876d -size 668059 +oid sha256:8b1d2b6f1bc69c27db41f4eefd04767b8fda9e97d1950c896a5d2badc9d2f445 +size 639889 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 1c0e83bcca4..e540ba17ddb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7eb016a873d783ebdbfbaf2f483bc1eaf1f2ef8b80cd23d6b635a1d2822a8f2c -size 741327 +oid sha256:1a2d83f91b60bdf5cfc1abd165b1c9379b390cc43b4b2e05fceb31f23d892d81 +size 711775 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index d1f137aabe1..67de29eae4f 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:685ea43d70584833a1aaef8a543ad7b23a0d6c511a5d461f1f1a88f1aa1a766f -size 660905 +oid sha256:03d816f023ef9be07467900f190324b53a31eeb733675fd2049e6f83ef319c6e +size 632735 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 73e892d34b8..7241472fb17 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0a83a0392d56385c3e77514d9b161f9650b7afba4c5c69ab6d5a6683fd03a65b -size 815607 +oid sha256:082c950f3c970802e3d7a5bb71ae669567d0356e39bdb20f312ac96db3330654 +size 786057 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index a39d2464f1e..8c1aacffc8a 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:437c7d45771b46309b33db864f8fffe6b4b2522777ef5b40d7eb41da3109b09f -size 735137 +oid sha256:b15b683b53d5aa8640434aecd3a090ec7b6a41ce99dc016ffbf12aa20bde3163 +size 706967 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 9bdfeefaacd..76e87013a61 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:26a2bad638f6341193b82331963ffb82bf55992875d975d872f4b5fefde42906 -size 808453 +oid sha256:0856436e0d8f97af0c6455b83cbf59fb42cf75029963ab4312ad2d881d3d90ca +size 778953 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 326a79748d8..27de247e4df 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3d8ae6d94dfe0b478e81fc7b98621a7cddb7508fa702e04c5424589114957bcc -size 727983 +oid sha256:bf13b89989862a16977760e60cecf799de389de380bd5404a1aaeb48bb294875 +size 699813 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 1b782c5285e..fae2ebda0b4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1447b3c34e3bf012870e50c3c06e3aebfb2d4ff3e9426c8a0cf47804148ef18b -size 653711 +oid sha256:e362f48d745ad0ed40b46a65716b2a0a4e95c144de8aa5e5fc1b7a69d8e71ae1 +size 623371 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 8116c66c213..278a4eeb3fc 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8d2866448118c7fc375bbf38b76363fbff63e085788dea8e3568617003655336 -size 566086 +oid sha256:6ec2d2a9178fa8c263148b39d60af7e8f8937c5db0aed4e46712663113e214e0 +size 536682 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 6e507ba7b99..e3fc612fd62 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8740ce6603a952fd18ba7398af5419e4aecbc888bc3ae25c46471ef0a59b0e1e -size 654201 +oid sha256:5f31160b625ad0f90875a73981f6593f1edecbcceedc82081ba842fd537939d9 +size 623813 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 9930a0eef42..dcd2cf169fc 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:deed434c71aedb090631f9c9f69a96ea70df707cc82ea90152eca4131f66ebd4 -size 568994 +oid sha256:1add629d3d0d563e58865cd211585bf2495345f8f1ec97efcb5e21071e5fa721 +size 539098 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 8f4bcc4153c..540b5e29297 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ff0bdb29aab7bc6dc4acc8373e1d27b633010d05ffbc54312033791dbf7fa1bb -size 722711 +oid sha256:00c2a07a40b6bf3f162a0fef8bfcbf7da91247689d803614a5958227c1969bc5 +size 687339 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 7c6fe8e82cb..b57b05045db 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:28a70d7152bafa77c5f5e93daa6af63e58fbdfc2bae7bbe547c6ec2d8a442d8f -size 632767 +oid sha256:6f388ea2f5895b3a60844abadbe559625d521df307331ce46bcd1e405ef03afe +size 603414 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 77b5737c7b1..4904893d42e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7aec32da2d41ba87680f70ae5ac4ebaaf0b8937dea77cebf55dedf61be5282df -size 745817 +oid sha256:5acdd4d476cf598425ff3f8bcd7ddd7cadca562c3d6165729b8d64ab0ee4a54d +size 711531 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 4dbb9c4686a..90eed69a005 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:400b70ced97a6577bc0df7a12a1f70f933bb059bb26f221241da10fda183c649 -size 655431 +oid sha256:496da5737f2b2ffbbe5890dfb022238db3ee84293a4f764e1f26988fe8f521ec +size 626077 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 41fa58da03d..8c104dad09b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d3e5b44b945bdf8e96759f870b0949d5dfd90c2bc72541569b99f3c9994fc7b8 -size 731511 +oid sha256:b084eb2e08912b82b0c2257b7fa29044519b14f9008c259f465e2f1f4613846c +size 698063 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index acb52046add..75261f6af36 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:41dee847b79d1ced5f4709ce3506c873f22df5be81a03413e647a985188d6c10 -size 641963 +oid sha256:a96b4f2a8e4fe2871c779d63666be9800036c4d782f035653381b361cffc282b +size 612560 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 286ed51e498..6ebefd40346 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:395037e453edb6267f473c2d2d15306d5c2acc9ec0bffcbe6f971092b6f4fe9d -size 747197 +oid sha256:376768c72e6db7580b62683c7cc00eb5fd08fb8939d9e3ad3bb092b0bad04208 +size 716807 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 11c56f8a2e2..e8cbf95fc4b 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4ee1b9331016141ebb0f86ffeaf0ffd86200e7578840cbca649562cb1638bd18 -size 658339 +oid sha256:c23ea63d4778a129ff7b74d6416e13818c55ffedf000c19da6bad53af2630be4 +size 628295 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 820184a63b5..10db7de8731 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aaeda814fbadeb68f755e5f826cdfc2839479b1771366d6d0148e23f83dfb78a -size 733679 +oid sha256:a072f806fa3e95a81172f4a251e05fc918e2a62d525beb9e2ca7bf6014a8d907 +size 703291 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index a7783af1c08..92d8aae9eeb 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3480c894eb614f7af04b989b5ddda77ac0c7a08d82fc269fb3252e9565733413 -size 645661 +oid sha256:56fbb9c4e7f06d272da0d3110b728677818da49cde20d37f05d5c4c7b962d1f0 +size 615616 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index fd3096fc837..666e7188a3f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d6e5497e60e3cda0c1b6e988814167d10087ad6a7e4c751255ec52f0d235f5e5 -size 817087 +oid sha256:fdbf73127d408f34ff880959d9bf0cf6aee935f07bed85ce72b7582fab966260 +size 783737 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 3bb0796b4ff..929db4ffab3 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:578665eebd88b393aa2b5dd0759fc51f60dc082e24266e864a7ea3c125408dfd -size 723543 +oid sha256:b2cf6a1c2778673944014e7286dbc74869de3b5500916f7a744bbbe76f232201 +size 694289 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index e6e01dcee1f..917096e8944 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:754664aa2e49715a6d3ac8658e59a8931a607eebe9b20a069c60cd9c316c96d1 -size 803569 +oid sha256:6bfb352fda02b1ee42c539296991560564c58dfe9bf265a1e31304a3006725a2 +size 770221 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index b5e4986617c..04960b46c7e 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3029efdce85a89e39bdeb7b8f5963afb8b5a0f2d55cf60b53e199bf377d38396 -size 710025 +oid sha256:fe339ecc50ed556acdf4b5d037969263c5fa04197d34d90e176d6bb787f0d6af +size 680771 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 35ba2122647..b291242fc8e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ec52ed8bde4873b98a88db2b5db30866065afd24731dd3d20dd3a7082a76fb17 -size 621541 +oid sha256:7774a8465466ee57fa36a5f985de6c17e72d99c59f42fb20eea71c8dc459b83d +size 621147 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index bf42d5d2f6d..31eb3de7940 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:59c60dc01f29ccc310e0bfa41ab3a84a4196292c506d62cd434c8ba3675d043a -size 540132 +oid sha256:595cf3a0f4ff1d5699ff0091a8b4d42a338e1b59731a1d9184697c66a7a53b1b +size 538948 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index fe6bbaf88cb..6ff121c2059 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c269f3afe27bd4f94e3f190ed79da7aa4cf74da015563f8040dd1495dc04960c -size 626719 +oid sha256:92f01023f41b8e5f77eaed6adb072b6e512ce43b69ef27d24d6d0925ce0b8cac +size 625535 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 4cb81079cb4..2b9a5d24bc3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:058415bffb2c53318f3547382c3eacb4ae1d8c9c10da8e80629888fc0196b83e -size 544472 +oid sha256:12f2294cf2497da614f7439cf7d5e720e34cb46a48fda60cc18f13337e908820 +size 544076 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index b6e5e2bf59b..0367b1b392b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:80949ccd9859e2e5894d8d6de8c925c92eb8252e8667606f7eb907dd2b3c2d18 -size 688567 +oid sha256:97982db1e6b4a3f6cf935f94cca13bdc84138bd9733e1737733f823ee93d899b +size 688173 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index c872c0edc9d..d31302379b5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ad958e88ad9fab218bdf084dd5e4aad581a086b11bf25346a6d582bbe266b2e8 -size 608392 +oid sha256:ef843787329c352b21da28fe958ad742d3ff5685623c618ce763b49b0bc40bbc +size 607998 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 82104d43fd3..69a2d4e49d1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:390b265610dcf612acf17692043610cda047afede0677a4c6424d59f05b8be90 -size 707679 +oid sha256:771d5e0c3343e98aec65fe1783e7d97defc8182d5a03f56a31d39e254b85e9d7 +size 706445 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 7060b436faf..6837ac075d7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:109b180a5d77e9a6f94b931ac879ba595aa0ae68956c7a3331433a8c229049db -size 620943 +oid sha256:157bc1c44aae55424748d6cf35858763d7f3728433f2390037142bb3de71c415 +size 620549 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 641a11af140..b9aced86fa7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:127d1c260fe0e193c06f24f5f55e1f7bf732de6962a7ee0d539d32b7df7437fd -size 700525 +oid sha256:54a7ad363f22a89ac8a5cb4af8361f5c469c529dc57e25c8af70548186392325 +size 699341 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 37473fab354..236839fe6a2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a0b171dab4fbfb363c3eb887d6396c4a25a43742a169fc3b8558feaa14427bcf -size 613788 +oid sha256:f6abaa342898a11964d0faf24e1b837abb2d94c0da499eee76cb6891afa8f2ea +size 613394 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 43bdb899958..92dc0c8f52f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:942db415334eb35208c73fd41ffea9e6f99dcc9abc042587305d25f3f22ea40f -size 710341 +oid sha256:2484f04073e9ae6d0071a9b428257a306dec87e3c0ab29e27bbeb29694fc5bca +size 709157 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 8f3168c8803..83239a1281e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8d5be1c9e814aa3981c10f0156649377c28bcd8807b7c7aa6a7ed7d6a71414b1 -size 626219 +oid sha256:6119d1fb159e8f39db25838ff5b870c50b33393f9de2c03ab6a74b812ba62b08 +size 624987 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 612f5eaf0f7..f1db4a1de44 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b20f9de384425e528c573c94a0960fb65ee1480b36aba0e5121c5d56d9a2916c -size 703187 +oid sha256:7adcf00410c47eac0e1138dff1bb9271c31fd383519066db4ffbd91c5c02467b +size 702003 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index f5a5edf6fda..17fa0916dd3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6c54eab5ed94754528d97f27c2a274134f007fcdad90bdab9088774a4c4e0f91 -size 619067 +oid sha256:affa6aa5689aad76ac921203a372eadea356e1f1c2106532b61f28c0bbef4e91 +size 617883 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 3683f41c4e7..bfedf2a780e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:db3c925c69348118d552f8a1d10df26ea95638c693db19a6f53393146ca68379 -size 777221 +oid sha256:651687975241b13ae381db9196701c2066fe63d9171b97981ddccdb46dcdadd1 +size 776037 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index b9df50b31b8..bd58e6fbec8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:23c77fa3b0f4e25332fef4d2f63edd4b78c01675803a006bda6f420ce4ead4f1 -size 690881 +oid sha256:69ccee80b7fbe97b60725ccd6a2e976e178f2f7de9272bcee87d96abfe53426d +size 689697 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index a00fbdd434c..8134b8472f0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:adb9ab77d4d04bba15e79689a7db54ae9f13328187e3010853be97e6a6eead20 -size 770069 +oid sha256:c753022f3a4cec3081823eaa63c870db4ad2a73d1da648ad73b05fe40ff4f534 +size 768885 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index d81207c1bc2..a18307800f4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e6e9e825211d2d8cda2ec526ada3c2b5a43f97a77938693780ef25c59e896847 -size 683727 +oid sha256:a746876308fe46d0f29c81776cd663778883188f156d4966f6c65d26876e126b +size 682543 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index a3f097d89a3..b7833588b5c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:da4efdf46f7fd99373f5c8d58387004dd4c5970ec282c21ac451a4d81d26d936 -size 686729 +oid sha256:773cd1bbf088e7cf3f29743c307c3cb20abd0b713f71e244cf5cc2eced595967 +size 657177 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 4bbb667d326..05586a6ad37 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d84c5c56d28bde53659eb8d69b52d7848a5dbd36ecf2cd76d0b0e9dbd4015076 -size 598512 +oid sha256:fdd156ee7fc8245199bcf2fb176d46b846dde6baeff52b3374b79463337fcc1f +size 570588 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index c0118ece395..46f9198452a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4dae466f9bc50807b5867a8e457aa43dc39172192dfd2e91920905d7fcc49986 -size 695803 +oid sha256:9b0946843bc8644e5c9ec89b32bc2faeb384b98c4d26dd5e2821057f4068d4ca +size 662455 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 6d41baa1652..37bfabecd57 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4a2acffb01412eb38ee01dc853d132ac10827011883de74c2654d154ba2e5b54 -size 615134 +oid sha256:951cfbef15fc5d5e3eefd5eafd0b76f1d1a74a3d2c188c4183c9bd2d9b5c048e +size 581834 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index dbe4a0fb828..ad56a1194b3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cd61ee98f339ee60bfa4f0a6a7c31c180a40df8bc76f6aa7a6399394196e36ec -size 776517 +oid sha256:4c66aeae3151dd2d32a0964f66dc921a11bb9474eba48982cefeff1068ea2cbf +size 746967 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 12a67cd8d2e..d27282fa5fe 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dd968f33f91e231d51259cb1c8058ce632faa9b827d116e953f70918cbd10359 -size 685243 +oid sha256:58f38fbb2b448aee10e74e15509120e479e52f494c0e2dde17a74bc584662a04 +size 657319 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index fa86d29aa32..bb0698d93f1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b5e249414c04f31d60d848b9882e43c0c2f30c9231ed0040fce5f562b7e6da32 -size 766995 +oid sha256:d8add50982aa2e801590394147664738f2a282dcd0fb0ccc96dd6a432cdc095a +size 737445 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 360c289dbda..b090fc7317b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:feb742b225353a7b3b51097ca18ee71cf102817216dec4e615a5304146a80e16 -size 674931 +oid sha256:a4a5c83a8f70d50e6cbf01c0448489e1d8454b493bc4227afa6b5cd591d492c2 +size 647009 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 3fd9757b5a1..6994afe90ca 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c6404b451e4369ee9044831b477f36306cd651c70f023aa079a7928cc42c7cbb -size 786481 +oid sha256:58955a7fdc325a60f13c8f30c439bbff297804df376c11d81e6c5d3f4865157e +size 751849 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 9d175175e8b..de3ef018c1f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6bcc740a67d4b9d33b83cb5f40eeb83bec24fd2f82b52671c7132f528cb6b46b -size 706207 +oid sha256:800bbf42ed3c373aa774db41a66050749846786184cddd37e4028e148f77262b +size 673351 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index b6431b0a4ee..9b2bd9e1cab 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fd1b446d85997ae55dc58483bd846d45adf857bb881cdee23320615f1e56d616 -size 776909 +oid sha256:eafa0e1d3703602dc6a1603c9d3a1be9ca3e54a38f201b19af9f0f075a7af137 +size 741833 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index df7384251df..fee2d354e94 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3fadc7cc54dd2f75b1f5eedf9602646896f8c37a43d1540d4e68c38d2e7ccbb4 -size 693035 +oid sha256:2f74d1bef751dd02156495e3627090c48b880c6af825831367f5320837c40cda +size 660229 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index fb616b94e71..440b70f6d04 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1e9962101ef80c0eb40ba011e4dc23a1a651b1e6119aa01e529925a0af950444 -size 687521 +oid sha256:748252ab2c73558b50e6c9ed541f358039ec03f92c9a3f9f49e6c8027c5128df +size 657971 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index b4a17319b54..f267cb8f376 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7d5ab0f6dfabbfef897ab00be72ec5c02933de9255a64ad9becec4351c512267 -size 598516 +oid sha256:047b470939e51bd1a26671a79f20e5f7cd209821deabe6e5895fbc6fb1252575 +size 570592 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 2e1175320c0..a9444bb7d6d 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:81a9956421b27fc36bd24e1500e91c19571077173dcc1513d583fedd616ec82d -size 696597 +oid sha256:42f15960ae4200d8faad8b0fc35febef01638d612f4b016303d14ded323cc464 +size 663247 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index c1fc4373e02..296b0e586fb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:86e68100971b61deea16fb896d0a1007e2b97986bea90b9561ff5bf9c8c3249e -size 615928 +oid sha256:c283fb840d5b4d7f8af6d65b37360c7e71de4f0baa12afc9cc09fad9e4067299 +size 582628 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 3b13cffb27b..47ac33d66cf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:521a0b129a5f2b04bebdc1c3c0e8644045a3835423afce543d040fb243e986d8 -size 733573 +oid sha256:edd69751c952c5244dc9a5861f623de0dd7c1116caa6fb1834b85a485e6ab96a +size 650891 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 72f0a8b2906..ba206a4cc57 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ 
-1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:86bf03c625c5c011ff96ca362ebceb4ab8977c823791f75ff31a95852f2b9919 -size 655815 +oid sha256:adc0e7d209eb6bac35144e9a030295723dccd3ab27a08719b3f7db61b6154365 +size 567360 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 25f9c2bb488..1dd62a3194b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5232576f62c9084ca0a6d45c4f81761825a332d7c8d50a2c9ef2deb9c29be4af -size 738455 +oid sha256:bf9eec7290836de927ca820522c666fa9bae673972b1087226a4fa5caea1a7cb +size 657005 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index a8347de7375..a0ce1e08f96 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:76a69e629bffc2b0e8dee51ef5e42770ed5aec33c6c5a67f3a68e0c5d988fef5 -size 661635 +oid 
sha256:56eaecad4a5aa0a0ccfb5eb8652642eaf85d22272aa89465b4ac602ba2ef32fe +size 573180 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 6c5495485d0..988a6542423 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a154c79cbda6770367a66a7126ab147800f774c586069cc38b150792f6dbc0e1 -size 800599 +oid sha256:92ba95931565a5249ac111affdaa3cceb9b7a63c5d545879c5c95a9fc375e9cc +size 717917 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 6db43b1447d..84a96395ddd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f7521451be1a475a5df321c678893b80e3d7196f04d5833cf79a1b664cd72e5c -size 724865 +oid 
sha256:acb02e001a45c4ff277eaf7c056c37d7071d95dea40d989e99e67a9811c4eaef +size 637199 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index c51540cf648..1a769b423b8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:374eed394a0190050953a6d5d0a7fc1ce1fcf8d9c465a71f478c98c6c8f42dfc -size 818971 +oid sha256:8f092d3fc01add619f041726b3343d02eb51212c1e87fbbf19086ad3cc0d2156 +size 737077 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 1825360daf3..8d39f63c5c8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8793357d2acdcaf4d6dcfc0fa6e1823b8c80757c3bd104599e91e19ddea0fcbf -size 736971 +oid sha256:79833d45df711cc8b4dd4393cfaf7215a98f8ca4c0c0a5010207e5a86624e149 +size 649355 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 0b538c1c01b..99aeb783a77 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ee60c4629285100207ea539915b111efbb81dad20c6d96d6e5ecdc90747ce2b -size 811867 +oid sha256:ebe429573625380d7241e9f7331dbe0bc16489d4fcff891aa9e0d39962e95831 +size 729973 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 46e7a2db668..d8ce753861f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a603027e9a86836237f8ff166597b9cde7ca2b03b098338b0224a3e1323f458d -size 729817 +oid sha256:b3545a6cef96aabceacc0ce791973f1451a82d86948cc784ccd2c47b8c89dd22 +size 642201 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index c1beefa94f7..5e5d6e452c8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dfab62649e49ce925f2a2ec2d709e3c5821bbfa14cc75740d9656c95e8a0fe4c -size 822077 +oid sha256:f77734cbfafbfcf446e5a6c011253a60889af6bd9663ce7812390b47a129feac +size 740431 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 2c890b5b74a..8b30506418f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:20c566acbb900e19db6b8c0db8230589dbb00e9af4aec1ca9bd4cde4c00a97e9 -size 741459 +oid sha256:02e0c5be04de2866587da5dbcce37cd638fa40faedfb9470fc9765d731f0a924 +size 651375 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 
a65b17bb26e..113664810eb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:981609773a5e1b2b54292850aa16f076791016951821194ab4053960c1134973 -size 814923 +oid sha256:4fa5e3d27a6e05f073c75f0002c259e3c913fc6d0429e06f4bd1f79bfc06c8b0 +size 733277 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 2f66aa7b210..8853be5f4ff 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:355c0a229e53f0f14f2bb556cd0a340a973209689c84666301252b3feb5f566a -size 734305 +oid sha256:2fbc3585d4a6adfaf3ab8d277751d0c34de56cb28e62a3c7fbff69e414edd813 +size 644271 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 1710d4f6101..95992871e99 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:beb4d788a0db8d3a3bab8cbea8b966f611ab978ca04f77155eddad1e863169d3 -size 888465 +oid sha256:95cac7e1a6ddef16a8fd9e82f2d48b7a4fece0420a564bdcf57d44afd9187a2a +size 807361 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 0a27620d9b4..1a5a4ef4a66 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a60ee6b2f109703b58d57d110a9a412b984ae9ef3e2c65a0b19abb322e7d6c85 -size 807549 +oid sha256:48ad16404c8b037612935df8192aa19383571a11a9bfb87eab91ee7f9bf9c24a +size 721907 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 4e15440d241..4d41b8895a1 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4e85ca0305139f66ff4ed43a817a2b76aed6791c8b2f4801fc905d2a66a633eb -size 881311 +oid sha256:ddd4ea8ff3eeb445212ca072c21b136a519a158167d339fb1e9b96b0fd97dcbb +size 800207 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 9fbcdbd5fce..5a3c5b9296c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8aa6c3f942a22134ce93e68ab638e50f178f5e1f6e9301a900b8bbff8273659e -size 800397 +oid sha256:1de56090b219311c3769212d051896800d742d6fea71c3b29a706119b6506c27 +size 714753 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 48d5667befd..5a517385b8b 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:550104e0528afc234510ca87af023ff72010ced15315dbcaf332f4c0aaac50dd -size 724299 +oid sha256:a174f4760883eb2d25bfbe40cd522cb3f0d28f1f852b6a907cb27a6ba5e3e453 +size 633575 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 47446bd0b95..dbd6dcf607a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e11515e2935044d199f2c1c31d506a5bd9d27fa059b1dbdad963e07ada48a1f4 -size 634307 +oid sha256:e20ce5b364688304d62b262ec390088d5ed91f348fb022818b5135023f097b3d +size 546344 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 00dcb9bad31..6b5acdc4969 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:94c353e21ba1ec897b21ebeaf956220d7c572e176e833234556c1b90fe267139 -size 722421 +oid sha256:d2474d6e1382113d32700961abc648b73a24ed40c068589480b7e24e27186137 +size 633473 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 2a72169b599..25f845e39c9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:581d031da93bd1988dbc196e5a94ed14bea7bf0fafa8993d37fe46163c7703c0 -size 637265 +oid sha256:3a2d882467f3f9b7fa6597085d6ba2353ad50436208666de9363ee929bc69c21 +size 548760 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 21083a2c158..d7a80cda85b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:406ecc61fc333fb62c6f12329f005047d066c2f0064a68e7197af3b04a43a054 -size 791719 +oid sha256:97f53d923024cf4e5f6ee204094971096de70ae905a8ea68e9180bbf972379ad +size 697789 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index c1a70c5863a..437e996a3e8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9340e3ddc40b7c8be4410bcca6b03adb549f8a9cf9d9b7f3a7c0d397e2a8e98f -size 701777 +oid sha256:f5353d92a16aac66dde3e6de3cfad9d85765d8e8d1828974d6f729a7ad9d6698 +size 613074 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 1ce24c56b73..0bf1482527b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8839f6ef6fc5b5137a11c993cdee52013eb04838fd2e7ab7ddd66d093b43362f -size 814037 +oid sha256:9432ab3a4072e882bf6dabd8dc58ddf49a8f86513a1a92c67170155bdd7232f8 +size 721981 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index dae94d31115..24da1cf4185 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:326855ab492a936141e52129b1ab692c3749acc231afed96b7b9e84ad58ed955 -size 724835 +oid sha256:1873f9c7c1fef153c88fbc1536fc3744d0ebe45830027b5fc33fbbf5a45a2606 +size 635739 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index d521ba78fef..bd1e3fa56f0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp 
@@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:26e4c26b7ed5f88ac60ba1f5481f22b5dacfeefc3fb5f2a547f4c8cfc11704be -size 800521 +oid sha256:a5085c51e0f331ec8a8895ce3110e54d095db0a55001afdef2d9c486b1652d83 +size 707725 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index cccf84c0af5..711a0029bbd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e727a38865e3d2343bf10a6b73c41e6b17eaaa969ed038112d1eb337cf3762ac -size 711367 +oid sha256:c6f8e38c4f83bbf17e35f01b6ffe4157cc862bca2e8d9fea6069a046bf44a537 +size 622221 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index e0762caeb21..1db0208c9f9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cbbee0b9c8eed8408d67056f88917c7cb9b6f9a3fe99035bce8a7e6a802df354 -size 814627 +oid 
sha256:98c5b691813aff8ee107f88d6934872d6f998ec93956f4538b4673941bb20918 +size 722325 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 549edbf001a..25c255325c6 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3afa1cdad5d9d0db8f1c2f87a94e5a3f6a8b7a3dfd86d03683761d16c4b1b900 -size 727891 +oid sha256:553cfc0ecc9584513023d514eb4acbf59d5283f6be69e0d0b71ff0360fda8ce5 +size 638747 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 689457bb88d..e0dd5ed5a52 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0e8e1e06e2e2995a5aee5b4855cdc457ef964a3d4026b2e5510481e1da04d674 -size 801061 +oid sha256:32a0182384d825f49890b1b14ca4c417268a4790b814339578061e02e1e4d40f +size 708709 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index e5a1aa7d0f8..eee2d63bed2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c93b17d377a2dca855a6efdac32f043d0d69466258ee3e0b9969304e0d7f6dd7 -size 714375 +oid sha256:62ac3c319a9709906a8373c9204d072a5eab66f652ad09286e73d0b55008977b +size 626067 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 04749eb8b99..627c344bf33 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3343cbeababa7ea83f9b359fef87448c058e9cc5ea982ddc78626f86e5875794 -size 885307 +oid sha256:0925ae049f00507c0174f3bb044733c883f712c448f43243ecc9bef5b6ef43ef +size 794189 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 7ecd3197535..6e7ea97eba0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8caac71d5db0334e6b8e33f631240d8e50cf6c7c556a1ed6e6a8dbab7176446f -size 792553 +oid sha256:d6ca406a9a7deb4a05437d801fade35caaafc9e7612d2cd9e75adef9ba6ef872 +size 703949 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 7d35fa2f948..5ed36924832 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1c12728312107d1392ca80b5cc3f9b1e52d6acb440c20997052dd244c25090ca -size 871839 +oid sha256:063b9e6092a5832c86d84da04e5e2c4f45746289e5a9516ca784c81fbe2c947f +size 780671 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index beeec1df5f4..95ffa21f2cc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:183b1b890ef4073790b1d84d5380b44c5e98e6a6271bf19f97b973d6da3b2e29 -size 779085 +oid sha256:35a15edfeb58f5c222a22c6988269ca244177bd630bd29dd68de87d6ae1edc46 +size 690433 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 8a225a238bc..d61fafa5f09 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:31b95de4f1cddd68ef19ab5d1c5d7cb86e99b437e2f7b3097f2b0a910face142 -size 661543 +oid sha256:59742af87fcb57118e678950cac6161d7ddf249883444cd5cba804d155a3d3f9 +size 671951 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index a035abc8655..226065267f2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cbf31f7f68266bb08e02fddbe7c6fee7e1b1b9b5cd67f1515335ab1e6cc73b9b -size 579246 +oid sha256:94aa4462cae0190edba5219382b1ff2b7e4e9fa0ffabcfccdf0c314ed599b73a +size 544958 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 5af90d57284..2efa9eef9a1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e5f30bbd1527ee615b3c02dc418a1ecbc6d422eb7ceff78cdac6995b829441dd -size 666721 +oid sha256:67f987e1d12e5362455db0b9e8720b96357ecd154f59ab954c6bcb4fe54974e8 +size 677327 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 48ca08a34f1..155f3ca5385 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2b8aee9f9808ed620f796d038c17c013540d436f3253ad818c76df2b6d8d60f4 -size 584374 +oid sha256:bf9547cd369d39675e380709b715da575a2849fe2e13bacbc5e6cabec5f177ef +size 550876 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 70934236f10..ae3bd82317c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e2ee79218542c96b76bb721faf2ded7986947e503b43b7828343cc6512da8582 -size 729407 +oid sha256:dde5018fc8d970b90419aabca4ed13d505ac6da64b1c26018781377b1f054d4f +size 739767 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index f4eb29c06ee..e050c3d60be 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ef834b5dff6f8f2c77cc1a12e181be5e05f1e27fc5b4325cb497730f7a1ed9b -size 648295 +oid sha256:273b4dc0cfd92c38229266156963264338f448f057f17d2b58d1a0666190bfa7 +size 613910 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 3d771b11ae4..3c47841c3b1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4d23f9eb08e79cb0d288a84c2002e5bb3dfc2a6be73054b96b671edc2ac87ba4 -size 746891 +oid sha256:0305047e7335d8e946b8ab4f1e4142900ef140634dc40df57f620bc499ce1aef +size 757991 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index ecfbc2380e9..6cd6bdcbda3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b6b0f1640d7d58a452d7479f60a98d6002bfedaec9add4d2e79956957c3dd019 -size 661537 +oid sha256:66170e5305178c7a024f63472e2206e12fafb39a74bf789672cb360b9cf32250 +size 627249 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 46ff024325a..76b93a5d55b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3193df7df0ed4285dec57a0e9ccd55fac96c92a398cde3003df6f13a0893fad3 -size 739737 +oid sha256:67c03552838242933f337363b0a2525e27cecf1b848039cb5b9b4b89e41bbdb5 +size 750837 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 
0e5a66bc85a..f0a3e589e41 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:df6244f3a3688feb2bf87ad00904e13154b38ff48f0b0b92caee9a2f8ba90103 -size 654333 +oid sha256:c2bc5e8ed31533209a1821eeddd19d34d48c82b7d3f8c4c3dae279fdddb464cb +size 620097 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 006d91990bc..a9a0934d05a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:46ee4eab08a84152db63cc412f6412a98f0510578de1c8acc0399a56322c38b2 -size 751181 +oid sha256:feb4e0ffa152b47e875b56ecf3282490162a7e5c660e90c1a1519ee86844120f +size 761689 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 72f217bd47b..c270b745faf 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:249fe95a7e1d74e73100bc8204fa4faaba2ceac740f19b451a2a21d375c0acdf -size 665185 +oid sha256:13fb2cec2271191918f38f7baf5c5e7c0c54928a4fc8370c794983e4f7a2271e +size 630899 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index f0b251a0a03..d7a6bfaf2fb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:60affcd38d376d7a3f74f41e0607e444c1c0baabac4b5fe7b74355b608d33b8f -size 744027 +oid sha256:9869109d5cf31171fd0bfb574cce9811bfd4d19d1d1b25b420b5ce0038aa41a3 +size 754635 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index d386ac05647..e21f38ba758 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a8d1ffbc7e968b1a7b2065310f071cf78edd8d68d8e8be7b88837db4b9c42183 -size 658031 +oid sha256:149cdfbf4966d97b69fb5e90c388c12a7af9224dd49b2a9e029f2b30db763faf +size 623745 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index c21b9e2c7d6..eb5109de564 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f0407968950e8a93f84900b6589e9f10487b6774ba7691f5e666d99ffaacfa8b -size 818013 +oid sha256:1061a813b5eba703b0ec0599980133b81f3b7e4c45a145eb037b61980b24539a +size 828323 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 59161fce61e..5e2d1fb6b9b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:931be654b40efa7b1933fc3cd03779b0624059d6b66f7aa0dd851d11260a6d58 -size 731375 +oid sha256:004a8cec99b1b5e79d646d0e5c73b81037a79c7b6ad59620e7d171778527327d +size 697237 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 82707f0445b..0e7cb8f6b48 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eb7601f30d8291563b7fefb0eaff58e4fa4b7418aff458a755e86714805f7d1e -size 810859 +oid sha256:361aea8885f2ebef096bc6d934241112f112c9f7566077140d1a476b6d07dab4 +size 821169 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index a13ef3db86f..831d7a8a4a2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e205df7cb3ec116fe24633ff5e11a027f26f64509fb4e496c1d8f2f58a6fcd16 -size 724221 +oid sha256:088de2aaa906c7053c14346631ae50ba12d90916e04ed795a93b05c920b0dbaa +size 689935 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 847f4d363c0..0302c71c949 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4b290e766bcfd1344a59b38be65b00f8fae12cbb5dd83927008bc6675f3121ec -size 654787 +oid sha256:eb7723e2207198f7a92fc2e11cd4481621b8096969f2d4455edd96e4c9a98e96 +size 653603 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index f3799e6e51e..4494ad89610 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp 
@@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9c6666ed68b0497380a6017b8d19dd87c348c87c7410ea5e622b662858520e1a -size 578066 +oid sha256:5cc7b8e943766399aea5204744f2001f17a9215f53940bff284e0a5108b5818f +size 576882 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 9e6a27f30d8..0f41bb23908 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f43ff4fb83dda1e4710bc3ded710c6ad7228376e222f8e380fbadb09bb3501ed -size 659669 +oid sha256:6158837cb83ef2ef478a4387b440bfad6a062f00f78f204784a12333aa368fd4 +size 658485 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index ae813d4e94b..d1f65d2c27b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:496fe6154efbad905f01d7c6a7772b4f3ec75e5891101e69b10f2b1b07bdad72 -size 583096 +oid 
sha256:75f787df80a1b9ea4caa5f35ff356784d73424ae66d510c550983595154869aa +size 582700 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 2708cd5419a..10ef8a47a8b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f68e0b5d2ddfc52f9953058af55790bcfa33e9fa9190f634907e5d756888945a -size 721815 +oid sha256:4cf727e3e8b14cd9fc9c7e9b8732686ea17fd22c8a164f0dbfb82ef16253f33b +size 720631 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 3b1a299f02c..365b8ec5d31 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d502052235cdb0e890de79056a462d62f0c03ff54d782eb917f5a1d42f530c29 -size 646327 +oid 
sha256:4780ab060e5c829c6479650cfb473e2c87f5862c59a046f0e87a631cc831e496 +size 645931 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 2d9cd692f07..de363cc75fb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:17d82cc0c52b5c804327d4dd5ea147f016af37820bcd8b95b4ff38e0526e2a09 -size 741025 +oid sha256:d010522228c55a831b4b67b37405c1174d6e0f4253a19a404f79aba04b919356 +size 739791 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 2f2e871d46c..f573bc16efb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6b441d4aac52bbc8851add27b65cce34a642fed14dcf145892899281f174251a -size 659221 +oid sha256:f6586394676f8aed00264c89e1ced1935456476ef49361e67c46872fdc846fc5 +size 658037 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index b09143b9db5..69956798e57 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5e72b10e99d05bdb17a9e85f557dcd8a8073e01b2197e6c6dd41192967513133 -size 733871 +oid sha256:ea73df57ce79a72a9f42d6d96f92330f829b074daadba385df506262d8242e20 +size 732687 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 4bf683c4169..6a49a5edc62 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c1ebb167fb4d8ad85c51553a214e2731943bf292e86c05d2e1933eed8a80697d -size 652117 +oid sha256:78544e2672ae08001a638118a386bcc58bb7689846a0b73612a05f36066da033 +size 650885 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 82e3a6c5a78..614531f1260 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:837a39be4b14ff0bb5d4424aa53c55ca78c66e200f35cc0173cf7de26425178b -size 744081 +oid sha256:4d3d6073f52fb902b176c5fdb27722b19cf0ec0c6a848a9a053eef1af2fee980 +size 742897 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 00fbdebf013..209feb1cbbd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:faba9cac7c96ca4373d44ac6f8373e48f39f99bdf649e8aaa0dc0aa550a4327f -size 663709 +oid sha256:ead8fe2cb324159c754f49ebf71bc2a52105b22efd7bf58b8a1ec7cce5c4406d +size 662525 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 
1fa04e1823a..dcaede1117b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f1c693e309611760ebc68d53bb6e30c76f55199c39264c3db14585b85f1efa5d -size 736927 +oid sha256:a09252b6632230418356b8185d647afc6774677f9b3ce93b13762b84efab6615 +size 735743 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 2db29eaac37..24910c14014 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d9d46a431c5fb2cfe1c02d808f9b13f3ac113b70b383bdb7203a9dc48c10e01f -size 656555 +oid sha256:47a20c4eeb13d2c607f5066ffc1f9751d9c1b0dc7d2ba2174c27294579f2d9e5 +size 655371 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 74bed33dba0..dede6e337d2 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f339f6bfc5aaaaccfe40e5b7434e4e2846301ebf06b5156891b6fd754eac1332 -size 810469 +oid sha256:06e90bb9d30d0803ced94db6bdbc6a457d1d77b0e89a2aec985a05c754352988 +size 810073 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 9227c67701d..19fd0d35124 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c5e8885f7a254227e6cc1753077af78f32f0ef5cbd91b9059dc844f363caa219 -size 731823 +oid sha256:d2d15559988d23339b1759639bc1d23ccbde8129686432b2acf557342cd8b5a3 +size 731429 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index d72d7486ee6..189c08848ed 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:401dbb78ddff6f93c7d497419551b46cf2dd0308f7f0bed4196e3703f46405c1 -size 803315 +oid sha256:77d4081ca8bb8dad8789102e2592c2b82885808fd6b1dc607470334c722a94b5 +size 802921 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 342f814189d..a63981bc311 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c0e040c5d131224f61079a5cbd051ec76dd1f0e1a9e908c8468ec2a501350676 -size 724669 +oid sha256:54f948add67f1a38b8a6e6631592341500c0e1be9a64503f5b4348b0a81bcc23 +size 724275 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 45d9ad0787e..f965392f5d0 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c04b9a56d2cd8a90f57cbe1e5ad3c7d0124dc0e6a7fd2c9cb299a3a8b76f6e61 -size 649361 +oid sha256:6e3d4425342f12a662650b31acc4d975ee9ca15f3ab52d8b92e4d69314ee8d59 +size 648177 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 41a2fb2552e..526f6b919d7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5a30132812f8d6769b05868e0e036d4d6a474a247279739ecccb197faa92c9e5 -size 560898 +oid sha256:e9d8b0f513d490345075303ed88104f223f85b69738974acf306ac0ffd73b0cd +size 557542 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index d198dacea5c..371a59feb1c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a3028b2b416dda6db86bcb5f5ceaf0faa31a54710a5058eb9eede1366a96a406 -size 649013 +oid sha256:1385cfdd2b19d81d76448971809732384f40fb424ea552ae2229ece495f54ed2 +size 647829 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index e298b0c3fbf..6dcab84457f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c1154a435a10f53d02092fae936d8b67afcc0cbf87939a81c4198c0833e6768e -size 563066 +oid sha256:b1167ead156f03ea623743eb2c403f90f579743d2a6d713f6db7dad201a96984 +size 560748 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 3a3ae7f5374..706463d679c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:43489cbfedb3ad110fc7c6f474b330548cc8279871925b3be54668c90d1b7348 -size 718311 +oid sha256:e2925f07ebd3d0fb2fe91b2c9087b0be4dc9d3e98152581d480701062857aedf +size 712145 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 76351b56e52..7e60c263540 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:693689b6d7aa69062b02d771e36d7d9aeec7f6bb1218cdb2cdb8b85027a07b27 -size 628369 +oid sha256:e0e4a042b412cbe9413431896ca8c6158dc4d4a966217aa9061676c6d4458db8 +size 624275 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 771498f70ed..c92fd69556e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ca03650a28297f50df2a5c8587c1d79b58f950b7207a16863cdbaee70ee28b2d -size 739841 +oid sha256:f7d632ca94648f9469c80d4bb0aafe4ce9f40abfda3c411facb0368e782b326b +size 736337 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 41799ff1fd5..d070a591a22 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0bfdf9c151d1773002b2cc19d80e8431c235eae54299272a6b47da35b320c406 -size 651081 +oid sha256:8eedc93ee8bf0c07260448d52718acdf777cd2f42f60bc1145978e3b381c6f51 +size 646839 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 8f140d5e008..84715804aba 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp 
@@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:349d303c6379de2a8f78115f6f38834e944bf0594f38d173c53d6d079bb1cf71 -size 726323 +oid sha256:5f8ac3ab916c88c143f57c952ccf636c767436a39173a40f6ccb9528370cd177 +size 722869 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 77c21593802..d45541647fa 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:98602e19793f4c691c10ba350138eea492e39de834d261456c6a74f3209fb336 -size 637565 +oid sha256:1b9d99e58401ff3eadb72e3a183925947d4dd5727558892950649ababc667b69 +size 633321 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index b8e7226c49a..8f25b2c4e19 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:82d765db4cc3f92b63eb20021b742141b37b7719db32d03609fbc6eedef41828 -size 742009 +oid 
sha256:5c4977efc8cbd9581787c63c1f627133bb282aef6071e632bf0537fd5ee1f12b +size 741615 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 0efedb0a15a..35bc4ae35bd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:30b83cff4253c33a90a03ec7c4adfdc5e43d2d04659fdbe83b41ad4f2546806d -size 653201 +oid sha256:e794f61517b036d919abe3cdb685281588f682d7413a3352ebc04c75e80d3513 +size 650635 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 1df365fea78..cb62d5c5834 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7faa40b8a8952dbeecfdb54ae64c58e8909e485b1598a1bd63953296eb79382b -size 728491 +oid sha256:132ce9faa126be08479e4cfe3df44629ad46231bc3d43e471515460d3a0592a2 +size 727307 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 3d2417d21bf..57096b6f985 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a91bd1ab342d86b3f9244cb8735353cc926a33b2087c75d332ac919f42e0f238 -size 640473 +oid sha256:6dc483efc28f554992e2fb78f3fecf9f69dad30387af5d3ce4fe29d00394c221 +size 637167 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index eb1061d0043..05ebe4429e2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:39a31fec7756ea0ca440b7ca2836142c5435c0fd8ce1a10da51bf457005fa1d8 -size 811899 +oid sha256:366a35c0ef7b2c4e2b3e1f2ee45ee905e035c6381c9eec35e9f9d95bcfcd5465 +size 808545 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 82f9d9999a6..71a1273a911 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7ea290dcfd14115453362277af29af313cedd0adafd93421ec63f4120df67d6e -size 719193 +oid sha256:960ba1a4a31057192522851f6a47a876e149b79e58a68405061f68b50960a35f +size 715049 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 4f6ae369a56..aeeeea7f599 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:941080114e578578fbbd0d355e4a4894d9f866c761a51635ce8c8fc850627410 -size 798431 +oid sha256:c03ab373749abde0b47fbf08a61a91b9f8f401b2976ec2d4def2a47e221465cd +size 795027 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 4dd39618fa4..2e772334ebf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:93e4247ad3eb74a5e710d74bbe496d7c259134c450687311db857095960a0003 -size 705677 +oid sha256:e934763e9b54f009cef81323ccd78e4fd924e335c1728298ffdb83dab30195e0 +size 701533 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 0de539fdb93..40bd42a3b08 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:baba73df6bbb10c31350d085d063f1662116d822de6e4034a3e768812efb003d -size 618179 +oid sha256:d96d8d5088eda1e97a6b4b904b1567eeeaecd9516406e108ef9fcae1eb471640 +size 616994 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 0881ffd5a44..a4986e76fcf 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:89f6893aaf9f638dc27546aa8220397f2b0909f7f8dc50d536e4d7cb611eefab -size 535980 +oid sha256:cf27b6670cc4365950e16d00a98beec8bc551a8008d9a8cfb44ac34a21e351c3 +size 535586 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index bffc54ce0df..74193eb3c8b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:70aefd3e7290b6189a14ef1b39ccdaee245299bbe9224541bdc95860b0df5eef -size 623307 +oid sha256:d0dc592d16e6d9da74dddeb0e2b152fa2bba73ce7896c419a8c9ae669fd9a2bf +size 622123 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index b4fe85a7841..b5c4a7f7d6e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:491828316904c258b5ffac093fa93a93591fea24072da21a20e32d17dc794b9b -size 541898 +oid sha256:9df3560b88dfa6d14b1ca2dbfe4150ed4b11fe6d12fd03f8e79a37e053c68ba1 +size 541504 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index ed0e14de760..5628ee44e1a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5b48e2a900db2fab5faafb217f01a45600ecf9f22305f42da35721bb859e676d -size 685995 +oid sha256:ed66ed0be1cab2f108aa680b90196c3c49b6a608f3a5633c8d7c62cd89d29f6c +size 684811 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 55f21eaf0ee..12a03edb9e5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4759342e2626bd32afb45a11ce609e3821bdfb067d67f4a668d37dcf9e00289c -size 605030 +oid sha256:f1272c1eddf35fef1cc981e4636bba2b49b2b95dacc52a6c3ebcdd9b831bab48 +size 604634 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index af712824bae..c91d2b8ceef 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4859662e270f7af095bdaa699eaea061b3bbe0ccdb1e2a54162cd9e49278180e -size 703477 +oid sha256:0b2838b1924ae0bb306aebf8d9914e158948e4d0a5304d493c5af54e04669355 +size 703083 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index dfe119e6d69..1d3895edf85 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0a86e546f06d8f5f12e0ee1cf640a586fd06aae47d611e4541d0955ffcb1a21a -size 618321 +oid sha256:93f38c66a04ad7e2c322b6a1f52a862f989aa1c6d09bd4de2fa046b40f73d954 +size 617925 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index be9178f27e8..7d40a53e816 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c0ed443e4657d9edfb6433d0c675fffdc8762de58d9fcec6f1ec3b096dd8146d -size 696325 +oid sha256:bd6f51a31458884d439c62d2a2c49535bf5c6b70ed6e7d6be54d6c8b01d966c6 +size 695929 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 
114c9eb1d91..c99c5e34959 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6773ae06a226d51e3427019769854170b7b98e09e22144ac5117e313fbb7e61a -size 611216 +oid sha256:9ad96edad741dfcc0d8007d9917e57a4b2891a3567b83716dd086d8140ef6b29 +size 610820 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 0d1cb63a3ff..31285a833bd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:20775e2a8e1f7ed0c974930a9e32225fb383b62f17ecc0dbde5e1f922f2cc21e -size 706979 +oid sha256:5caa71ea4e57461b8fbdd545f75ca09501efb7029ebb1aec22fb2e665b9fb7e4 +size 706583 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index eddaceda5bd..343e142e014 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aa66415c2c760a6e2b7657f7f03c6ee797ac5f8e8c19bee49149de5964923651 -size 622019 +oid sha256:ecfc5b4e398aff01854541300cb9944168d919a423573a38a6901841dc8360a7 +size 621623 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index dc31a655193..23a2431240c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:01c1263b9def2c3141e600d968b05a56aeced0bd4c8a4beeae1e3ea9981f31e4 -size 699825 +oid sha256:0ec494e84c4936ac572f3cac3773082eeca984995023d38ef925fa4c0b806246 +size 699431 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index d9110db695d..2b463ef21e8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eb6966e78d5f02bf36c659727d1a432afbd1e8f7cdbb32af94f23416fa100e6a -size 614864 +oid sha256:9af498147dbb687a59f9380d8d4d0d9b9df972a9a3aa6bb504fb476ddea21382 +size 614470 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index cdcdb9c90b8..5c73ef5e074 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:614c851513b2a4c913bc38ebce214b90dde3c558459d576858c5d980568baee8 -size 774649 +oid sha256:ddb1b81ec33f46d2dfdefc9163ffbfeb6bcefbc4b75c43e11d8f9acdca365f85 +size 773415 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 1c6a0b1593c..c7d40acf39e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3317e55704f73af59098a1017b61bd6944a73cd27813cd41a02b4e18dc5d3912 -size 688997 +oid sha256:ccc8a00a80d11abe92d1f0fc9bd625a5f4d837b884dc8d1f30498844bada6543 +size 687813 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 53139f4a172..ccdfaab4899 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f19c2139966f6f3dbbb58de66196e3c7d6635449db2d5d7ac12ba8a360ed56ba -size 767495 +oid sha256:e0bc1585f3c4c2c69a05d196de2df775336161e08a35e770200ea2ab723229af +size 766311 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 76c1c83b1c9..72f55e067ee 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1e4ae8099e961a8d47299b828df7b8b1ba43ba97c58e16c1fb1371b9ae256898 -size 681055 +oid sha256:55e5c6b318e5201f8a62d5e7e0b01d5251ad9298b26cefd8c0c9530155aaff78 +size 680661 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index adea3b9a432..513ef7fe8e0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d1da33d40c8d799d2226c2ecca1bb931707686ab4a0550ab8408323e6e2f280b -size 671067 +oid sha256:88e43c5f879c6ababd16a308bcdb4618affa07c09ddef7c87f7bf713c9055d86 +size 631995 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 4f2e8a97584..8d509837897 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp 
@@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ec444ab9209250599fac042910a4a9a5fad4f826584b91e276caa56a9c4324bd -size 594346 +oid sha256:1fc4b12e3eb09fd7a599eb676e0e1297569076096511a94752ac0e31a42c447d +size 554582 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index dd750012249..1e9058c0765 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:72e99fafd823b30d219179a559b39ca54673a07e79de51bd12da4ccf422c854d -size 675949 +oid sha256:0e42bb5aab59a8409f3311da30a1a071494eb2e075303883f9213b53c38bb9f0 +size 636877 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 146989134b6..aa86e9503e1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:54de3342bfd2d981dbe379e2365c4cb51ce6a91246f2ca86521200d84632c32b -size 600164 +oid 
sha256:c3daddea950dbf3c35f32b3298ceb82a77428ca4a036f5e1c67e7bf384715972 +size 559612 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index a0ebe121d50..82d507e16b4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:109835ae97ca741bf297bc84be3e30ea35e8754a20e16293987a4bf9f0b1d676 -size 738143 +oid sha256:664b214c9220778b979ef4f3773ff5670a67a8f5ff86ac141a3930eb5e6a22cf +size 699023 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 2e15eaa17a0..3b59698488b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d8f8ccefab34be38451e32851b9729baafe4c0b553bec16edede8bce21b49092 -size 662607 +oid 
sha256:7e3325a6c9ada3b191b966286653185ae68ea159d3435c32afdfcab6e714dad9 +size 624867 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index bb1686d35d4..3913a9ec80a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6e6a5c92b9578103b394f9f4c4f49cb5a257ba6eff5a5d3c9c89303cf418cab9 -size 757305 +oid sha256:f359a9ef05d3fd219b4972efb6e138ad8fbb3294fc02e031f78be0f45717d2aa +size 718233 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index c4b7cdf3a6d..8a6bf397711 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:abac427cbefc486526d5554546be6ba4d706a5ebfc12f5623f1edad229fce14f -size 675551 +oid sha256:8d5fd14fb2053d204ead0f14c84545c5a3d6997886f3cbd82ae56a806d3c0bba +size 637811 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 57493cc7d7b..3154749fca7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dba5653573f07a39b36b71c4db12830c4b67a2e1c3c36bdf630fbcdb0433fb35 -size 750151 +oid sha256:bb0897cf01534eb16ed1d4e185b889592cf6f366604f71b1d403e007564c7400 +size 711079 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index abf2a1946bc..99d139cb8bb 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f7787c98a88430daf20c05677ad2cb97d68c591b0ea2e1b114dbac49c43573c7 -size 668397 +oid sha256:695993e588094a0e9ca5568c77e2b2bc2b45e394199700addc9758bb444c7e83 +size 630657 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 2512f710a4c..3ae5c7705c0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:968e983f212505bc4e26df82024566ef9681b5ca2cc2d409f3998145d6fcf638 -size 760411 +oid sha256:372ba6e8224ca35081f0ead28a07a3b64f7f9f85c6d381d6dffe58c1ff33dd92 +size 721289 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 6cbbc469e47..f42c1cdfcb4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:afd732f7c75924b919aafa34297dfd487fed30f984b93ee52a68966805a516d4 -size 679989 +oid sha256:318103a47da05caf23f6ba975520eb24de7c6986314e2d05fa2b667df2dcd1b3 +size 642249 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 
32ee34ce0e0..941536c1288 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:161c4ffdb4a21da3cc321a210475570fc9f145c55e918d8ce76c8168a1178259 -size 753257 +oid sha256:4ede4592a267c4493db0c03bfdc98de56025acf49d173de8a00e57fe43b8d8b6 +size 714135 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 4dda7ecbdeb..12934f24094 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:53306f3596bb5bb591d2d46d29de166c939330b8d266299da234758579495bb9 -size 672835 +oid sha256:b75a8d9d2778b046bd4a0b987e8b3968f9d2427a31c2162994463d81f851c1b8 +size 635095 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 1765c12c946..6a071f8d75c 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:59096f7b6486ad49223866c2556dab82aa8db3eeb8fb818667a8ab8f90b6351f -size 826749 +oid sha256:d5b5903e5235ae98590ed03918f88663b777f1b321433fa31eb3a4531a288a23 +size 787677 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index fdff0d75266..790f768911a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bb39e10465c24f8c045f968a5f6f4209335508961649bc967012d8cadf3fbb6d -size 746277 +oid sha256:5363b581e4eeb8ab783fc74890b50c5179081aff9046730f9e112a1845641ca1 +size 708587 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 44ae0595dec..6109b97881a 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3abf3710708f5d1111693bb75cfc2aa10b1111d12a117fcefb4d1103f334157a -size 819595 +oid sha256:83d8076383c35bee6ccfe3ccdf2f443cb284f8b8b47c99fad6af7e6f4a832879 +size 780523 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 9f165a4aa1a..939889d991e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:139c8d908fc7dc0d8144d65959999b92ce592e767740d432b4616021a35e1552 -size 739125 +oid sha256:706d506b6950a6ba26c5602bb77af553af3558536583d83d9f6d2921f2ae660d +size 701385 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 9b9bb2fdc1a..f8520270a86 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:72909a516a5a7542fb33cd0558243a158d9e3e4c416e6dae38025bbd3ee24313 -size 665641 +oid sha256:26f8ef06caad000af4a4d0b3942bd65f2dba10d7318680aeab4021be5fc1c65a +size 625731 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index a2a38056e2e..94ef2517a55 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fc5f4b73bd3a07cd5bedd03e5c5d883b0f31818f593da7db35b2f5fd8a64ba1e -size 577226 +oid sha256:9710825b864c78195f01e4991374f63d730095c7598d4c92a30fc2012947f7bb +size 538302 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index 5405b4870a2..02aee15e595 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:48639b42fe74e705b2f7f95b68f1eec49b866228994c37b2f546449b0c6e3e82 -size 665343 +oid sha256:02c151ac588e4e02833859d1576886a4de4969526ab6a381b6001abdd3aa15cc +size 625433 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index e8c717a6644..35a7a8d4e80 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:41d250eadbece377dfb736317ad361204692a8c9b944b7e9967140fa248f3fd5 -size 580136 +oid sha256:ef333df6b0d8fb6687dccb3e825c46386a15e5540eb03cb1b158ee88f3c99b85 +size 541508 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index b39618786a0..2302575af69 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:91fdda6c640b5e3a9463d75a4d5836c45537d9b552210cb10f271586eae46548 -size 734641 +oid sha256:aaee75a9444b93982c2c0f838337ba64239ab8e3be6ec0023c7cb93f5ce4bc12 +size 689699 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index b973fe44edb..c6ac1a3fe28 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:127c6c466611092960e54ce03fa96e7d92814e66028996067151ab7d2eb53035 -size 644699 +oid sha256:5b4a26edf441a7e0b82f923a88adf7707367d864746d707b209948cff95c2b92 +size 604984 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index b6bd8811aab..0b8f751b14c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:650d9695c997a338a2ea9114f2af04ea955500469e501692b8d57d8d62175c21 -size 756959 +oid sha256:c476585260eca4e7299fee6b9742be7cd80ad7e3a11003b67492f27d5081a9fe +size 713941 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index e9d0ed938ac..5b057ccdcb7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c59f2abee03439b2787148e77cfb90945d0070403a6b99f031ee7dcbd4a479cf -size 667361 +oid sha256:6dec5cc0d3bb67857917283dcd1b6a58d7ea0e18111e67786af59e41b5026732 +size 627649 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 22734f999ae..6b49f2f6866 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp 
@@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c0118f1de3f8daaf33256cc61867e11c289de99250cb0903dfd6221fe35cfd5e -size 742653 +oid sha256:01aa3082c3ef5d7bd9a3836103b964726fbcc824bd8044b9e56fd136fda57ad5 +size 700423 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 6fb7da3bd56..e273d3ca276 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0f2d94c0e55c3a01b29f964a504fe8799b08a076e7ae573294e3e509c6ec3866 -size 653893 +oid sha256:46d9e8035db159b4afca6a508bf6ba8099fd735e049eb49f98606b755248898f +size 614180 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 5c3001c9ad5..dd24379998f 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4746d3d8d1e3521dfbedabff3bc71be3e5743c2317c772d0c468317f0552d566 -size 758339 +oid 
sha256:f4d3dec2604d70279553f63f95c748e7b78d707158eeebe8dd1665763e8449df +size 718427 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 5dc40d43968..85292c57bf4 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cf537b9faa6ae72047f8de37e1728bee7311f509675cde693ce73334a09a50ad -size 672047 +oid sha256:51f91b0068d26d3528321257145ec71dab650a39933bcadf8cfd61fb602d0eba +size 630705 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 281b957bdb2..db88e27dc73 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:817ac1d7d470d482aeac295fcf490a2b787bf2a6dd697bebc4608be53b5efb55 -size 744821 +oid sha256:54c792676237c4ba04729f6679e324f98b0ec2e8108dc52a917cc4f834e74ac1 +size 704911 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index f3efbb052d5..30897a76968 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5b69ffe5f7fa3ddb38b6609898ae464111723b1800ed4405cc5c6dbd823dcf28 -size 658579 +oid sha256:5a62dc2f8a3fb7b12737fe53389238e5ba0177cd59a922443053df8835a7a446 +size 617977 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 6740e3415a1..2943b703352 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:69d566aac0510c5e80f91a291d6ff88fecd71ffe11d5583f15b469af7fc96f11 -size 828229 +oid sha256:9c72d25c2b00471b9cb72ad13ed9322544f2b0f6a859fba0d0533236cdb9bfa7 +size 786097 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 51ddbad57a5..3ba6be59ab5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:091819839969c2740ec5cbd7a3dd29fdef00ed3c4e13f8f5315ecec0de8c3dca -size 732909 +oid sha256:5caa9369156f8425e14e73fb95d39847bc03e537e3fc6227f9b3efdec7e04443 +size 695859 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 1ca2cae461e..937b42391a9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:15f44d7082fff8ed4ee11c9f1b8e375c550dd124bd43ee93f02fab43f2b8c9b2 -size 814711 +oid sha256:ed8aee5f00a032d2a157e850e6f46c6b1c75bbe76fbfc0fee4d2c23e7ee100a3 +size 772629 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index a88f8575cbf..dec8dd4960a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5434b90d09ac1513b4c3f1739f36e8e0e2bb0c43a680b8763295c29d8c90a41b -size 719441 +oid sha256:8e4642d329d245ef050b508c58c305a3b4e3815dd7f225a70b5c19528ca49567 +size 682391 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index b287e212c63..4b1d0e0c770 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fdf4aac71b7ed018d9fc077e7fc8a1fd251290ee91ce8bcc69f6476ca8c40a28 -size 626319 +oid sha256:1f68d9c7816c1eac4ed9169609d7242db63e320166035f5ee559d9254fb32828 +size 625923 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index c167bdfc6ce..ea6f71a39ea 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6b587e6bcd01a1d74fec0e31e976f4064dc973e8b0972f44427496c34dda47ba -size 544218 +oid sha256:d245b58789f9373adb4eb70e91ca54b3ae0d60f462dbac95ab1b1d3dcdfc9353 +size 543824 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index b5d33f8dc5c..784f1454e6b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:96d43cf5bbd8b9fff24da5183785445790a906f93bbb78b69408c3a08b483cae -size 631497 +oid sha256:38e20344033b2555abf660fab37a8a49b3c4b6fec8566778d97472ba705d4a95 +size 630313 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index e5c632b55f5..1553f3092a8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:27d72558b5bff8fc52558aa5f23adba73f41d90397024e9b332e3da42ac08390 -size 550136 +oid sha256:a5e941bddda768efd181a294b360e79f8a6bb4f5cd9933be5963e803c1583c3a +size 549742 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 533d805c718..67378ec631e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c0c66f4baa8f34da3a308b5daadf1601c0c2923f6073c0ce0afa1466e3c9809c -size 694135 +oid sha256:672209df04c0aed5846a6d6eb6455c04707b78612c5672f61d3597591dae7c35 +size 693739 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 3826fd8839c..6112eaf6151 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f0ae1116356fb9676a81546554790354f422e7fac83aba068128828c433b5c88 -size 613170 +oid sha256:19bfca8f73e41d395d3e6844c6dea8adc8a744a4b27b30dcdb398758b7a542b4 +size 612774 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 7f210502490..4a7778c54d5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fe6cf650592ff795dd493776ff8c5ba3b24c5c8fc2124cb87acecdea8dff724c -size 712457 +oid sha256:64b79d82cdc59862eae7a35357bc4205a0606b62f4b637b10cf83a8bb3ee52f6 +size 711223 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 8cf5404daba..de430516e06 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:626617cc348f917445d5f139e2792ea6d39cef97e3f8cc29ea0a955cf93dd3f2 -size 626509 +oid sha256:a8b336587a1cb0937b20ce5822ef3d0b5d84dc7f97ed154e3675e2a949ba1143 +size 626115 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index b1ef65c9afb..3a015a77a25 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:70ff44e59824d4d6eb835cc6d5f9ae4575eadd2e2dc9c2bcebb78c829f70a04a -size 705303 +oid sha256:0fbda1584e1e47c18003cf7e2e54df34602dd77510f5e5bc5f437cd15babcb2e +size 704119 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 
b2c8177c8f2..d487e133d79 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0083129b976ed24cb8715a0b63d8e1752b5d584d3dfd7d37dc4e0214a6fe94f5 -size 619357 +oid sha256:12a0ea463cf9cc67132d0ae3b137f6abaaa293a19a4917a4afef6d52afbe519d +size 618961 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index a9c123b95eb..b43d7658569 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:48d5b82084226af5111b01f3ca91be5cf5bad4c3c14feab8c87707b293dcd330 -size 715907 +oid sha256:39c2e857d3ebfe23b534062efc2fa14c2653e4562a35b07d44e927a2d323a015 +size 714723 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index a41d75010bf..dcb98c63bd7 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5b57b4e4d3798f8de2255208f485d5b88b9682535559f39d013638f34cd05f95 -size 630997 +oid sha256:531eeee28f5cff0dbcf180924ce68578586d438bed65beacffdc46bb6280d0da +size 629763 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 6b4bc405819..45f1e1b27c5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:73c80d687c1c1b437d1190ac71901c6260323d33389c41e9a1ebb0969e9080e3 -size 708755 +oid sha256:f69859630a6e631082085bf0adf2a2b0165e973be9596c2af5b546212d87d99c +size 707571 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 258aaa49f23..e548fe5f894 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a9011ee3c98c173824ff1f5cf59ef0032ad68dd62cb1414b466996fa840dc491 -size 623843 +oid sha256:3c4184f7abab87d094d132e471594c6cc1363af0d8e27b1b2ce39c6d820aebd4 +size 622659 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 5b6959a34a8..b2dea586ce1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:387c42fcc275134217f6de6de1d557adcbdc153d2fc2ed15daf77c387ef8428e -size 782789 +oid sha256:c17e311ef4876bf4f5665f7e1596b25e4b8ae0b05cc4b570eff25db8527ec2c1 +size 781605 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 9da38a6835a..a4721e12608 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1090f367ee632b08074c3ac5f34f5865261a25e55d489e9faa8869c71bd93a69 -size 696447 +oid sha256:b373db2f1cfac7d96cbeb3bdf91993ca6ef1a2411cb4e4f2d55ee21f280562b1 +size 695263 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index afdd63bb5d5..aaa040fd408 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5a281a9da88edbfdc8274866b4a3cb5bcfc4f63f1d356b6caccbcfd719ed2a44 -size 775635 +oid sha256:c1a622e937e540ea8aef0d426b9d0f914cf43a753ed14b4fc1598b256304d307 +size 774451 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 0bed8f76593..6341ff85c7a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:33b12f9abbfdb03fb2136acda978bd89be3374200a157684a984a9aefdcc5aa9 -size 689293 +oid sha256:3483f215af77ad70577a3700f4c29cc33599e20c231b45234dee0f3cb4fccf08 +size 688109 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index f72c598629f..30b2eb94b52 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fa6088aa260e60a6685a974083b1b5e3b5509903217d85ba724c6230b5963f34 -size 724447 +oid sha256:a4277e4db36c3daea001c03e05bb076df65e47799df0160432c7ceeb048c597b +size 697855 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index b871ded34f2..1f31e6a9f63 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp 
@@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:23652ee2c5be7c84dbc5ae4ca0afb9f9ad7dad53a8ffb6a1c98bb3f8e3e2f259 -size 634751 +oid sha256:e9c9f1310bb5cf37c5b01e95e680b47947e93e49491cf811087a317b40b78f4a +size 614374 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index c40a8f9c6f3..f7e41e58f9b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:03d2ad0f8a78fc656cd9ce2ce7e2c7774e960be7bd11b28c3a4c7c3e5f56a425 -size 727897 +oid sha256:f5f139643d87c1c34d765ab1b9681546979d03f5009ab3fad7943385187e5a2e +size 703329 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 40ece1de08f..9f197172c0a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f7d67f93d5ad9cf44219c3e1f893a4897dccb6fbfb7258e08dbfe3cf9bacf6d4 -size 639089 +oid 
sha256:ee48b3f1d0d87548dfe90474c975e0a4fda9902eafe3338662d46f22fb3d49b8 +size 621725 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 70084000e5c..bf6a16cc1f0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9ef895d798af17774d068d3cb32c245948257de76b9320f931d3415fd2dc0474 -size 792263 +oid sha256:9266c133d746421ad4ee5cbe577fe2d4d144b1aea8b47cba4b6fbbac00e75328 +size 765671 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 5d4557d3e36..55d09b8ebba 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7e30421e8c2e35bc286a8d6da24e541adf252073ded3bfc3275aac5a01e623df -size 704539 +oid 
sha256:ce50b7a4bf5f867da6dd3974b48aeaaa0c95dfce55f808d4edad45501212fe0a +size 684559 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index aaafe226d2d..95b6d39161e 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:82860039c6b1ec8bbe1caf00450f9cc9a56f7fd1558b61820f7868adc24bfbdf -size 824743 +oid sha256:17c728ce18d4c3e30cf2460dc2b22c5a23076eefa31aad5622bf3857809f9694 +size 796179 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 0fd138746c4..40cc66543a8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5d88ca507a9ec6568ee1463606389fc9b553df2ae5af2d3f3b5c7539a3ed0e57 -size 733517 +oid sha256:3045bf0293660f957b9423fbac332014522fa6be5f384f01ddaa38a31004ee9f +size 705891 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 12e7ecab050..76c0d3b9f30 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9cb01f74f4341d489ac67dc7ea25c70172cc234fb9849e4e69abd1cb81a1946c -size 810437 +oid sha256:7f3f76e8a3f17801ba526732638c4dd02534762c6fe1448cb78df45fb3ff52cd +size 781873 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 64b33dab181..ed292d8002a 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:939915f6126d020e2dd53d811135f26ea481b4e8eb4de9ebe3977c24c728dce4 -size 719161 +oid sha256:6c9bb03430c6ed6fdba47db5e55120ef858f27b9ab715644dab6c8aa43b150ec +size 692373 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 811d43cd5d5..1c81c9b5c28 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7e29723ce4ef82c226fad5b54233163217a301266c933122ba7fdc3789c88244 -size 827011 +oid sha256:8f996dcffa660c7b6b64540f23e8c0b762dcb20938a39a259e2eb5f945c3755c +size 799531 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 94c78965eec..867ec7df391 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:726405555d6249ae423b8f02979d211b74487644087fa4ed166cea7f4d0ac86f -size 733269 +oid sha256:f3f98a2b9f1cd284b7bb634e5921044d18f23939184fc5e0326e068e64c149f0 +size 711759 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 
16060de21a9..5c69161ed55 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7994b4f86891bea4c58b182cdb2d7b044417588490af9e8da6d5f6fc0e6f1af2 -size 812703 +oid sha256:12b240fb28746814aa8d3c6fccf8e89d4b79cf2d7e29448b5c1c3e95c13ef60c +size 785275 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index f7fd7ba0b99..30b869587a1 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:50e64aa71c58ed71f4c8e8e7ab26a63e436a4e0124c4d16439cd883d8cd92857 -size 718963 +oid sha256:98db9df928505ce80e48717e8e2846a79711c66ff1419bec23cfd9c2b755172d +size 697453 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 292ee07c508..d82e922a4aa 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f245b03fa3210d6172b69d6f89dbe9879c47228399069c23228a118af3d9f8c6 -size 894285 +oid sha256:e17a1ae8ba814410d8227d3a55c7301c50cadfcc18068058b1ebd0b37638c6ad +size 867201 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index de8f80916cd..1ff4859a7b9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fab062c47954b32d092c329be7248c3564b00e9197e96ecdf1fa42fd21abe2be -size 800149 +oid sha256:3609bff1aafae19a5f6ba4fcb593063fad84029f948c2f5d4739e982c2769e01 +size 780269 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 6be4b9553d8..7f47c3e9d50 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8ccab798c620107ee44fdc5efc900a331090bbccd5c3d1ae8c4e6e1b6bb6dcd1 -size 880029 +oid sha256:38ce4d7ab63149d597e7a909a2a0178f7821ec023c363578c464b0a3c7b91d95 +size 852895 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index ea910ea8706..533fa0e8ec5 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:294cfd21904e252bbe346077c6a3bb7d23a17a9d30baaed02355e3a9c2159951 -size 785843 +oid sha256:67b298ed972efd13f5f7fad8c7d162d5f5c4695b45922320d79966af488d2dad +size 765961 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 8bc1228fe0f..2b82cc9175c 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:11fc8b9024f226bd875e51f6379120209ed4902a9e4df1131b823003f965e94e -size 681575 +oid sha256:593793f2f5579673c7e315393935a15e9aa2b6076eb58114047baaa054c782e3 +size 655971 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 9a16a9a6d9c..b95789eed99 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e90bf3287533815f536afd5d68792602ebf5c77e662f1e5b20a2385f2571514e -size 584232 +oid sha256:a4e8643e6d6a48773c9636aace190e89ed707e894c4067972d99dd4942c5856f +size 558036 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index f96deb5621b..14c0e7fe701 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a0bee4106dfa0bfcd82cc22986983b3cf41e38ae03e8bc7e78a280c32c38c027 -size 680785 +oid sha256:e100826e98fe7cc807350b1ff2c946d298e00d21da38e34c8e70b7ac9ddcb252 +size 653453 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index 64a1f65327e..784ca7390b9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6bcc4aaf6b7893aecffdd13dc199a3ee082d118d48fb3431550521514d75d2e4 -size 600412 +oid sha256:4a1c172cdc954551cb210f4d04ba320f5b0aabfe1cd93f2b2817c3e1ada7bc95 +size 572686 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index 70720c57cb9..3a584c52e22 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9df649e455c355615165eef077d8ee259f481a7d137acddb7efb805930140bf5 -size 753683 +oid sha256:3c75467aae450a26c301b2784fcf5183c75056d311d72cb577a6535a0defa2fb +size 723935 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 0e22f98417a..e34e1525945 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:041bb6dbb909ca25c31b369561bae1457fdb00a2dd5f85b54ef010d1228bb9af -size 652543 +oid sha256:a219c56d4a7da48c3c4a5fae883e02de9f4a1ac70f08ee16b94e7c49d9b68399 +size 626347 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index f6d3850c975..6548643814c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:760ca719ba082766303606ddea9b9d6f02b700048285f594640ab941a8fa0fe0 -size 800373 +oid sha256:e1156f2833c0774f7bc9e820abfbc7a0f2365cc0d1d2e1d0d592b38b47c5cd21 +size 774225 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index f13ad6ef6dd..ccc161c7205 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8ccc1fb78fe049b548adf4aa97dbd0408ca188fe7f8451b07b06f7223edf6e17 -size 694397 +oid sha256:8b0fff8cb51d620e076a92feadf7bde97ef6c15eee6c323810957f899b66c2b6 +size 667313 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 5ec28240d00..79eacd3aa47 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp 
@@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6e40845dda28c9a98e0bf68a22b8970401a83ff4e9b853d15933d074d9ef8f4a -size 772597 +oid sha256:f66a16d4588b6f72e7fd56432ee12d900965b6461273b01de2096ddddd9ab14a +size 746451 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index f69439b33ed..62c912dbdf8 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bb25178b4385f6fe4397fc08e5da8437c8ceeb0f8dfbdfe22b1a2646c40ad26a -size 666621 +oid sha256:2291c86fe9e5b6b8da598d1dd4af2bd1e7ed351fac0ae84d2729ddd7517d0fb9 +size 640327 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 7de9e8ca9cb..06eab25d628 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dc80e166f58ce0508174de48204c8c268b016835baac094b3e218a2548a434dc -size 795487 +oid 
sha256:fee2783be048f803b820aa367b8cfdcaf373fe95ab5b514bf7c28bd8118327d4 +size 768007 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 83d7538ea5b..a025a11c97c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:23e45f2657e17e37c5e084544a8f1866f310bc30731725ba742d7beccab7a701 -size 711611 +oid sha256:dbb8d928b4fabea82f6b11fa381727ce5145d548b059f607339a7b0abafb10df +size 683295 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index f86f481930e..b6dd9500188 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8fd3aa74adf8cbdf6756488ee4a1ddc095e4a409b08ceb5a60d4a3875f8b3304 -size 767711 +oid sha256:363ff50eae0fce3d73e91a210554690603c5ae36a73d56615fa649f6d51787a8 +size 740183 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 5701faee1e2..3b14856eb90 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4fa0f767b0f8ced1b6d6eb98949b161f6f5aba98f51b2c73658db6c87f67ba75 -size 684577 +oid sha256:a51ef82afff8aec7a4dd27450820a11f550d2cc9ead290dcad6ea09f71a5d1a8 +size 655519 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index 56d658bc092..be527b589c7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:417f0d7b69940d6646f173baf540052ce7dde7d88f60016d86dde81e2424437d -size 864537 +oid sha256:586e8b18c9d87d2feab1dd51d11cf5cc6f475cc0a6c75c660c26438681ba56d3 +size 836763 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index f60e8b90c23..528fa2be4dc 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:234461a33f64d650ca0aa59731556d677b9a64414211af5946df75e6e91ffc47 -size 759745 +oid sha256:6c04ef5414a62ad57af5628ad065fc8217e625e6765001d9075ca9fec2f1341f +size 734733 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index fbb5e19be3f..7f88fdc8829 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8623b9f2986a515f57edd1c97bf30a4fd22ff7a1238ecac10ddb55be26cfc15e -size 836713 +oid sha256:d2ccf31743379879f2901b6cd6ff7085ef159fb800e80863229a9f86df817b3f +size 808939 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 436dc2cd8b6..f53df067409 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:24820de85fc66bdab9cd93561578a098ce1548a09e85292d00883b7397521323 -size 731971 +oid sha256:5eaf3bf34dc24dff469cc9db9040c0919a22cfd5228ece7b4d1a0acf80bfe8fa +size 707749 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index dcdb0f83dd0..be31209e579 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d53807702d66018fb43c603db0f58e8fa8d56cd17f0ef5b36a095222ff3e452e -size 644917 +oid sha256:ed9af69b33da03f27af28c3177577db205f2c78c57ab50fd4634f0765ad739ea +size 661887 diff --git 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 17677a5e777..9d681b251e0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:67766915329182c91b3ae22d600fbe89003a6eda8090ee16a9e5e66768565576 -size 559758 +oid sha256:44f9343b7672275e76fdc91fbdef9336c2effc2e023054a60d584c1ce340034b +size 576730 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp index ca629705ddb..7aaff31b0a2 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8bedf43f689c926a1073a52beaa973be79da1cf187c6b11f225101ebdcb918cf -size 644915 +oid sha256:c7e6a8108f166223dd791a26c464af46f96ac2f1a13a2315387fb1153328b893 +size 662281 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp index e31b95014ce..776d55393b0 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:94f93ab23a411084bffcccdf312384c35d1733ba4b33392d6c3d24cfb77476b4 -size 566318 +oid sha256:0d6434e33a3daa9977d350f8cccda1d8948c2c75dcdaba618dd00c0ec2a4b7c9 +size 583288 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp index ba048ccda82..dca0b1431bd 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:89c217db3091b815f72653f917f57026364ae4b6ed55ffa8b11c04dfbbb6c04d -size 711943 +oid sha256:ebc66204ea01392f87a1d13d7c75c6efec72f0147a5ddd5881a6567d3eb17d94 +size 728915 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp index 19783a23320..561cd6bf3f9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6fbabcd2139cd90f96fc2633f260050b5546d31af9ca7363dd49eac3a812aba5 -size 628661 +oid sha256:69a7e1422d729d50bf84a0a2509b9d4b9297272b464b247cf1c54645ee5a0f5f +size 646421 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index b826877fecd..461ba30d493 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a10f552d3c94395521448b2c39e3e5c57a0dbae59cc0d0f35074b1c8d39e40dc -size 731845 +oid sha256:7e0859044da9432034fca7f7d0a82be7c58082a46f9c195426565554fdff8ff2 +size 748913 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index 3f64df28314..6d4551bca78 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:702c587b0461a11aeb0dadfe2e2a2e865492be88e516578ee6d1b3f044eb5c3c -size 639731 +oid sha256:df46dc3016b960302d476c97e030ccf9d0b0d1280a1e908c391081f9c2ea2b4c +size 656701 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index ad49c8b66ef..07b3635ca60 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4e1a0838baf34af09af1ec804d50fe215fda0a61382618fa88e8225e1d79a218 -size 724789 +oid sha256:958b951cc74c33bcf8a91831da19c98b6c0c2a2b086db76563b0651f24a70b39 +size 741761 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index 
39e713266fa..32007c29d23 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d6c5f2805e2c496e81fa7837b8567f8ab91d95604cd1da4c407214e88e4d62b1 -size 632577 +oid sha256:dbb52b145085f6e9042ade66c0c08153588390f6be91e281da275e0d48b2a849 +size 649549 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp index be2444b368a..017ec835bf3 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0461ce5f34c9c033debe2def90135de0621a3f3ff4f6d499a7f8a66d47ef641a -size 730165 +oid sha256:04d58d648ba7f8a0b421a9812bc097b2a4231962d3cd10566f3a9bfc5c66d91b +size 746297 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp index 64c7e7e6a81..427bc34a58e 100644 --- 
a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1e9c70fa126c25195116fb6414201fa444161b554d0941cff8ae93dfa6454027 -size 646439 +oid sha256:ff8f903185eef04d5af988f2728110f67ebe58512c2b441f4a16b80ef3d1a8fa +size 663409 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp index 8d32ab978cd..bd6cb9a409c 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3b6dc16bfe268c9713e96348c731e19d18bcb8fcd6484702bcd9bf0b35d14f94 -size 723011 +oid sha256:087e834f31f6113c9dce85cd96b8894adca175aa99cb8c8c8ee1df152c7d3932 +size 739291 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp index 0f3936f414e..ccae9cb29ac 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:26b71e6c402b6d4e330ad18c26a51cdbc300e941ecc95896d00e782b2f1d006c -size 639285 +oid sha256:b1125013a89f223977f289db6037da2e9da7abed138771a480802dbb4c3fd0a5 +size 656255 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp index de82f010c0e..c87329a14c7 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a5138f4fb8b22c5e33df2404a39ec7a4eaeb03ef8a6a5f83f09e1c5d48d9a30e -size 801633 +oid sha256:4e7def3b1490828bdf94dac26d9a1d6553392b7231599528eb6770c8cc7c9a41 +size 818703 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp index f5dbb2338e8..7bb361dda37 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09fa798601e33604c9509a3d831182fa716a7e7f7057a48c60fc077fad110abc -size 710853 +oid sha256:ea335c501610c2442e0c53648e805035a8c78964dcb1c4cf6157e6923d8adb66 +size 728021 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp index ba48e98ae8f..1e842d97a3b 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c10e5656c7bc67adee03b1f1a4258195e3f06f0ab6c170dabdfa38eaf23896de -size 794579 +oid sha256:4a9472007e252ef36e423409395f795d7c978ae08f9c7a23a0e8c7fea9734291 +size 811549 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp index c29091e2337..76fb969bf21 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp +++ 
b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin.cpp @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9d1b1aafc370451fd4c1c0a366934c7738b78c17cf190f682f2f9dca64d447d7 -size 703699 +oid sha256:5f293ed3c89cc8d125acce49982a619eb8f526edf936b75c33d95f88e6c1a7de +size 720867 diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/kernelMetaInfo.h b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/kernelMetaInfo.h index 5384424cbca..707269157f9 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/kernelMetaInfo.h +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/cubin/kernelMetaInfo.h @@ -24,7 +24,7 @@ namespace kernels { // clang-format off -#define TLLM_GEN_VERSION "571f61a5-dirty" +#define TLLM_GEN_VERSION "1cfd7998-dirty" #ifndef EXCLUDE_SM_100 extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; @@ -70,6 +70,50 @@ extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunke extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; 
+extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char 
FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char 
FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; @@ -1482,6 +1526,50 @@ extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlid extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin[]; +extern unsigned char 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin[]; +extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin[]; extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin[]; extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin[]; extern unsigned char FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin[]; @@ -1929,6 +2017,50 @@ extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunked extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int 
FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int 
FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int 
FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; @@ -3341,6 +3473,50 @@ extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidi extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern 
unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len; +extern unsigned int 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len; +extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len; extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len; extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len; extern unsigned int FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len; @@ -3773,1868 +3949,1956 @@ struct TllmGenFmhaKernelMetaInfo bool mReuseSmemKForV; bool m2CtaMma; bool mSparseMla; - bool mReservedParam; + bool mSkipsSoftmaxWhenPossible; const char* sha256; }; static const TllmGenFmhaKernelMetaInfo sTllmGenFmhaKernelMetaInfos[] = { #ifndef EXCLUDE_SM_100 -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "300342b9a92aed1b6493c8e61ae23ce4daecfc36f1874a8afb110f427c256833"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, 
FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 127120, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "e4b79da65c294eb1d6cc8c0ad8f3fe424f5a95766a9a110c9eb9da30389c222c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "3e404a51143e4c309017b8bfff6a69c95deb91afc43bcf3daabb674ada6a2ae1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 127120, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "4b9ae5ce13b3bdc619cbfd7fd25f10ce0d34a239b0e3dbd5ac58a9ad12569b90"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 198888, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "98c4ccb61fc34c15385e0de2ffab4549bae6a89df2360b492dd992b5f9a84dbc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, 
FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 195304, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "60d64fac6b7bf2983b020bfc200c52271a35845193a63eba74c1b717f6fc1470"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 165088, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "785c1890ed2f8b90aac3a438914ffd0abd14f3d1ca898da04ec139396032f728"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 161504, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "f475b18688bba375366816ce595859bfe405c46f1c048a7b3b9e348a08bf3bdb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, 
"b95f6c72ba50c7977c81b17adc56820c8785f8f64e77d91e179fdc0ed75745ce"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 127120, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "ddf7d23fe01414580b5351895a489591b455a55b0c0e9a4bd73f40f92b67db0f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169296, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "6c5ed67de012810c3e0a2ba5de8e46b66c13fb3db6df28ee6512dd8154afdbad"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 165088, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "ceb71929b0acb9297a5a131bf36e1a4838d9c8fc86937390f19175f08ef4fd63"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 32, 
0, 2, 8, 1, 0, true, false, false, false, false, "61719fca0c017dc4a128c61796301a055d502a3adad49e63a77084e016eb2e56"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 161504, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "b21b6ed343d4dbbc5d68070d784847d54dcc4c7cb54ac799096c4ee8011d94e8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 198888, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "0087af7be3fc8ec7ad42e907b2b9a7b684c7809ab34e8dcb93be15c38dfa11be"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 195304, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "9777ecff2a24496c1d3f1a47560313f1793875c23283464773e540566aba6952"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 165088, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "5a80b04be6e34ff4f5a55fe335466a4751ff6b59506590eaa2270abc150514cb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 161504, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "7c52465e33eab28b45bbab1c8184658357108dca5a898343290bbd5e8619bda7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "5722d9aa200f13ddff04c4fcc293fa284d7337b6abd8790a2caec0da380a753e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 127120, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "bbe56dd3c3a365d604cc6f54128135b9ad068732ece00f213a3d193215a0474b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, 
FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169296, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "6e26cc75cd6e388ba66a4485a1c0dea99fc8f1e9672b8d1789d837718b18eb4e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 165088, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "8e3f7c82600c11e09fe5bdd9e0dbffb3299a28e70085085ecaeed061360cf93a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "4d6cfa83c4967431b9b4a18797d405b4bd247cd7de6999e0c720c1c757347315"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 161504, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "f8004f130384993a08af53d97d86fc7ddf317fc8a0d78ba148a8a10c4b8ed625"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, 
DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 198888, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "f9dd19b0cd4922a4630dc0916260dcf7eda51a045cb291fbac94d3d0c99ca0e2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 195304, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "5d42f85564aa85e43106f72a585351cd97f1842f94a793a5648eb7134cdb39a9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 165088, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "9c64bc98d49874d51c6c04d026a1baea37881fedc8411e3f8db56afd23a827fa"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 161504, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "f7618d2c4ad49d5a9415aca0e6e43016f06f726d6f2b765fc13603d2f1e40e56"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "041fd5fc738d7c35157cd53e29af79466e5cc78bb613520498d2ed6b2627b19f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 127120, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "179fae60f1d278cf352fc59f6e72f2230eaff0149411cf83645bc6b9bb6d6649"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169296, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, 
"e027cc95042acc2d4dd723527e010fc79df1a330482caa424c6f3badb6047df9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 165088, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "1245902d5570c144ec52e651bf3f7b405d79548b914e1af425f06e6a9997b3dd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "40f1f682ecb27dc36fc8a0c4ad50af1d849bef9171b30fc5e32c0c0d12e966bb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 161504, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "70c0fac9bac1c6a16d66cecfb22f6b0bc25ee7f9456545d12c574e80c0ae19c9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 198888, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "c120294c61d227841e5c4d93d5091c4356d9f6c30b84f6379606a3ef31272ebb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 195304, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "58524c1831a1d5286e21ec7df8ecdf04dde77407ce5ff77254f4f0974b51c98c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 165088, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "4c8656fc3e8294617b40ab700c8babc3cb0a42df3a759b2f93cba2b3df61d00a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 161504, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "f113ba0d0459b4c492a825ed80f4921b672cfca2b239d4bd5f1088f846d5fd19"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "83aa728f5577bd78c1700754b3deace99b2dae7feb80af63df070fb7aaaea3b4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 127120, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "d976d0a22ca0799c30cd2e0cd5c6102d53b62562af557675bf172e706c1d63dd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169296, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "25614763f42762ce1a3bc1a9b224a920b55e7e7c4ee79a8fc2253e173238130c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 
16, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 165088, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "e3b589f8154aa443968d394b4c2151218a7e5f93258d77ac55655564dc293ba9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "dcd4a1827505bd2c8a467952ec560ad23b2a8bf19006ecf32885295b91e1986e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 161504, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "ee298aaf8f168d194887c5509c35ee0f8ef8191303b424f15c8f8d9f1a8451e7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 
64752, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "7bc60263300f24265f691da6394adc03342e7a99ee76c6e3dd1c63c2c2f83115"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 64656, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "f2f418baf0d58e78447b3734ff81fd13e3068617d9a575cae9f1584d133b59aa"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "e2da32adad55931cd954b76569be9b2ac8ad31e989d311d66dc3c30d3fb7a071"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 64656, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "09dc0975be8b469510951acf7aeb9c8ce6c7e009d2b0aa7c472f77350437ac9c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158072, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "fca7bab144e56e91ed5b15dc31203903b2326569ef6365ef7187c82f29bf70fe"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155512, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "6088e616a52833c8774407a8846459be60d96968a70157a8569c0cca0e4c0e01"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 123248, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "d27d5015e03e88b7702d65bb924df960b8c596e2350d98398a046c189476b5bf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120688, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "893ab07300dbcd5bdf1e200e82eefb6968c23422ab233c2078dda98cb4e4abd8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, 
FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "c89fa1eac51efb5c36a4bf59754258211c98787d1aa941bf8489052c97a3ace7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 64656, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "02462e6fcd36a43c58f86ce5ba4bff658f823ee5543ba35d696255c2ec27bd18"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 125408, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "05880821f0cf113b47f8d061d00c5b234aaa024b0e3c759306ff6db2ed72ab1a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 123248, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "d48110d56931d8c72ff0734475f1e54bd0dcd2f1fddededa33dc80d33b1643dc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 
64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 121312, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "ee71836a70b61e36e647568ac7530f1bc6dbd761b174315b2a38545529026b6e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 120688, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "c0cdd17b68a608609f45171c3053be17f147a7d3cdbbe3a730b8ac22f441191c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158072, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "da3a849559946839f009ffcbfc5bdf7aae6dc8985981e7c84e9341747d007362"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155512, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, 
"6fadebb40c4191edca8ceb08d3a430fe775bc9c96ef16d0ea9ec297da124b22d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 123248, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "eba504138ee2f545e9d31d02557a20b647a46b3b1a672dc4b7ee8e2acc2bd901"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120688, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "a324e04531d2906fd3ff5961ad6c87c261ff4d4e53ba109d13be1bb219fbbcf9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "b19653791ae783bc72a39bd31551eac528537d610846410293cd951c2f61b66a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 
64656, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "10803e293e0e580db693a707d70524afddda0efc066eaee188f03245ef6d3d55"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 125408, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "2b7583bcd1c32d57810533213e108833e972f2a166f81e05c99cf315501bee0f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 123248, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "fad66ee4f6180297970290e7effd58c506291f8ec6a04850837555afa8a6f86a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 121312, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "0665450181e7227fdee8584eb14125295d82ce1b89da1c01893d39dcdfa7e18f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 120688, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "d447a43d4ded5a6317f31764209ac7a60d4b98555515941dfb300f795eb7f1e8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158072, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "836e599ad3a628f6b18884efb11f10e5c135b8fc01ef52c8656d54526113152b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155512, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "263e9621816742d121711f95ac2119e53a609a10eb545f5b1a0c9312a4b67a88"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 123248, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, 
"1b8a32103447f28b3d389d7b27527a07ac806c7e7f03c02a920042d4ba8faf60"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120688, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "ef19f03ae8ccd881032f298a1348b082b16600bf04728cec35f93e99185a2630"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "9e0b4d122dbc0d2096eae94de23b02227586a6b994cc830530d627a06400fa0f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 64656, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "caa4ccc68b100a8ce4913cd516533a1f40c7c1c8c6ea4806ea1db1710a6e2f9f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, 
FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 125408, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "2991a70d9e07e24161bc5ded2c26ed165ac93f4cf12f4e836cfb0cfee3f2b690"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 123248, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "a83acf4b7f75d7ec8bd7a6abdfdde78477776d356d941692cb45d58300502874"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 121312, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "0dcb02bf8f2f4d6a1317ac540de14bec58e78600201d236db7947b8ab15a258a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 120688, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, 
"62f48b3be4fc5b1d7ed78df6023e1e2b685ca81c4f5d14b9b3910e338cd77e73"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158072, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "d7cf7375b98a4ba9defc8cf40396c6a0b24a5ace5519310584c02675fdadbad8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155512, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "f5f9179d80bf3245fdf26b650fb118690ec3be553bedb5001b91e1e54b5dd3ad"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 123248, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "b134f7e6cb68832db8997c6562f5e55743d14d3e05171a3403acaccbf2c52340"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, 
FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120688, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "4ff4af2ee0d42b5848ed91d14b6cef6787faebab0adbb8420d5dea7b3ff4b6bb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "e1a44ae68aa0dfebe22de5a2ef2bc3a5d4e2e236b75e497cc63da3e34176ae96"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 64656, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "4982c37cef97b5c1c6283595f79ec9a90c852d7200abe133400ce19825fd2739"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, 
"fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 125408, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "d9e8a2e2632bf607675c2e6ec968d769401d80ba720a6a4ced1ceee0c8cf27a8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 123248, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "805a7eaaca1f14cc9c91d27e803d7d55bc70b55f0b1ac650bc4ea6cb58b8b198"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 121312, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "2ab34b4f9377e5602899583203cbc7921de2a1e185d83d22796907ca93ae03a3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 120688, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "0b5e6955a596bb46e189efdb06fd19db1f301d0533fa9dab2cb766bfc7f23f53"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 
128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "06bdfa05a8e94739151a1ecd976dbbeb8c01a69bdd833dfddbacdb48e78a840a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "1de247b1055d71d453d0b3c32cf1172abcb5d7e7885c62a05d1f36f379ca53dd"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "94ada0ee2258c4cd812915d6e7446e3eb989b4990a1b83504d8cbc50315e7262"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "d3f970c8699dd7b9ed025c4685a595b3bbdea462078a7b2c79defa21ea427077"}, -{ DATA_TYPE_BF16, 
DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "ad0c86c0bac6f61f73ad6727bd51931a00fe90e78964346a512b10a1ab36f3f4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "4ea0d669bf46d83100dbc67ece786ab3c6f094eb68b781e837d3d124ddb05c4d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "d6f69cae4996620c49f1f049c5d37d3ed728b01014d1f1863cceabc865f9fc07"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "9f4bba583534e1eee307939ec88fe4269e292b07090c143b205610f167945ace"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "f51d9875af2c1541e28ad5607cb44e3ca6e05a2109c250bed84e7f24bc77b44e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "a24190cdd669e6b4335f07bd1f5533416922be55e8539aa1d04ce8490ca7b9ec"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "06a24ec7a3de862f346860f495a451cd4841c45aee5858e23b55891b54ea024a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, 
FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "8668ac02df350d356f8a7bd53c1cfc58f26b8eff07c63d7203bd21440098ada6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "5651e2781c6d1ab46e71c91cb57a2a0e072418f5edc85b98cb288c9b1f250c26"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "a7af2e131558f9768f8664344c02ca98fca068c42c7bb88bd08fd5bb92c9a811"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "6444543969019e36fec099a76d31f9c3a2cd5a1538ec0cbb0ce7e4c460dab7dd"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 
128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "7baa7431a16b71faadc2ee1d287f8a31089076619cb45dbe60dfa17dc013e5c0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "47ac57a4f2ef7d33e415b720e2d8a8fb77cf27fc6b1ba46ed7a52dc80f2eee44"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "242cb6e42f949c157cb4b95ebeed48896cd5a82aff220d6ba5b35655c2a76789"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "dc48f154e0c6e6dedf9a01ea222265cbe5b534da5c68173a01cfb86059bc0c23"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "831fb3a95c8041d4d3d87cac7210cfa951aa0d09aa431b93b0dd2a07bc4aeb93"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "90db95dddfcb0628f5ef36d3913008a70f692e9043eb4f1e34119df39aca3879"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "ae98bf60f71d70b2f4da919137b86bcbab13e363bfb4d71c88f69205aa0bd401"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "f00ed7efbb2e1e8f62051f941169475fc82c96f32903f77766448c18c8928aa5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "ab7db13d01b43e2b46b7bc0687783eeb34b90406973cef5652875752dc35aef5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "99ab8b3d1a7883574a5ed4caf8f57f3fb86caf30d024c43bc4f51f363a4b5780"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "019744b1d1252b921cf393a52cbb04a6f7a0aba03055b1710a5b0f7532023445"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, 
DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "9540933253da102e27177789e98f1eedb91b69bf099f37cb11386890590ed37b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "417647518b1a2fcffb6d37bdbb4c30df7a5f5d949a9bcb3f926023e9b6d61dce"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "92b6ac05958d3a70b3b78ed8582f25adbb1c28078c452a45d68ea91f5de24c77"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, 
"45c44be9127acfdbb686c9d6f9957b634bd6690d2ae7f234e5bcf5a354a14a31"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "e2d08fb17527612bff137832132c30a5e56ebf8746d2e33ef15ad90e8d9b0080"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "edd75aec6625bb0dc54e60b4068d5243f298d2112514d77a6f23b66bcfcb3bff"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "bc00810bdd35c983197e69756af42dcf0e156b740acc38862012073b7b6cf9dd"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "a02dd62dd46c5b64554a25bd2f4fd63eb7c2978706e8bba3f7bbba8a5f97c58d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "97e5584edaa2d53c07c69126cbeb92924af21e1885bb5507fe0889f87c3c62c5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "c68abcd0418f27666873a065e0a95bb5b6f9f6c6b2cca94dba83911212f02103"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "92c93ade3175533b11049efaaf1cf4f2884d108c8f62976f4aafa84d4c6b2494"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, 
FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "732b144fee41cd7ab1763064b16e41fb2719279aedb5be9b327042902424243d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "fc37e19ecdcb07b1c93d1472811a3df92a0b6f3192fdc7b2ff9129252e044c1f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "1e78a5aaec0091bbbdc33c4ca3cef8a16f833f7624271d67e25960720f50e65d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "00d52ae89284d4ff2058ce995ec17b4d63b8e53856937902bc6e44c8be7809cc"}, -{ 
DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "27082465041c502b6c3a39fdcd6f61938c22d34b0a1ed75fc64b0f328788a4b9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "c9110c88bf42c901a23f1cca72c5809d4da777a96570c48541a6f791fc3fc778"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "00ac78558839cb1b931f658c3cb4830cbca8cbf553fbe3f3c6834cad4438d815"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, 
false, false, false, "b0d38d5b6438284b6496ca9aa3020b17077d9ba3f458c2158fde6de53fd3ab61"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "6859ff5810c8b2c814dbd25fc5f930cb7b1ca16833087222a848df5d5d590b3d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "e4ab9f5df9d36b498d64073afa1a432f8eb75ba0b997f7fcf9f34570bfe7a84e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "4b88121d49f3dcbf1794b32dd49f749715f2fea8c996a2588aa815f60dc435e0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 
512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "9743c3a2289fb41ac65ae43ca6094518e0c7811f871967adc71a2b0f396e881c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "6684498bb8665447c5cdc4ef12909ab5372cb61b5384908380c100f9fc50c5f2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "0fa5af34e5008a588e9bc4a78a0c66d8a32bdeaab1721532f97bfd437c595bd3"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "074f561cba1b31d797f8e9f08f299a2e420a2e386a5814d4230340017c792b20"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "3a0056c1db1ab2d711e9e21fe24d23e8fe7ee167d44b5f759c50936093d80fdf"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "b8707827be243f91e9e3e67673da716afccb99b490f2af27cf9d40de1b1cbf20"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 196976, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "54bd918b5ccccb2ab78a6004376401322c323ebb333476b82f983948cb0f1671"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext", 196880, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "4b05cc4e4101cce1e71ac3adba697649c6c9bab4473547bfe71263551a75f43a"}, -{ 
DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 196976, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "72682633b742d13d6e6a485ed8662fdb54e88f0459902d7fe8afbea4e4ac304e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext", 196880, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "7751c61c88c1442dd6c71bddf0756dd6e4febfe25f60b1158785ebd61c3a378b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "e541e2d213b92c0d79a9cd2cfd93fe03ebfa541ee9ed40ffb1400e7a8c90adc6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 197744, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "6f2c8d3b8108665617d408e2d3717a73394df7d89e4173bb13e20f311378c51f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "ae17ef89c8f9fdc9e0cac34663285bed9c3a6d613b83cd54da17d3e93915a3e6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 197744, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "9188c5c53301ba996c29fc3e73411737405b9323379056e07c47cb35bdad7b7d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "39a135ff7fca5c8e36130db366c4b345071983e4ec9321f261b2f7b5d5bed2c3"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, 
FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 197744, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "174140e03940cc58042a17ad40dfc71bfd8227276e4a258a04c4185c2c56c3f2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "f5e93df12b3cd39b52b3a67ca3d5cfe9fb07e1ac8de6f9f2358c323902541ed5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 197744, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "48610e985af6c76bb1429ca9d202c644994e92a4d33a813750a687eae8db2e91"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext", 196976, 512, 0, 0, 1, 0, 1, 1, 0, false, false, false, false, false, 
"5dda6346d89b9c6a13d2294de31622346bb9cd99af87b300d5a3d109d0ff8a63"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext", 196880, 512, 0, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "72d6f28abed853a9c34eda06eb5a3ccb40783f5f6b5c0d50975d6e02dfc5b0a3"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext", 196976, 512, 0, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "03324400f0890d28cb7290de47cb2eb1386657dcb88f9494c97c3dce8d7104c0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext", 196880, 512, 0, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "989f7ec60d0b9cf5dc7e4a4349971648d23af42ba0964629cfcc29926576966e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "7fa45872fa049e20af9b48376e8f81ba696dd88c25416a108e1def2d7728ed07"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "8a5d68e609674fb41a0ed8c19aa3e80e45fd4599f2abd95835b7566990877b49"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "c57a9a20b31a80b074a62d85b1e27a58b2735790f7689b69d57a29d9f50c38c3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "9908a3001bf0412e8e77b6be40b84f95229005a1c5303172e87c364ef25e987e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "02e5978360c6e6e02b8539bf899291ca23e5e6aac6b6c9281c94df57bdf15e4c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "f35eb0b5b213b630f55e2ccd499588c8adccd116798e7a8e58b4b8a92db493e5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "4ce0e78d370cc20933ff95c41027c4886832b1046575e3f421ceeeec01016cf0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "92c86d9437acc858be2e13a346a301fac2d889592ba6f5d550ea6d487f34e151"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, 
kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "1a46cbfd9a963cd2b0a3af7075e47b0dfca14e4eb0bfb548cd9536b3427763a3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "437b0ec971541a3923866be35a7501036b39482cdcf4c9a2757212a796aedc56"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "7601c332411a8357d675a77d0b3d0f5799deaeb77c894a1c5fe85da83aaa2f7c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "08c6d4d7dbb0225246bd7cc73577f24856f5e5b8bbc221ee1b422dca05726812"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 
128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "9a6bdca33fcd1f803d52ee2d9affc5452a781e24f04c19b2b99e7e5edf134b77"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "bb20c7fd5b90c681af4e1dcbe5e9978979e2c6251b022e8ae79d4bba01a514c5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "47b6043df36d1cad542db5af732371a39865c772503871c2faa2c4a45db74da4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, 
"f3b99bd5ff527057127898504603488ea5bf42e39c599264ae8bbc577c5bca02"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "836838861ef56e02e54e85b09cf989db34bf99ca9b84ea04a50d7dd6174ef002"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "5329b834ca37bca6dd705ad57d9e123e589ca30a2e08cc32ccce43b118fa01c1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "184bb51a07c5c8b0a3f3cf6b7197ea7583b0cb52dda0ffb832f31c66bd787c85"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "afdb5d45492ab8d3859b7d5d56b78b4d91bea26fbf2cbe9450aa4f1524f766c8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "c9b018787099a54d0186316aee215976d224d541a232ceadb508e6aabd0afc18"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "539911ce2c7c6f4d84c1208957a68871f8089c05d97b59e6afbebbafe123890b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "bbcebc67be58b4418bd48580f7c62666914efc1cfa831f24a05c08a302981eb4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, 
FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "024a87775d1b2361b9b161bb7805bdfe906a5b2b22f10f4249391d856def1136"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "0c43cf63cc11f70b794eb1b29a22a9f478398adcec80ae3cbaeda37ecbef6f8a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "f0bead85d00429c0029a8f4d5f390a24a5c35a29297fb17436a6c6efa85cefab"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "6d8fbd1ca934b1b7bc0bf0ae15ee7f9893761c7fdb78e326e9119399e22542d5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, 
FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "61d634e33fdb79472b53b536cffd88a3835bb18718de6469718651be4facccf4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "1392413dbb65ecb691de18c2a37fdfd66823fcf0f5fcfc978f593f038514e0c4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "847836594999d625ed3461583431acb68b554de8745499d41284cf3a9fa1a1d9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "d69855bcb11d065bdb4fe9df5653172004948d1ac5caf6db850df9968ff193bd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 
256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "b7d26aad32bb28dff9b0900614c870378853b8b128efa137b6bb2d93d88007d6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "601673be0d2061e7f898651ff12067bf3601236897f7830e5633bc8aad3a4438"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "d9638eb504f77a1f6f830089e2a7ee2efc3eb6cbb04e34fd3d172bbd2d36f62a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, 
false, false, false, false, "d15e08f7f53b7fe9376116174e1d9abd13ce5678c2b273a5843555390fa3bf58"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "9513046c4117b3508da6e18ef3757ddf38813ce743fbc54ede80068bf465de30"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "34fd9dde424c57a18dfbe8ebf2a5a93bce37d0f52989cf80b6389237457e4f4d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "d532801c386b691258c57c600b4399a818ae6e498acd3bb3b0331fc8a0d5b7ec"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 
41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "213a4b07f8f9a3593131a1f544050f7e81403c2e29524ff860bcb61dbc7916dd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "c2ba72581fc91e98e3b2b622f893ecafed6771828dc66b3c3fd7a3cc8aee0b1d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "7773a5beb45828a2c02c846faa94649bb1a1679650c323c3d702f6fb3f7bec71"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "df618601f2875b33febac49737bd0cf35422db41fdc4390ba8a48fe6dc235361"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "356e0916cbbe80a70dc80dea74987f5708d0efb979a590726abbee4e10024c5f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "941d5919e52bbb8df0600c68b20ea92723510571f78e3160832b0293dfd8a9f0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "0a32ad69592a50b55fc0ab920a9b47185ccadaf515a1303144de09fe9a4005dd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "7a6065c8fe49fa31a1c0bd5f41aac2da8ba3f218d31348f5d27faebaacf905c0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "10e40eb4244c889bfb8a6e4e653ac71e317090b8ca9d36803a3cd40fae6b83c1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "bb4099ab267fd5e201ae8b99fc85430bc03a40e543071a94dc41bd974116417b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "bbc59a5c14d506d64672c38e1bb5402d33cb802c4dccc6f5afd0a42e4ace8f04"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "15721e834cc8dcad5573a6d57aa270d30f6934dd4dbf4d4f1e28decbf7bc9391"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "20d96bc600ddf834043c619846956bc6ca76a070930525dffd778006b2e08fad"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "4c03d862365364c94af7510b2d7e0cbef52e792a2db0814590b075d42ef05434"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "581c2885c88321c6a735464ebead688192c8eeccb49faacc366a90abf7585936"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "7fd3ca5b78eceea59b0c3ed54bc970cac6576a652ea44369f76ec603306b0ae7"}, -{ 
DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 115024, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "2e22397814387602b564b6db15ffb41f74ac060e2c0e5b6b65a646bc131a811f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext", 114928, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "4de440bae70e729e98146f9b9d1255416d3dd435b65e37c6611f0ae47d62e907"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 115024, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "4fb6d5def9657785399450b0cb4f2b0cd3c752e9ac1be73572c148f53a65ff6c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext", 114928, 512, 1, 0, 0, 0, 1, 0, 0, false, false, 
false, false, false, "8384653615866c7e6c7c93742269bbda6ddd2073f3f5d3e6a45d9e7488c23ba2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "561c25d0a7bea2d4120c1d9b724759d6b241859f1a20e87e6d73193a9363b087"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 115792, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "d8f69a794b36f48af05ca103b4aa46249fe5cd852713073d89b53b4832d7cb2f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "0fa591baf0a430fb4e68fb98033fd18a35985317fd7501e0cc41b2e42c0e99ec"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 115792, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "911add8cdfaeaaf86588e13981c263e0d6f9585576e1cfa4d1c80f4abf921fb8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "5f846e97c0ab4047b54c7d97fe4d201c702174fbf9cd0f2760d9609b51d2155d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 115792, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "9025e4f4cbece8289131450007158be761efa98af67e68b6722aeed3e3236625"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "1a5d687161dcbbc79c785e9abb961a1f320c71e632484231ce9a5f9ff37d39f7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, 
FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 115792, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "d1021a1bdf86b7d02e4b027ddfb671e9fcb4637631e4a44e240ad8a5f0489b49"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext", 115024, 512, 0, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "69f1693edc0118d27db0560fda2d22779dcd6a9d0b499ba52f3f517d6c746f11"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext", 114928, 512, 0, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "7667dd5190a5e89940e456c267dc90918237e1e6dfdf63314a3c428a28e8beb6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext", 115024, 512, 0, 0, 0, 0, 1, 1, 0, false, false, false, false, false, 
"3211b7ca21bc55376f84067ce1c72e04d36bba15c6b078a78e606ab890dccf05"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext", 114928, 512, 0, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "5165370abc83a2d9413a8d09f9093b54b2ffc544c6969f874b650882e7b253a8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "784e2493f51befd4685438c9d3c959bc3ea03171ddb1620bf9f68d94535eb312"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "03ad46087c3bfb4da06eda4159b08587e4a4af77689152758c03a073df573eff"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, 
"7f753557a66279f47560424a590fb0d04aabe57b707da02379fd994d75fb84e1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "da08751e93785df94c57a9367454b1150e3281f1283886c62fc8ce458e7c1186"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "7f84bad47aa7119e0ec701dfb7f687ef34390102c3b3cfcf7cd6b42bb4ea345c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "71013f9fb26d486428c16693023512e5d11d30a3963791ae1a00015f36973c68"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 
2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "3b3d609c1746f35585122570f39d1f6fbe79f9ae574f331e3c74ead97db5009b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "b8e3d7233e4059bf7ef9f6d2a0c8e1aaf34884d3ecabe46384838a568502b193"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "164636f5a3c1ef5aa02c3edb8ea944d7618e4821fadbfde9061c03c381a3f48d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "080d26f622807073c017d857ddb0bc617d81d2c02669a001dc86eb5c6e050790"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, 
false, false, false, false, false, "ae5ba61c519b8c16e23999fb8015ca4d72a41deb5c19e9713f4430c0c124f409"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "f433d4d0584e264d82c358961226053d95dfa8fb7481e6669d58862717e264e7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "336170486c55d9913b0c37d57ef18a855d6886376f5546ec4ed3234ad84092fa"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "b3fd1a089401e17b0f0e257bb78fd4ecfe9cc0d3de42e74d50f9f4a539d65c8c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 
2, 0, 1, 1, 0, false, false, false, false, false, "5eb7dae6b92a81b8ec0db228049a45cd1bc2e390a2439c2c0e8176837453c26c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "38b9463321470fff54e91678b9d30d00793c1a447a35f5d7af25f85ed7f5b4e6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "a8f4cd68fb7a62b8ca43d139636df19563896b468f7acdba2286c0d45d2559f9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "c969a4d68f33c56b7a9179e0c7853994a8fda1fffa024fe53f197e5dcb7aa4c8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "a9a817c4def3fc31cb32c428a9504f993eb918db4a779f8cc3d28b662dff454e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "83b675b5a5111f86aaa048ea9c6a938d4838f195244791cffc9936ea820cdc8d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "b40fd8e1abf8c46977546132071c6236189fc11d45995e1696b4a8f246ea18a8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "ae2466eff19c5a4e24ba3d6bb6d93b85d64c8c6a823edd88bf0877251ada0cff"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "ca2043b627ddc96d7d12f3f445847566d03fe8e0b3619cc8c9661430d325b3db"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "b0cd8c3bd868bcb691c2975c91736e803096928d428a3c7b3ba5195fc494cebf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "48f664f894c8d9718ed30b4f04aae2b03d8a5c4f76337e62cba94762f8e49767"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "276624c661691dfdbf34ba4e5ffb6486c91b6c7b8144d2d9fdadf9431badc400"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, 
FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "78648aaf3a38fa7a8922797ba66ec2f5742ecde4492e6675dce2051f8ebd5c63"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "0afec0522fda5fd8e5b77a99cee098a368aae5e9e1e7228846c8798235929d28"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "76aa37dec4996a9315f47452fc64a091cea396c861e11fa8f0a814324a6e5828"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "f3e2d5115de45c6cef5fa9bd95fb97080a1403a1dc0aec4244d5bedac6313131"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, 
FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "a19ff0e1c8feb2f8ef86df0a29673e66db7d1875ca5b5e54d7ada71c462cf2e0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "a438329f5cacc90952073f6f0d705d94da52e1e5585f678e11d7378021d3c515"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "dfabd7bb9c2540ee1dd89f522760c832f69f1b591a4db0ca0a18fad401c246d0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "a372b585da1ddb9188ef32db66a96928ebc4d39ef1f52b04f9e13e712f69905e"}, -{ DATA_TYPE_E4M3, 
DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "1735e9ce2e33ce4c94fecc228d970633ce853a10b6c6fb6743b431c5116a9a86"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "c7f1f73a6b1413ff3928a730121c763fa5028e38a788480efceb4e3b46f34aa3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "8a44cf5a991460ec1d142569219d1dfe19b6b446c4f363bd1be3e5f12892780f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, 
"81e17fe466a58001df3dbadfc0e68708c0b2255aef531bbc607347e4702251a3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "ca9362a45b7919d4f05ccb2b497b8825236e047f5e41a81ad213b1abc8fa8347"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "2113502c3afb94f9f1cf921c4bed7c64f8ea84f4abce31755bef29d725d195d5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "2d7893f2c471c0a35c38a9305231f2fce6d84464d024ae4461aad8b504e9eda3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, 
false, false, false, false, "9f2644bdc2b1b8591053f3eaaa47496eb533403791c4877b344426a5fee52ac7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "3c4b96b6d3838063622f1c42af09fdd77cc4a6f37d0124d011b7e0c657a53bfa"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "420d9a5a11601880909e549ca6079a25cf520253c5202f78196341aea1c54426"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "3d156249f88fd09c4cf14b2de98c761de6961e00a8cf31e962a2e412247889ae"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, 
"cabff4364fb5f8b3872ad2f1a545b4485f2913387bca17fc4b9df9a45c7ec311"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "8e0470f350a7af13e7e1bf2976d2dcc1965213e6e7f2caab85b2c1ad1d04b7da"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "59592846ff5141e28ebdc006c76be05c30837e728b89fbcb4948cec1a2c041b4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "232cb40e112e11677fba56e0ac164a718881d8f95412ba04384f9ae9786544dc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, 
"b093b0ae2328753f0789756c77777d353640bbd6fc64f69f0d7b4188831dfcbf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "fb261e07902406b516f6080a4138df619c8bfe325d7565552f62d59453cc06b5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "36fdea8a2d8394598efdef44bee5598d73fbb1251ec8c1039d96eaac0c6b78ef"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "1bbc3d14b78c96203f2ddbc500d13104c44bc84b681f9db302bcccf3a7c7dadd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "cfd3059c7fc73b45f937531a59c90935e5e7306281f606dd2839c54541721bad"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "c9862395b53199d65456ebc15cdab4262fc3da19a35e483586cc89497f0b0e67"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "cace30d3c9bfb67f7a3fd2e21022b0a6cb78fbba4b360377509313ade705a075"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "a0a5ff27d309408615886898e98dd062bd57673684e83dba2456f55bdaff87c0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "3df80f478493258f120594da6b5cf47eb14404b9c445e815ed35157fa9dbe70a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "44a361ac442f7113c0ee39189937527ccdd953f76b6240cee054ccf761ddb6cd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "5a54ede286e57f059de91ffb17377cfb8c01ba2f368bd747842f76cb75e39898"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "40a1953559b27684877a7078cb774eb967cf7bd9b3d4a631e36bc52b35d7fd53"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, 
FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "22c6eaa737b01ac027eeecd70256435160f61a21e224ef9cbf76eb1198de38e7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "cfaeac3c3b96aff8b910188a231d542363e3c2c071ee6e4ff969c65f6ff26f85"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "24ac57c0d85226b6df72ccb09b61305cc6279ab27ec224c388768e70305f0ec8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "2d552d1089585762fd01447b2a04f193f3d78f86458c45bdb485a23c9e9ca05a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, 
FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "c7c4bd71802a77351ed7376a7f26810f7a45e316d8e026f1045519f629e8d28e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "87e9ba61d23d52bf0ace72a0bba60459b1ba3575fc92c1b53a3cfab41461092d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "948ef11536f593ffba20d7b252040bd65ef44f28c6857b86aae33057193d9118"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "8816cf733d9d06311eb4b5247889a1bfffdaa7d9ce8c72df4951df79962373f8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, 
FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "715194ee1520acfd4e694c2245ceec38c7f076b7a66b4655abeff555cbf4f82b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "dd9ae804e4679868ba392b6184ab4957ec8d46fd7306800567d729f414863249"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "a21e771c18255eb059ab82832ac66b1322444dc5e3cf2e7aa26b4d246cfde113"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, 
"d1bb2ea4fafaec4f5abf5d4a8da12b534904508feff1054a3883548dce30f27d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "0c33ce3fde2361416521e0791ddd3b368534b11c4c6ea9161acd920e9ed2cdcf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "f132789dd3ad63fb69bdcd34bde0a2b5fcb4d32acca3da942246382c8db6bfdf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "f586bfa21180737166cdf60382ff454577afc71ff66e80b4e14a79101b283e30"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, 
false, "3cf6fa490f035af4dfdaf5f050c08661bed309d93765541583c43cb5acd11f9d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "6e6b1d250e42ad8c2f62b86670d7dfad6d7c71a70b2013f374e2632179e4be02"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "75720090d9caaf76f94f8e699bdd9e1d2f7cbbb3bac97050f606454a6e8b0123"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "33d21f0982b7bd801d074b57ebfd98f37c2e984a8cace447b850385a4aaf1f18"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, 
false, false, false, false, false, "e8a02afca6eabaf1129e48a8b2bc2ff5d58094b9a4f89373021968e667b017a8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "a24bb008c0e87ff8fe56f999e2875882088b4beb1ee63ced311116d0260c8f5e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "1c83c48dcc7b984a4492c50d10f573dff872b7c8432068a50a166641d36edac1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "5e9b7f6a7b4b13205d7455f68f342fe368f40d663a48ad00956d693734120142"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, 
false, "80d47eb9eac278cb133a010ed7e5eb47aa8a8c4ce44665fcd5bab419da52f99c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "e29d96e43fa757205260652643e1336109cfa22ad53c950a709b687818c1df7a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "aa5131882f7b62152986273cfd5012bfc668bf1d4122d35473991f738355b267"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "3e41aa4e16eb6e96a96db0cdaf52343ef68be571b5c8ff993a67ed0847fd07a7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "98ba97628486e6cdc2a44f18ab28c0000b0dbac005ec5ad72dcea2556d0bb1c8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "c193830d397ea4e01a669a40230167e822e21501ab36ca014f540be43ee055ea"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "b13e2f33468628df638f2fd7ab3a238363063fd596b453eac85d2386d2ca6041"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "13da4f8161c52c022a461a2d85da5fa7595f82ecc9b6cf8b9b3516b69323037b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "63cc477911f4c12f31102648945a39a1c752d38d1c5ff76d968cd98cf66b0e95"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "67667380687d7c7629bfc622c624763d84b5b3ff53916009a1990da10ce1f597"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "1a3855a6cf4a1d2c2f52a0a33f439ee127c908ba691afbe13b3d7b07574a30cc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "5a8970810ee8ec7f0e29c17ad7657b1eaf02b4797e4210ddbefe23a20cf05a82"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, 
FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "07cbec9f28b58d17b5dffc63a501d1fab71d8cc3f43eadf801363abe4265c993"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "c73c6939aa11a7d1427dbe463309b9b46fb7f8114dfb51673134e6e09e030041"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "82ff9bec24eae0bc2f3e6548722b5ea54c52b75edea402b9412e339a6389f28d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "e4b7cb9a7faa494fcecc3901c97c576e86e98aeedb9a458e8e868f6b1653149f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, 
FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "7fbab0b08f884e0fa961b8c12285a938595dbbfee94bbbd657ea564eebd53cb8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "aa569177061c31e34d0d265468697e53f9ab6e3a7131f7afb49956bed2f4276e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "7be6b0c274c9473b2a050c4ca316f0b529cacc5d6b2e3e6b963a9478f83eaf3d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "8eaa4d16666ecf5f1bc429f1671bec4e1ae95c1a4942473f477e68fc3aa3a4c5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, 
FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "f9f55304b212ee858fd5e22ed521413908a84261b8f6541188a9d8749e06697e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "acf879163eb2433f8861008ab062bf696aaa9ea6f83455a5a869de96220da6f5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "957d580f70cc5f8aec720716bd844a30aac9473d9c1ba2fdfe6e5a6c16804a53"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, 
"9bbdbf3eb5cc7954f4ee71897038f80277c9e6a65dd3f70dd2268f56ed7389f7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "c103a3bc95f045d5cc0f51b0c33d5a65e4df157933a37202e2216f9efcd6fd51"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "6bf76023ce592fd01ae875675cf51ab84c40a529631ddf298a22a6924bc0ff23"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "aa781087dbf7bbf9c26763bff22778e8dcf8bc642a5bb2c98d160a4aaa0e40c0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, 
"0ae5fa7e33157a631160c897d15cb3ca8f627cb83f8538336f0e4a470d088804"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "6b3b26f1625ddb41184aed4dc036a219e6fb8426592abe30acec46abce4820ca"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "2c4a7c7cdf1aefbe4655f3ee9b416b5d6ee09d735496e5ca50afe2a8a4dbfc9b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "dab3420d10451ca62ea0d2b8099d1833eb5450badb66ba16263809fff32b6d36"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 
83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "92eb2584ceaf556edf0ad2e7b06316897c6f6ba5df18989d08d7b44ef713794e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "d301833d86cdf7f91c4a121458265af7234a0a67652a4a78fe1492a00fe2b612"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "99103c54e14e87ab71c8595bffee961452403ea39580ef435627d6d0399e975f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "aa38a78477bfb3f283d6a737315a124f9286613a4beefe2f1158259c870fc839"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 
0, 0, false, false, false, false, false, "1bc9d24ae9816aa664eb72fef8f0f56c29affa64a77dff592c1003ee1e58f07c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "fee79cfb3ed736ccb89e7eacc4f50b1282de9ad7ee7b4dbdabc5f4562ae4976b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "8c062f8a1447e78284af2fec180c3bf34d3125b808de827af4c71e0d0d5c8bb4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "cb91fb15ebf956e9d75c7c0a4fd3930728089918471c06a216943e13562716d6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "deaf319dec3b8810bc7ae9fa801983c553a4d187667b582451b4135714420916"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "0d066cdecbc6965bb4eb4e14a6852b42569b49d83de3a480d2397e2a56e600a5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "2e4aa4d247a59dd34a17cee4c36f5241143e632e30550f39ed04ac822753194d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "3483bf86ac09ee4f3f6dbf59401b1e4d5122f322950efb3c2f5fe76b67a122e2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, 
FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "6b4b788adaa86649365d2e3d2fc00b8bc4170bd3acb4f21077806cd01773e971"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "d86c9d9439cbbc00d69c7abb3c57bee545f229ebcefad3497e2a978971f53d6c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "c51a71a1c2ec49ac47629b9e1083530dc89a6a0c81920220246db6ff1bc76032"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "309f4e303d3041a878d6c0c384c05ba3b786b8559f12ec2e197a46246d3f9e8a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, 
FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "bc117d0e1949d51ffe52a01e5798cf6f632da7687b2a1a1fdc8673be1188ac4a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "28c900d4fee4e886e934962bb49c2191607064658e72b981f2a14759f2874da8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "75372253428563db1d2bb3c3497f602cfcdbcc00afe8d6396d64a184edf1319f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "6a533ed418af1d42ae3c755efeadb46c624310b18501ee3898c9d90dd2cb7aa2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 
256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "e871c346468a18cd10d5e403498f3d7213f97f44a8b9e3d8b3025770909b13c3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "bf47b416728a927e72613f6825466ec797dfd556b8a43efeb88bfeeb517eb14a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "36649d3612b1bd1007c217325c8746ba9910a5ccbf8f84018a5e3fb2cfc2639c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "55e1f18e088a0121e347f1e243f0f9670b74022772b444ac43b10ad9c1e4ad0e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, 
FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "2b857d577036427bf4f4590a375316a1e7c0523c5a0893f311974778d666fe51"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "444f34dcdbde31bb22c24c00b305a09205838365c2b5cbc557e618253cae149c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "6954d67ce0b853587514c6b4824d412f304a7465e3af0d0e14553869eae9c683"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, 
"2ae457751692a51381e5ad838756be1d91549690bfea0ef9183f36412a2d1235"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "4c5db0d93bbc10bd3c41b549d1d44e9ba41aaef4af0a60e4f862cbd545182418"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "3768a4ed08a6ae96ae87b678a8a0c0cf4bc1ec6d168874536bf0bf1eb9bd1817"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "62d8fdc5099b395622eeda0f2656020446205e5fb2b1bb950d837847b44ed9c2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, 
"2c4ea9e454643e33a7cddea306f77102643fa40bb40dcc17d60b5ea3258dd932"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "cdca0f77a398dd35f8019defa3e1c8b3045911dcd87dda152449344f5ea25901"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "a3922cfb8da3d7e6627264fee4880614cf65a5230bb6652f7cf3770438175e6e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "c6a4a0c95b5168f593245c4b4799568c1501bd97ef68c2dd76b3a2ac87eafaa0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, 
false, false, false, false, false, "d21b5cdca8065215f2ff357e52fe1a75a5c4f5e9ca49fb26e20c064a3f2cb92f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "9f9076920d78078e20d028c23099436944865eab75a92cae4b5a7e31cb4fec64"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "236b4004604dd823b6a9f0c101c5b187b7e515aa6c0156fe3e86efa4cbca7583"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "3854faa1ff64354d58e35a5053a188b4c36faba256ba73d61bf40582df306e10"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, 
"fa63e1fe5a6936051204a7db7f0daf2d9482d19c1222ff805609f58d3318ed78"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "18ab0dad8844ab10bda7f7ce199150f672031f2037939cbdae59f7e3aa882576"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "5aeef65c84b108ea2d74cd7ef75423afbd02b96808a1d1bbc5b3c54bc199b061"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "46ba4030b54deb7622ee8557c8f006c53873f4fe3a10c1daeae0c4fbec534947"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, 
"07cc6cac7f6d1ee98ce6f48d2386e0d1db5d79a8a19698a04a451336bdf18bf0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "6c48625e31c75cbef390f7e7c0dc595c95cce72310f64771d30d7cf918e2f00f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "6206a8301b702fd76f0eff108ebd055b8bab3011235b7619146b07fc67980ffe"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "a38c69aa725ebd8dfd8d5f2ebe712c973768ac581fc18473f922759e85f78d9b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "dde79bec2d185540cde10304afe514c3e7f8ea2b736cba94d9880f85ea3cd487"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "f6060b29f99054e03a8b6706a9386905305490cf59443e84e09da220aa84f943"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "b6ea1155a95774b037426fe1a8be846e67939ca41595b4b9f3a5490d86324a68"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "c8cc350c3f16a78bac46b6daef6ccc4fae03781e8463525bd0ab307731fcd5d3"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "9ac3752402c41e33bc89e8f5b50324b35fca08548a55342920d78af24d0d2ba1"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "572d73cf20ba0e29c455338b119b6e97df2785f480a3a55706dc47469694ea15"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "d349bd00f6c01f5773eb42bfacf108938fc65d959f042ea557c41c49732fc271"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "54ee799e826820e4cb26c368fb9e577db067b61320b1adbf98bdaa57edeaec79"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "3a172c37749f070d9654f64154fb687f8b62bb5017dcac9d38dd1294b8ed3739"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "7fff3bde5deabbeda124626147f71403172724d868ae682930dd9c9f67b0f31a"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "1ee65512b13184855f60f4452d997a63367fe57699a21cf2671ca1ce3577e4b0"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "41f47840a61dd01090f096ae2d04b7de4e324c3d2f491898b238e4666e420049"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "51c89b595d1445d77d58d48eb659a6ac98824eb685216116433f34a30b493834"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "845063835b71220f223dd550d9a597ab0921f9e603ca0aa85daf68d0d8f87517"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "c629afa9d9c7ede565932dc1d878f3361b2a5dbb9ba9baa254d74d3abbf96a59"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "86cb46b6025de294cce67cc0725e0be58d314f7802fce17589710a5ed7744f99"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, 
FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "b5fb796d9e1bc621f497e30e05edc06a3e0d64e35b8d921aaef4b90340f94227"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "7e528f4bd556ee8d80398599ad2a145257eed2440d3aaf46abf98c38803e0e33"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "c347d172ae52cacb18e068c0ef0dee1612874c301f3901d4ade5f1bdacedf2bd"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "c405513e3c1f7b4bc0076002673a849567f568ef231a046e7e8c2271a01d8078"}, -{ DATA_TYPE_FP16, 
DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "8bf90c4a31bdde9193be734f2b8b6053aa64c853f16ee30976f4bd957f2a66a6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "2ff9d27d0161a7630165ace629da486d129313527658c044ed3ba14ada230d6d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "5397d5db682a67e4ca1f278c4e6d9ef4c7a37c5cc3b3855b1e6e386eb072b588"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, 
"d018fdf15714ee3b8c7029e0fc201fa68e9ee6d6ff6ac0f644dc3463a13aa8eb"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "1fd6fe31de5d0f960786e3879eed7c428002dd2e4cc6cb7789c635439f01a7e9"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "fe927d5599ef4637942c6a966f601186a60fce50f31d9c10600472d1dbcae55b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "2cab3b7315369a3d5c70ff820d4fda0048dc42c62ca7d6c86580a8be7230f7d0"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, 
"febcb37923b27beff8bbf4c26eafdff33284c7352f6811b902c15614632bcfca"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "7953473f2592ed23aaa961e6b9267b5f572403c4ccffc8f8456221ce4980119f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "36f2dbf3a7b28acd4f5b6c45bfdae8ec6383c88ca1f16c26d386e99b2b275031"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "d8d7730ab5d714b49002564c3c72d69baef0329714cb8927e72c642cb01f0d84"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, 
"77ed91ea78f400dfa1e9b50db1b152fa245efa62b6e5ec97ded9bb85b47399da"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "1d735ae17681b0f370bbbf39433104d62a1481f8afc1a11563c6fcd52cb1ce95"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "c96fd7b50f1cdd2b806f37821512eb6db9e79d3df58bf7afb708fa6061968ff4"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "901734e37480dc5e0b285197ec66ac55a2bd4981f59693c29339c7761c55515c"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, 
FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "0965c890a518f3c8779cfc2c45a8b4c4d97cc315bf36d980104dac7ba286c38a"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "a566b66b08a712dca096c97bc2252553d295bfb563910dd8ecf9822b14be0c28"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "d409e38f55ceedb3242f59bad03429fd48658a44c39cff4d841f1593fab777f7"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "625c703c440182e69b4ae68ee1864cd7fc9c2bd26ee5980a85d02007564cbf93"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, 
FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "5b8ed0c33a12ec5393c6bc42e942fd2c231f7f007df21948f9aca65613e6bf02"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "0e7c83c9ce48b50056023347a6b16c601759ea9797058517ed66ed1cea5a68fe"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "5ff569b3a304772db3450b9b057e8dea0a06ba290ae0813fa9eaef1ec7c25e10"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "6407fab9fd82ed448021aae63cf0957e8de9315e91c6c0e8fc5d086e4a6c08a6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, 
FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "bf5643ac086a26e6ece6b39a5a3e73cd3c9b6f6a92dea1bd2ce8fa710154922e"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "fb852e3f768aa801e9a183143051776df942542ebaa48a1b7097fda371a0a5e6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "155fc1dfbe5358e9a80e8bd2a823ac01da8a3479efb19ca21330fc31e0f8e0fc"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "49473779f277011a0654f772a2bcdb5d5942deba1e579f41a822ac0bfd32c00b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, 
FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "7d48761f5f3ded12adb554dd8642ca57acad19f7d3cf9afe3068093a8809f2c6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "e2ec2ce214e30e030974509cdf70a3c619362fdddc05dd3addbf1d6aa58669ed"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "aefdf5f7595c6273bd167600c61a5d040218c8cf2070dfbdcfa3ffbf9b118413"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "ec68407fd547db67c67b375c8252142ffec1269ef965091a5a82167f81bbdb68"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, 
FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "f3afb1530250f0c18724736f10e9ccba1efc54e8242da5a8bd3116f7d5fb2b28"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "e9cfe12178db345b8a762dd513ddbb01667647a758efb531bf3bd3485213d7c4"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "77f05270694df68246c52a22c5bb8908de26d60ba23f6e5460644a91c8832496"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228328, 512, 2, 32, 3, 3, 128, 0, 3, true, false, 
false, false, false, "31aae05c2d844a73fd5e29d73ba1b0c08f29ae7d2bbc92d3697370d4c6e82b21"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 164960, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "0e0ecb81b68be06453695470459e393e45bacff2c2d9dd7aaf874fbe3f7a2361"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 197824, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "b41d9dcb9a5f3a7d0b7674b31c46c69f74d548659633300335cdecc74cffe249"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 164944, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "3fededc50388e0ae62c386621b65a2000a1410bab7a38af01f0d9c3af94a0f02"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228328, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "1fa4c4c0a01cfd413ce285db105ca423c4d508ae7879f6af40307a99755e10d5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 164960, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "754e7b4ba577e0ac5f1f34ab9a72578ac22713811a7d286eb0e11dd09798e41f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 197824, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "15159054cbd50c6a06132772e56a305550edbe5af608439c35a8561b2f7948cb"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 164944, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, 
"b8c32d5418c67c01a9daa5230277e6b664cf808ab3c279d4993d544f49ae9f24"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 179288, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "75f74c0c792cec48107df2d27fd903b725af9bfa4e058e7975354821973b2e6f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 172632, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "dbf884cad0cf6a1843405df620ce014bff816720ee13afd7eab9db623a6ef1f0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "bce7caeb69e2b7014df5595c529cd67b24f3be91be0c7707fae4503d660d8e29"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "32b76d5b6d23c349afb0acdf38607a032920c13d02626ceceb83a161857bdcc1"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "c97e4655bc1177d0000f3aeac701edf37af25531622a8ed064ff196cb9a54b59"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "ce762f5de1422212a871fcbe97261ea233bbbbce85aa8de6c9eef2cfb8c521a0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "e67992098edfd3f9f1333ad6b4ec846bd53518d6aa16784ce9b27cda98e39a1a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 
128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "1a6a8c1f379464ea9129df6579a6d0ca128b85dcaf145dbe21181c19d9da9946"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 179288, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "5aee26e7a4004611718767c5ee69dc6e6d2099484872a19569325e3b35e0bb13"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 172632, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "ef8f83f1253be8060c6fe8a80a39ff82686c5aabb73298c899f4db9e01ddc4a6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "abb8fb1ac67019e0dbf5642e8c96351e0b2d4d6a802d810b8f55e5e60c24219c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "3c7f140008aa584154a13158a0a62c96f1da00fbf4f0476bdb704f4e9ebd5a11"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "785809e00d25a8bb964542974d4396b3afadbb87ca80e5bf48780c01a4ed5f74"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "84534091cdfe42b4aa90893b5101a0d9b3fd7b51929886d79eda4c7154582d75"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "388429d844ec495e4556a42aec69a6567866133e3f5fb79b05fe70dda756b952"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "b0be17a444ea814929be655fb1622a5a6bb280e4c817a1bad4ceb76d24c608da"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 179288, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "a12cb0397a4970f95afc97c5a8bd93f2626504f19d602ec1e613916d1af6b408"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 172632, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "bcfd36eb65f8c1c110392d3232f97b5bce29606e6dbcacc3c7b331a5589193ec"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "44b728454d37e03d6286c029e15ed929e1aa7f8d5a18edc050d417794e8e01c3"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "2c5c8ff71158a20327ca7ceff05ef185ab2e0b818cc92b0d60bf30db3f099fd3"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, 
"472d590a534492f3706c12737d2d20a75ad22fd770c6caf8182d7c3272e52374"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "5e1bb55a4fb5a6fd6fdfb409fe5e730ac5d23f35eed01c4759e3fe366a925845"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "132b3088ae68d251f5e72e55dfff86659bf152fcb1fc47f00febc76bfa5b720a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "2e8ba42d88d7cfaa6b44e31ad2aa07f8d75dcdcdee99dc11fcdcf4a976dd4dc1"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 179288, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "4b8d37297049e5a9a94f0e668ca83d513956464e8f9aedb19789ac75dc7fc767"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 172632, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "f7ab29c0d318d2875b84d7ddcb7b94eb3f0b5eed33fdc18cfc0929feceb1e2c4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "14a1d0ef1cad4a9c9514c33a5c3b2d98655861d28036e84e8a32621be4a50dca"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "2af9acd01d7c39bccb0200e42254fccd7830a2de5c9fd5a4000f6847c04a8abe"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "7869586b2298647be710a46b7daed8c74c2ae476a5b84e953425707211b01cde"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "680f990aff7a67f4df9de47f66a5e599ac767e765c41ca07b96bac46ed727337"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 
512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "70f74c06712123f8f305abce65117546364a8d77b11d17abe0aa27f806a1419a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "59a67258b9c96233ad467482a045f996f6ba433e3fa0fa5dab61d7a338256462"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182872, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "cc4bcd21f78ede3ae50f7d25b8396132e2c11e7f990454d2c401197b216ccac7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174168, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "ed90ecc5f072c8c0e87248af7210324422f676643dd35db944ecacf7a1fcc604"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "9a238abe8cde1d0f2a6a8116fa2d3b98c616cbd6af96a6c07ea1268dbb8876fe"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "d829d2a3350589ae3825d14fd127bc9a3b994998e7fe62959c187d03c3134b30"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "3ef23f11359e035c9daa5a6a49fc6b1e44e37abbb263a9de70d945c4ad0a86d6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, 
false, "bf90dbc5aecdc9195b06da89f6f9dddf2b9ca95452b2875f2ef4cdf63a7daa1d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "e3151531c54a1cbc2bdec9675744ef4a5e41dbf638f7b82c6442ff1f6a56efd2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "9ac2b92dc99b2c3c2e496b7248877d5dc71d712f732f2b317650371918e71beb"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182872, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "0113e8ed611549221b0f0111ba2d260fd3a2c382d8f9d6179ab19c564ac2266e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174168, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "13e7f6f1de1e94e2be4564c2bda1f59dede0fa152eda1fffbc9414adc98781ad"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "8fb350102cf6f6fbd1bac9fcc062f1742c01c814072a437599ead047d9a825e5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "c5b04826834cd023f137c603a3f329b77f9f028b17280837e816713e5019cf2a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, 
"fa10f14ab82f1bfa2e1f86e53399ce74643b26ca704ab5ed876416e002fe9414"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "e9e89c8bdedc8e3254fd84cdec20e281055a84f6ecf5c3e3d304fd83ebcbad7b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "f99bac044167cadbc8b60d85af92248497cd7b4ca6b6179f0da0ab7834cef68e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "37f6fe236eb844e5d948d973c854c73bce6b66ced713d18e740ad061fc3d3637"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182872, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "a368e4c44613b9838a73f7d5b221d9cd04436111210f5ac5535b540aa2c3e427"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174168, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "fe62270e63527a4f8da7586f1e3de915f4461937b748c689a993b30783642bc5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "3d58b4c294607b5339e0c1898bebf3d9550eb57787839f9afd45d6d1de8e5b94"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 32, 2, 2, 8, 0, 1, true, false, 
false, false, false, "b08373eafc1c4addd6978771d8eb507e93752ea8aae3458d507a29f7770f74e5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "2febd509ca76477c1ab3dc56dc28097430287b477a97e4229b9fb7dc3a591ef2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "96e142f859c90ad983dc4f7d3b69cf768551cb2557097e769c7fac28ec0d5519"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "1278c1a3f168e17931a3ca9ec3a491a6e8cc2fca68288e76630ee62d1f0f862b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "81716203e1f3d7a34c7d0f182acb78dbe7594a3e3cc45ec550835ca5692f2ded"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182872, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "3349db1d758b01f8f68064be19d040c7562e5fa6bf7bcfe56cfc57faf24643c0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174168, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "fa20244bf72eacbe47cd3808e980d39500f138d3a3561ddcad97975a373382c2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "2f2791c65d4206f2aa0a953f361ba1d0b2b52f3888dd407b6cca575efe6289bd"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "970d4285d5fc2c4c8db9e1dae524900dff4570311a56a25a5bf74faa1e90b5f4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "2ed1ba1e5fee47770ade36dd67190465162c2d7b0410496b6af371322a164b28"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "dade8489d5591af0ffaec68cb9da4b72de3c6e17ab0125a1f589dfe12d6a4137"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "f25e4cc30d6a82db86a571ecfe1aea36f30d2b005e4dd756891fb5ddaa7c4695"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "e7edb421e5500d4300f35cd2e34b966b4df5266e167c226f2a8c38bb6aa98547"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 199864, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "c7ec2c058d78ced054b547b8518fadfa3dab56d0bffa10b2aff1395365c7745d"}, -{ DATA_TYPE_BF16, 
DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "10ac2d64f0d8feb0d83967f860964fc40aef12a5eeee9169a5118ba1186457d8"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "984b3fd0b918fb9561f9c435cb0a148c8687de621fe188a346e82862fa5cb8ab"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "070defb0a74622ac9cdcbdcea6a0ff77c8f0d3830cb74fa609d4b904a6d2fe76"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 199864, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "6797dd6d3ec90e3d84fb67167fd9f4d9e6c1db5ba27500f614bba1e8bb5b0e61"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "cc32009b5869f7227f4fff1d5ecf34bd1dca1462a77ab1fa7621bf83e3cb4713"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "d371de2911c20e184bb7d01eeffb3b40d576bc571af562d1bb90249b88902a55"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "1545d09e3cca4c14fbaedd6b51043f3ceca82e2c368070c97ffe0e14f1d4da9f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194728, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "8a2c87264bae31c996e099cb185536b93a02fd495e6c7f8d0d8cf0138b927eaf"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189096, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "17ef97909443da35b59792ca36ebc01085f486e3dc6efd2e5e5d4d920ebc54c1"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "74930d1ca5b52fa5bb5feab3ccda34a02027ae1033a270a869f7c0c8d933b277"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 
32, 0, 2, 8, 0, 1, true, false, false, false, false, "c403196069128e0d91d79de96fdb729ce9deafdf3660be2bd7d558f1c9a060d8"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "8d3763140ee9b0e40d138a93d21eb0db2af2b9dd6b3d49e8ddc23dd03cbd9432"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "ad0dc8d680c94413df8501a4f69491c31962174c7d1f9860c1a98dccf335ef5a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "8877ec96d8123fead19326438f5d3732617071f1c61e347573be2b16fac98bd6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "68a4e299c0810b7634c0c838cd3e87da387e79a800d037f0fc72dc5b2f2fe956"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194728, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "18e93412000fdc8e606bd5de0f8a6ef58d03faa27dd28e3a89131a6b1bdefc44"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189096, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "18aec7a66bdac9a4f9a0e974dce2363fc047d0b21d3ea09848c4525a3377512f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "d7e57f0a8c0cd4f6aa231147df4b8a28899b930cd6ad5b993fcea7d691724e6a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "c93e0f01f83b5199fc63b2f3b5e2cbd5f6e32c772f297c6330d2ef0e15c48704"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "d1bbdb475657e6bdd70917b3ba03cba8ca4cea5b23173edd82dd867749db75d1"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "ceb2520e7847fccccdc832143eb8717119164273854aa5d06a87e2bbbd29dd60"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, 
"797acbbb0d6d8ac7518d81737fe992d376eb380d054e233378da735a0580ca85"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "40fa8796cd7d1e556823fc53d39a03d62c94def2184fcbfe1689e70447a6b92e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194728, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "471922def0853d8c0c5fb68e10bc3945040687159b02410a91c2eebba108461b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189096, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "3ed984598e9f366bc9a7720bc29bd0a1324a4c0870a7a72ef47a942429e20927"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "dbaddc4e1cb7b03041bd68a7b6ca44df3e63c8e54e7b0055572d8272d097fc2d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "2ccac2abe2b5a47a2eaecead0f3fa37a649471748e29dd0bb85d3f0efd84f512"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "354e9ab75c0e3f4ddb127622ed237de3242280b32ac55f844eef8f9951903987"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "cc1cd604fc9aeff4afa3af851b2475c06f1c09496f219ecf71ee36aeb1b19ab1"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "064dcdf0213e558b204360139c3e0a1b190682c7be53e24ec118c497d8b0b104"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "3ef9b3a09426fe6f30760ce382f7c64177a9d224f8658acb9630ffeb9c5340e9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194728, 512, 2, 64, 2, 
2, 16, 0, 3, true, false, false, false, false, "3c44aa3fcd684af450eae64e7e351c3d1a228d31ca485cc36a6f9ae61396d62c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189096, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "2c7e6a9f4041aef952d6307722cfe6cbc8a8b0a725dea2a4fd1a047aa6334bb7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "653f813a353c218f75ead711bf554b9fd3a8f45166032869cada37930e692d94"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "16e598e0fc6a5eb0c99a0e8c11c054f47edc2b43936beda2978717f43ab557b7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, 
kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "dab32178ad2ec9aae7250dd65f4ff2806eca84a90fe9f93b34cb35ab20dbbb9e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "72b742e7a8ab402bbfc3460b66d7e28d5e91038ba24acb50aafdcf052612165a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "83e1403d18ab7414536186c4e8534d6fa22766bfbac6b50df023693324d9185e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "631dafb2a1f3871ca4ee1a96fda29d78b3ee8360df5dd82040ad2d0b70f1bb7e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 193624, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "db348f4cf2af060d003cb77ebd720e706b6d89d72067925ac1f033bdcdf7077e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 205992, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "d40848876779f597c970ed116b50b17a9c0ee036435d732c8d8d958de7b0ea85"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 205920, 384, 2, 32, 0, 3, 64, 0, 3, 
true, false, false, false, false, "70e3c90bea921520f6690e9740596d1b61c628e5b55ea8c4ceca4e53e036d0cd"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 179800, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "485d07b1b3077b5897a8328b1d83277bf0b5bfc32a738273f3da8682fcc17d5f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 194216, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "48ddf8960fd8002ef736d68d34e36796431c277d0b08a63b04c70e18c8681a25"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 205904, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, false, false, "70702cb04f6a9deb6e87022567f810c3393384ddaf76618658e04db785d17dee"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, 
DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159824, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "00f65197d64c05c20cda8122875959e477647f69571961d6f8daa81411dcaf3c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 172192, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "f260326120265957d8b83be225c0829b00742ecad40e59e3f5f9e29a3c3edd23"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 146000, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "c9d697a7bd2303062c2955b02ea7b6706e0d2833fb27e4e726b365821e070fc4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 160416, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "3c77d013afca13855a439e39139947bca980d24fa37bf2888b43d8a8dd3efff2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "e8b2db9b04836fc6ca999c99f0fd9ba1e97c3bffcf1790f358dd4a8160ef4bac"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 159824, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "d24a90107c7c1a93407cd222eb86f5d4352af46ac369790be62b3500662704eb"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 194832, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "e14ee126793fa43e5a7dbd09d08e4340c6c95b2b96240d85744865a478f9b262"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 172192, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "a916012c70003c50c484d5a8c410d30a1c7b19775186ac8406a56efb05eeb692"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 205984, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, false, false, "c21f85cf2d226057c5b9f51e0678bb06328da8804998c5cf43bb675ae292dd77"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 205888, 384, 2, 32, 
0, 3, 64, 0, 0, true, false, false, false, false, "74f701966c8a148dd747c048bd9ff342d24d592273bbc4b4ce8c47609366c2b5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157376, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "343807a579c112125259ffccc0c7f9f24be652abdcf0c8ca3cf4d17796763117"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 146000, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "a68c972392f42b548819aa549f16e6442f5e66736e99f32c96f78027bf94523d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 171792, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "f5512d71c59f9e1b42b615687fe93808d6d7a1796bbef73e9ab0ad7e3693de85"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 160416, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "2848f386cb46becdaf1dbcfc0ff86291c8fdaefb8c5f0740ebfaa218cfb8ef03"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 193624, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "f427a93d3459a72da745d1a4e44d79d57079fb50bdc5187bdcb4386a3a355655"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 205992, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "16ee3144d845da42789db9325f2d6dda3d91edcf2ac029a7de9d56eb6abf69a9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 205920, 384, 2, 64, 0, 3, 64, 0, 3, true, false, false, false, false, "5a60aa67c0701461e5ba5011eb092cb29ae578220498fc07e19a510c543ab40c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 179800, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "5af5478b7b57dc495af5974f2d687a5196ee1862697bd3a659f0161285a7938b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 194216, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "b28e7b6590c6ace9b59e94866eb0892e9f5dcfdf0464f195ea76f2874f9d9d6c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 205904, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, false, false, "aff8bc34838b8f37091a92f7e786b75be0da409253a15132555dd019eb7d86da"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159824, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "ef47b7d9743f7b0b3df6eed0df057153b57f7a5ec2daf8e51bf20bd6e1987591"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 172192, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "1894bb809057a760076ede90060d6cc12befbfcee201ca0b2122407d1625653b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 146000, 512, 2, 64, 0, 2, 8, 0, 1, true, 
false, false, false, false, "efe6fade41c9daa53a63fc227e4955c44f4669e83607e96de36f2e154b37da0f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 160416, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "f088e52765d8408d63918076a391c38ffcbe8f1c4f3e0c913eb07e20a54bd2a7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "512b53e4d9c06003ef8ab435a6a2a8601feb9809eb50c38240bc3d1b7d35c790"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 159824, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "23cb005dfba5fa5e1ce211effc7f95c74a856c32e215f8fc4d76a4075647df28"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 194832, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "bbd3248d113c8a974dd0ef46c189c417aeb91b2a4da5ab7b17269875ab479dc3"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 172192, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "8accd24eed65616778cfa554830e302171bf39f725a40b1957d2b2b1f20c9b84"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 205984, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, false, false, "1c9390fe660535d4154216f525cbabcac8abf7dabb85a216ecba9b9c3d8854ea"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 205888, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, false, false, "006be6e4c1d3ca4fec25696e7eeaae2f3aec2d9e1dba5ad56a9245f8a3f97cb4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157376, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "f27357e75d51d8e5d67f2a8a5eb3209a48ae448e82c0856e8af81a9df10236eb"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 146000, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "b4181004a225c9be2fc9bb968566bb7bbdf87b0f991c71c3001303bd7b4c35a7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 171792, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "51f9c2cb488358c54fd0e06090177aa9beb4a1413a9a24eef88aab9e1c8e4019"}, -{ 
DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 160416, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "873e3c051bc6866b42e7f9e5d92a2f42f50a1f57b7fcf2c75fad370ab8939351"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 198728, 512, 2, 1, 0, 2, 16, 0, 3, true, false, false, true, false, "06b3266e57cf8b1bfdc0c344379fdff6a6a3115c4276eead8f374f3dad40e9f0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 185032, 512, 2, 1, 0, 2, 8, 0, 3, true, false, false, true, false, "60276ce91b556d50e73546d2efae3e88d3869d5d4a533f85c2e2a21e07e9eff2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 164928, 512, 2, 1, 0, 2, 16, 0, 1, true, false, false, true, false, "1e1fc6e04e60406efbc5bf9867300b0218a3c32fa23f5ef469d6afc009429317"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 151232, 512, 2, 1, 0, 2, 8, 0, 1, true, false, false, true, false, "9cdea7a7f52eb14d248957f309d16f95afa139e709172b6cf7f3406cce964e0b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen", 169120, 512, 2, 1, 0, 2, 16, 1, 0, true, false, false, true, false, "95b99ef9287b66ad09cb29a9a1de0c1231da361f87dddf5339ba93bc6d36156a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen", 164928, 512, 2, 1, 0, 2, 16, 0, 0, true, false, false, true, false, "4e761346eb86f891f3ff7d175aa2c279ea32bd128737440c8bf50ec398449f87"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen", 153376, 512, 2, 1, 0, 2, 8, 1, 0, true, false, false, true, false, "9341d92fa0d107bf41d782c86ec09249eaa4a8a97e73ac92a72422d8b8dbd479"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen", 151232, 512, 2, 1, 0, 2, 8, 0, 0, true, false, false, true, false, "4bd3b144a5e0337afc6a5285bb1cf4373dbdea3ce85532780f0c16881e61b462"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 
193112, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "6d59fe7c879ad7e5a6dbc97043cb42776c2a3262d0416a90e85114d5b4981a49"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 205480, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "4094d9e6c2e7424c16fc4d52f8c661825aed49eeff87434362a46d04af971f39"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 223896, 384, 2, 32, 0, 3, 64, 0, 3, true, false, true, false, false, "b6306a8ca0541b26bcc5d222e17bf56cf971e9848f52f07b4397f2de5e298f5f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 205920, 384, 2, 32, 0, 3, 64, 0, 3, true, false, false, false, false, 
"2fb65481fafd3b38e70f000cb0caa2366919ee03fbcef52dca21a2f38eb7a507"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 179288, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "607da2c224e9a77e509894d6fb9606b0c28b2c8f1e7f24e34d181704364feb8b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 193704, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "1dbd717b8e448220e5b177663e328fa3e591d1f79b96d21e5bd954db81055082"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen", 223880, 384, 2, 32, 0, 3, 64, 0, 2, true, false, true, false, false, "dce213ba8264e5d53a23be13b8fcd9c987bfd732d61e47bca6f065055cf5d9d2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 
128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 205904, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, false, false, "ed788b039823d24ac1bd8ccab7fdc48556addbe6ee2868746986dd95b4393f00"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159824, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "e2dbb024c8b4c5850f6ea5156adfd545d3b2f75c23d94abe4fab0550be431144"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 172192, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "344c287f6491b2c895abf1aee2f3b8f6512db8337ed3af1c2211bee9729e64aa"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 146000, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "76eec65f107ae2849ed1de6c618f1187924b290757a381926dd03a64e7d45a61"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 160416, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "c72559c8b9f36d02a8385365c3c5a97ee1805f8eadace54653aab86346fc41c2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "80a8d202a41b0779868b0dcc83a50f9c4d846895473b69e7648fcc5ceda68af0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 159824, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "3d8f0f37ee78ddbf01a7ab402850a16fd1b78a82b596d0d9d43bb8a7de6f150e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 194832, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "ba823bdd35efe34d231f2e2431bcfde7b75b246144f29fcc6bcbb723087ba512"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 172192, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "87e6ef63dd717402973b3b4d236753925b7308724f13a8dfd531ee715a62aa4c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 223960, 
384, 2, 32, 0, 3, 64, 1, 0, true, false, true, false, false, "223db9ec028163e242f3cfb3c89a45886765948dc0f8f4f0cea0b07ae4f6d5c9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 205984, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, false, false, "281c397c7bd6092c4c0ee6acc4db220d73b96fd5d92adf5c4f95340315630f3d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen", 223864, 384, 2, 32, 0, 3, 64, 0, 0, true, false, true, false, false, "c5557c300ffd90e49517f926204c8bdc1810738252518882be14535233a137d7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 205888, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, false, false, "78b20f4ca05de9ecce2b6271ab1ddbf04ba574b24a90ddbe8f97b303e4ac7f8b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157376, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "defadb7d670592ffe0929f2d20bc7ea2a6bf5e017046ea04757dc1cf6ebd81f4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 146000, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "38835422b98967df06d71190e906243af13e869f1b376e1b5a38aeb44ccefb06"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 171792, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "8d679cb4953ca51afdbb32d4127de4f1be21223769f1efe2d9057767aa947052"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 160416, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "0e74934fabac9dd10b055efe3e629f0b08be1025fa9dc7b6bb522b58adea6707"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 193112, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "9493c5028ddce8c5343a75745aedfe63c24a5dd6230e4c11c3ba5625a2cffe40"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 205480, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "aa506e193892a28e91ecccfdd0f3eab08c451b4ffcbe32f698a6b3c8599f6540"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 223896, 384, 2, 64, 0, 
3, 64, 0, 3, true, false, true, false, false, "662226cc7a6961facb8eec7de67366cf214dcba6247f5152ac6a27113acace0a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 205920, 384, 2, 64, 0, 3, 64, 0, 3, true, false, false, false, false, "3649fe210b25e7496083b2ac37741440a0c24b7177d4bc66849e03f2a89d2be6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 179288, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "4c95952352724511775c17907faf47474304c7546e9e40b0cd32794107f5dff9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 193704, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "d1e5d261638ec70505c2d2f4af3f97be0ef4a55e3e1a58673b69d6b6300540a8"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, 
DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen", 223880, 384, 2, 64, 0, 3, 64, 0, 2, true, false, true, false, false, "82332f5f8842b54824b5579fa7b63ebea711507ef276dbabfd478f09d6a8b6fb"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 205904, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, false, false, "95411e3b77b56a34b8d34f77d298d46bba6c02bfdef88bdb56bd703ed924cd5f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159824, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "87dc544dec1905dff6b72133e1a1d5f8b8d2cf9734aa1aedd78a1814ea84fc6c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 172192, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "092f07d591855d443261063cda33b20bed913f543081aea5792e7ab3ba9f6592"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 146000, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "f109a658bdad6bb7491387979106e1cc70189f4f86a754dc8849126410c6116a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 160416, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "bc8164cde068d37089223b09fb5ddf709bae852f73c03aeedb8de3fc39f1eb7d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "9182df6dc8eb51955750c6883f26da425f1efebe4a3039d6730253a9c6520ccd"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 159824, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "25c96405e84629f182818bd84fae1fd1cbbba47cea399de754542b74bc258843"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 194832, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "fc7193950594106c29bb27dbb624786c90d06782480be6fbb8821fe3df7499f5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 172192, 512, 2, 64, 0, 
2, 16, 0, 0, true, false, false, false, false, "7bf7c6503bd6c47204aa9b69ba64e72e0e8d385992aa063b24e49fc9c49aa83c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 223960, 384, 2, 64, 0, 3, 64, 1, 0, true, false, true, false, false, "689dac7f34469f070682f8e180775c3442cfd8b6b8e842342a2569ce7410bf21"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 205984, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, false, false, "178450a937e438ff911f2cb1f6f736b6195519f4bc6ee202ad020f7e59ac4bd3"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen", 223864, 384, 2, 64, 0, 3, 64, 0, 0, true, false, true, false, false, "d21698c644b865a8d420e2ed1c29bd20dc852f260f9ccfc1f5ea3f1210b6a8db"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 205888, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, false, false, "4626ab1b90133190be3f93ad65a3bc1189e8532e5f914dfc46932982035ce63f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157376, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "a75f0543554252a5474bbb1b79f08a3fef833fc406d5b2d6a87143ce46cd1c38"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 146000, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "49d6690bed6bef538c6bc84c0f7eb0ef1f1df6729e8cfcdca61146202597e70b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 171792, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "c666bab8d93e2c8848f69080fb9bef94e3c0f25fd7023f024e15a7a462e1eb9e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 160416, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "0a2a71948532a1ccce94277eb64974da34128bdc7ce1d404cbb5a259caca706d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 198216, 512, 2, 1, 0, 2, 16, 0, 3, true, false, false, true, false, "da556bc5415e80f6837d755a902aee118da052dd2f67ab03466abe77c9ac437a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 229272, 512, 2, 1, 0, 3, 64, 0, 3, true, false, true, true, false, 
"bacef67fd1d9aeba8d83013002ea85f3ab2f4fc6124491c734e06270501475e5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 184520, 512, 2, 1, 0, 2, 8, 0, 3, true, false, false, true, false, "480a13f5b512823c67f63dab3e2ceaa1f6da916d1174b926e57eddbb36eb6ee3"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen", 229256, 512, 2, 1, 0, 3, 64, 0, 2, true, false, true, true, false, "cda8f4bbd5490824c4c99ec342777545568ec0ec5798c7ddab513af0f9d8118d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 164928, 512, 2, 1, 0, 2, 16, 0, 1, true, false, false, true, false, "87c5b6e1550486f7a39dd3f9015993b46f7d36c9b4676f2e0a3784d5bab290af"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 
256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 151232, 512, 2, 1, 0, 2, 8, 0, 1, true, false, false, true, false, "2d3ef6f9e1f363327cd040d6e04cde7da2eac599d92e023c27cd77b9544f5085"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen", 169120, 512, 2, 1, 0, 2, 16, 1, 0, true, false, false, true, false, "1dfb46fb7146730ca95736f8070f39a01a1902e4720609f82575c29cc771e0e1"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen", 164928, 512, 2, 1, 0, 2, 16, 0, 0, true, false, false, true, false, "0b1151fcf244445fcd9256e8d1999aa9df05254a5243e8e32c134524a1f233ba"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 229336, 512, 2, 1, 0, 3, 64, 1, 0, true, false, true, true, false, "3034cff15d5229c1076a4e54da6e6f3a46a83983c9d3c64c18a230e50e0eec0b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen", 229240, 512, 2, 1, 0, 3, 64, 0, 0, true, false, true, true, false, "2b808a97b36cdfcedb587df0360f6aaf2686e50e842b29bbcb221b27e44d4269"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen", 153376, 512, 2, 1, 0, 2, 8, 1, 0, true, false, false, true, false, "789a5be4673aabc8f2ba5ee84dadecf81f3ce6c0b81495fb28fff24a1ac92d51"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen", 151232, 512, 
2, 1, 0, 2, 8, 0, 0, true, false, false, true, false, "5d067a9fb767e19b9283a4825470f4b76ec11a1af842be0e973f861d51891d34"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192856, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "8da3b7eae9f5c094db4069af10e236687ead71c61a3b8776690fcec0548e5378"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 205224, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "1056983b81574082f520a4e9143a664b21ce18e1884983f59df9cbbffadff164"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 205920, 384, 2, 32, 0, 3, 64, 0, 3, true, false, false, false, false, "8035e18e3fec9ca52ee1ca5f43d844d22d8617b9b26fbdb382099d4470eada9c"}, -{ DATA_TYPE_BF16, 
DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 179032, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "7f5e37105795a57419f2ea32456d725061e19d8bd80008aeff166da7b9582f01"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 193448, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "1655513be3f42bd3fb68ff2e0424bc62cbe80ab43c65fe14a8efdd53a4bcedbf"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 205904, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, false, false, "11eaa63656fd117d016b2de9f732048c803dc5d0bed51594845cd4f03a4e8d25"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159824, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "6070beccfe72864addfff46016cd4d3fed1593592b032fae040ed2d77a2156db"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 172192, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "563c7124b0e6227ca8f18570f3dd4f7fe644363edf0b0b32059ed586f6504beb"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 146000, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "7528d830fcf0677c19d85c38bc1da687b3b5f3f99efe3f0b0bb901523a1c9209"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 160416, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "e43b4fc4c58fa0509fa4419374e7649d64ea733c45f98f16b40b8a939efdbb1f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "ffb9dc193f477ee85a414b6a0555158879ff90c3d66a2ef2fa33257345db60ca"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 159824, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "b4a0429763d6b424943e5338036fbe348fdba9add94a941c646f2c5c4560c954"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 194832, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "7af47b236ef7c828b047a8cdf182b53b292c3478ebbb747de8f9cb2d9d9d575c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 172192, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "66b1b1db442250c3b5fe53b3b5a7689cc01decdc90c3942c7aeb450cfe900789"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 205984, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, false, false, "adbab591a3f7d328bd417a38ab792f3c298f35dfcc7b0e7e4563388d6dac9cc0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 205888, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, false, false, "d038ae0f3a256ca3dd8eb1da387ad20df5f930ede680772a1077875b0772271a"}, -{ 
DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157376, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "3838958924179ebc4219f8ace092cdfe02df2bce433219670a524a153c483e7e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 146000, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "8f04c30e9406f446e3b2faba7d4a717c17a17651b54a40d92a230ab7afd06fe0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 171792, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "b0e186040c3a648c3c46d548797561ff99fda95b7dcceac57e9dd8d7cfceae13"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 160416, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "04b78865fbfe84b5400ee5e07d59fdf0f57ccc962e7cce3faa927423e6fb20f5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192856, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "8e4f761915b82be84aade012ab5cd3e3393d4421cd36c7a4bdc599b6f4e90c89"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 205224, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "d84888d45afb95463ac37f34e3cc87a608713feaafdc758e3605f5ee3699035a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 205920, 384, 2, 64, 0, 3, 64, 0, 3, true, false, false, false, false, "80021dff2d7999fe399b0948eea627b2e2971b191e3b6d906d5ab3e04676bfc6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 179032, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "09c758a5ca7a484d7b8313a57a7bc99d28b33911d5bf6260c19f61a60516ed49"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 193448, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "3b63724a4a90c7c42d8a9245a2089e91a905f258adfe3285a16d4036cb8207c4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 205904, 384, 2, 
64, 0, 3, 64, 0, 2, true, false, false, false, false, "991f8885c25ef3f6531648a95c2f875ba1624b85d93d52ba07f08ffea2718b43"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159824, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "a582684e83d33f84b03e2171920d594674be7b59a9bb3d0e02e76b15b33347a9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 172192, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "b6d215dd9ccf4d6389851b57d0596ffefd9de4a943bdefd02cee9314ef5f3128"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 146000, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "80b6798ba5a6ae3a97c03c82a4237b758d8466abafb08f886e7780cb9918aa4a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 
64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 160416, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "0017f6bfcc95033ff7d2b35581e58356db383e8b31c31a452e8f0f62d07cea42"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "014518a9970308e6b04822f8e11b111345d1a59ef0fc28999dcecc94d2787eba"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 159824, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "05dd4f8ad9cad975babc951e44c75126bf6b8935c9ef3216c10d4b5c4b5f257e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 194832, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "7bc2116d948aad58638b211a4b978c49f5f8d9b91cc0980cf9cfe761897ef17f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 172192, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "4874dec7c92b7bf13b3a25054b777bcb27bdb07e784fa3d8ae62512bc65fd0f2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 205984, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, false, false, "083ddef859398e8c76b580a12e0e434878521c9e504a883d05163a417245079e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 205888, 384, 2, 64, 
0, 3, 64, 0, 0, true, false, false, false, false, "cdc5c4b5d4224c9d8a633e03ee59cce2b6b28f1d8b7b912804630a0a04195a90"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157376, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "61568a0ddf24294acdfa9a3dcd19b4edb2011b9af1f401c6785f92306cb281e5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 146000, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "01cc15825b3f77a954ae37fe4c9a7699bb12cfd6ac62d2f0731ad17b9f750379"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 171792, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "501ef374eccde936716cb2935d524f78264143fa0c69b01929647951df16e77d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 160416, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "0105e34532f13d43521a89b01ce8db01101333c371a15cef16359cf4a2fb06ed"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197960, 512, 2, 1, 0, 2, 16, 0, 3, true, false, false, true, false, "1a3fd7fa84286d4e7f38f59b512ba0df793632778e948358e971dad151ea9911"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 184264, 512, 2, 1, 0, 2, 8, 0, 3, true, false, false, true, false, "d643347ea3cf3c3f2c8bb342a47ca43b6a32fbf24fc2c3175e01df93725120e7"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 164928, 512, 2, 1, 0, 2, 16, 0, 1, true, false, false, true, false, "c8bc2d71f5744aa84e8e3e4f2555e55d1d336fb3e8645d148a467cffa3137b5f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 151232, 512, 2, 1, 0, 2, 8, 0, 1, true, false, false, true, false, "bd39f2ecb77ed3d5af9890c3e487b51629407252abb650cdeac7497b13b44c62"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen", 169120, 512, 2, 1, 0, 2, 16, 1, 0, true, false, false, true, false, "b148d5af7f594790b4321da492212d58da329d399fa9af91ae60666f2e7ae6dd"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen", 164928, 512, 2, 1, 0, 2, 16, 0, 0, true, false, false, true, false, "dc0f0fb7cba9edc9508e4c91a3b044d1d933f91e2c5d0e5509da707c9cb53aa3"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen", 153376, 512, 2, 1, 0, 2, 8, 1, 0, true, false, false, true, false, "f4257b2b83dbdbb84dd444fbdf6c9685ae37cc0d2a00a7d49072c8e9d4ef0a78"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen", 151232, 512, 2, 1, 0, 2, 8, 0, 0, true, false, false, true, false, "028c0b097dbf90262399c6427b5c825ddbf1a653d282b7cae43ac136d99bd1bc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228408, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "6591366e1a46d63e0c59e0a2b14f24eb0ac5a93ea1d891729d8dc3f447742f76"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, 
DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "f2fc721b937380d8ae65f9df9f26b42903b2806385dd41916ffd08e862632823"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "a8a9de11a9d12be84c3e81e1b889136b898dceaf71eb99504f6f14c3136c09e6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "4fe8cedbc9ce9407ede6e5b424490b7b8a71cd9718926e6287cbcfc1e64d1023"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 
228408, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "6b510432b7e7e96df705954e1b0657ab54e855c2fbcc10e46567c8598b36d9fe"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "49caba808b00355d8b958991f6fe589693748cd6360bc5462b4ef146eddc8716"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "4e447170b91bcb44ff2a16b4e763ee1924f3fb232cef77580d5e481ba5a7dd55"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "c47ee8c3acf1cd51e238891bfeb5530a12c4f9215edef695c4b68d55d636e725"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "b4351ac94a459115103aefd824a8bfd96292ea2a42176127d60c8d61e93da29a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "99268ba2e2bf141a332ffc97ec6bd95eec896812d5960da6d29c3bf036542170"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "29215a9848bb7e3156f3f636938b8a8b27c7555ee1a8977107f0cc4a863a9fb5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "4896abf7c27f6465682418ea962270b45df868afae46687d25874fb9303f93ad"}, -{ 
DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "759e74842962e4b93d90875091b2869eff15c66aeb130c7b763098c2dfcd4efe"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "ee012f7227a856eada1c4df3c7f1e52a8546bdb0c7fb5e03f510fe1dd7832ddd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "344579801a9e1eebe3fca7dd175b1b8be864d33f739150302fd68de3f3506159"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, 
"9f37b6985abf2f391eb697da18842d54474ea6bf706bf284c987b18672c72b11"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "e487fc4aff00f0887d7ca7b9f573c50b4bb066bb4e94dc930dcd7c7badeadf52"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "5488b8eb3ecb57c1e8f9a2f8a18958de100584929d4ff5d15846f345297abebc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "760e21bb6ab77ef612ee5ca1fd92bffc5adb01eaef1beceadd9d5701129cbf0f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "0aef4f882f2e8ef6c49a638f8bdf2d30834c9212c005127fa2cb3a87ad02b5dc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "a91a807e35ab2368be86caef33d4f0c4f238588ce7030c86c30435ec840ab933"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "766173184eff161e79fcc3230fd1983b9420fc1f38da1b0983125a564fe41fb8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "a67b231fd10f204ba3a2c2dd1473435234bdb17b579574a2b2c4bb6a8495df95"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "6dd3445e473c1b6bd4d3e4c367bcdef15734f52d3b371f7f2b8b05c260c7eb03"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "db22df32cdd5a393c4880039d6d96ba6e91abdce4c50edb548ab6989b77a60d6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "aead57a3b587cdff93b6d33aaed97185512118520faf415bdbb436e26bf971f4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "fe946d71499dd926859b268bb4263ce6fd0ff8d58f789c05239fbd75c33b46a3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "ded5bb4392bce5c803e9725bdb14666ba6c4aeceb8c05c2438c26145f2cdab43"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "fd334f95afa6987fd5e5ce6cebdb5fc51580f4c277b15a7e35a4c381638a9997"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "8543d694d36cdaeca6dfc5bf4fcc3bcbfc212ad784b5677373952c0fe5a37df5"}, -{ 
DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "5ea834f780f376a297faf063378b05899da0eac6a7be0e9809e2be196efb598b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "364bc4b11e82a6721debeeb0e30d4cdf720a337613bc5c568512b371849b90fc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "eb52af00eafe9f72ca2ac09d42d34eb7b9ca76d9fb20525196cb3377b3f4e73e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "eea1f81f39ae7b2238208d005d73ca0e7f637c8c9787843ccaf9fb9397889a5d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "61e513cd74991a0a8d500900d0c99bc99c3326ee018a077fc28cbd639cae7895"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "c338cc96e09e428a47c05361bb942a094cf534ae04a56a329c4f5f76ab5e103e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "be1f305ea57b7e7916c5de6acbabd77ea575001409dc6adb1a78c4f225b72436"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "935864014cd0470f558cf22b67394a29a6393784cf0fd95398b106e8e586441a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "e6f57316437b00b96e438ba4f41be404bdb4d962fae626ba06a2d08c47b74d7e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "cc862f8ac94b153e0533968c99529e8c93342ea9f8e109844df256f2c384a458"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 
16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "17a6fc1f4d777d41bac501950e8e9691c677d6a2bf797d8408e3971b800f0818"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "2f1bef9850693d0ea2f5500fba982e912eaa2564b090ebcaec1c77ee27060a7b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "ed51f8c678dccda4fedb01b59a736e2374dac52cd4a48ff1079b754a26deecfe"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 
512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "8730ce7eece18520bace455272954d24c1eda6342463d88a663711429f91fda1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "1da6192247865be565da126cc4958cc326ae4a96320ba24f9ea984fa7029a6d8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "437f2438df494a6557b034658acae53a9a327b5b68ecdb408063da0d78bd29ba"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "c704b31475521f6f357fb4a46cc417ac4a4e62480ee93fab18683b69204ecd44"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "4e2fd3cebbcbe3a7115c0929068864a3b8f077d12c5bba0c998be7f478f12d99"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "c04b26ab63b46da4c7a908f6a5f76e1927ac23a98fd62c57d5fd2a13a4535314"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "6a0b1b9f02e7114a4cbb71712aa6c9271e890fb5d641512e495a889a7378c4ba"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "94e0541827670a128f23d2ef14505bf873bf1e10d84afca75ee7f3166d8aeb37"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "1cfec08edae286692fd6d1654df9ee2e5c552db976efa0d813cec844172bf336"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "3892ea13d790774f75afba53856d892eb69e835862b9252d6a9cfbe29a10df17"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "ba367a0b88b6f14b1d4772a4ca96fff5e3eb66b4871eba7839e0e1ea4a998081"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, 
"4145523c959e0d8fc08b2c3fa6f87202c7735488801b145093ba4b80251ada19"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "bf596c3878d598f383c8412a5b552ec543eb21fc0181fb8c11095f1257d75591"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "b7bb763d8da232fe0cc87f6230a1f258a7e3862f082500219f33cf45fd9ecd24"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "7991f07e36d4de8a906a74239a59f23c8f0f23777d50a76cf55a186260b7a980"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "5dbff3fa50f84748d9f123d5b7a0eebe6c6bd5fe71e5502ec2a4016ecb452487"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "441fdb8baea01304330c67f7690c6d79aea9920b311a8e59eddbb55cc0a5cbfe"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "6b9831fe3903b076d6217ef1c382f9a918ee3f0e5e9dfc91fdbaed9258610d98"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 2, 2, 16, 
0, 0, true, false, false, false, false, "82007a1885ea6a1c74b6332fe766176ce702a45d7dc02b9e704032095f679e21"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "a157b8d0d44b47d83f8753ba532729e3e5ea7d0f9dfa4afc5d6c1c3964b3c74d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "bb27820982890ae91fbc116c54adc3a50291be69e4e753843413e4eb613577fa"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "19b787128a9ef6d40ae26f8dc21a13db544e0378d468229311058dd049b09c38"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "b61682f31dbf16eea3b50860b50b1477004b292d9656ed7af9bd961c99e762d6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "722a83df6a60af31fbf5c92932b4056576038ac99841b9d87dc1ad865150d405"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "5ad91335d313bd57b3f9ad566d2df61a3021024c1f2f65baef4655afbee17b66"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "ffd51294bf1f474fd5c9cdaeda995145a94734c21329d8bc4d87e99046b9027a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "52f57ccd02175474f68dcd31596c96cee7f66ba522f2f287cab09b911265849d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "91b1fa2943624c2a82b345d2728032572d90dd516675975b3e74ff49bea445a4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, 
"6adc60a600ccac3ab0211fbc19abcd955ae2e36d0e279b3448613365251c5897"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 191816, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "5360d720f683d802ce349e4f6ee2d097230ec6423aea4a57ad975de51e21b79d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "d83461c0f1ade7695cd3ee53b33619394f4d8c6d9fbcd58f6ea8e0776ec04255"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 165280, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "143017245361eb2b647a3f433388ed945cbf642ddf0518fb2b57742512d8d1e8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 156976, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "f40b1390d3d2ccd4faa36246690532152363cb996426afbffeccacec71bcc7c7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 191816, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "636d17e128185dd9d7b0a15350fc1c6673280c6e1e03e48582310014652220f4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "e08962e86ed98a107baec48dde0d3d9622a56265468211487a5fd30ec4553fde"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 165280, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "1bdcaf5626031f611d1cc4315d564ca4bdc3b03560af5c49eb5444bdd6b04518"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 156976, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "4d8edee5897060baa9e611c430fe495f6a58f7b20e00ed69f64084d5d24c7456"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "e3068cab504cdaf317526f0266c516e777735d4312aac381ef34a9d1ca2df5d0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "0630f355d20e7cc450b3fe58673aea0f0eafba5d455c0cc86f480761dafcb31c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, 
"d2b96dd0dd17347ec3e3c187e0c93bab4c756b904ca390549defc23041521d65"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "44bd26b2303645cca078caf72837a14416becaf44661be496d056b93be617d5b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "e4e7e7e8c1a6d275e51fa162fc8f45a70db7f04aa84141fb395c1684a2d5cc1b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "71f99c46eacb2eae5e98cf28c68719f043f9133ea19783b96fd7f9524b75514c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 
153504, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "8a264819962f4cc3fd82795cb626386960caba583b9ae5594c59c78d2f486467"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "6de92436731ff9c5aa3fa2b91b0466be0c537a7eaf1c974ee0ab44be2ccd71af"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "9c7581a0dbd1608d615a3881224d5d3ae5523be524040017570411656bf13eca"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "09f98b32ef58c8d9769f0239a6ece0793192e13e1a4fd3200f0d74126cbd1a2f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "d0b3c60aa39e3dbd9ec914b5437c633e47b806c36a50a6cf6660bb1c98718708"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "273f532d4c5bc0f6c5760defca1117044f67aec8fdffa77676a1c68e6cb8bc77"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "8b4f0ab3a479dabbdd0bf1185a554053b86c166c087af81ffc19d9d21776b12c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "b7c284980e24a92631b8923cea6e0d1b8318c1ff7362e006301035dacb5d1f5c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "faee66c4b922de884d7df950f6de43a9578f8003c7544626fbc8f2c102fa2985"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "b2a82578086e0fbe040543ca1907756d0dbd47327a9744f1554605cb2b7216e1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "d1f484541cb97c301db147c7e28a96af2d3e01a7ef95755b5eaef99c04d1c3ab"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 32, 2, 
2, 8, 0, 3, true, false, false, false, false, "aba85e6e8d7b479c218e67a366160122bb535a37be57e19b90c254d950d77f3e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "2227129373901be82d28a2db4c2ff02fac174077d631fbc632e90789d07070b7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "aabce39afd2ca59593b82c8c53acf4c5efa14bf87f9675dba272175ff9a2a4ad"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "e62f44f517f3878eda470a75dada308733bb724d3491f538cbf1d3a797fe431f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "769d67c163b8283eaacee5e74f303e8c42698732bd6861a48f25a57eafc20e17"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "7a97ec0519f9525fdf8c91d5e7c3dde64522a0ed7104047882bd7cc436608480"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "697a417c1fd95b9ea44866752ff81c4cf33ae05bd041dcc393549b83ff375b4d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "8ac0f8fe1ee9f23316d5083230eafc1c40d7940a03d8a8b2826924204a97e5e4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "4409b3f7084c519ab916b9d0689134c3cfda49ada324f9e2845d5a9e349d763d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "945be7dc47686eb3d93939a7e18602f8720ebbc22518af10aba579ca71995bcd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, 
"fe38c77bf1845ced2c14375b15763aaefe24f29e3998bcfc2b89e29f4f389a7c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "17fc829988b9daf9e62b35a3e8f9ad883b5f8034f8a82945c32ab03d04e196a0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "aa3504ea88fabee24efceccabff4fe82178fde1cce7dabdab7774c2deb3812b6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "525cd925dc6c92c4d6d749cd1518acef164db67cba0a7de90c41e52e7330a9bc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "42f1723a9aa3727d1e5b336c89e1312f60fa467bba0b3f722dfaeb93d35bba5c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197800, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "0229345cc3b498924ea38aaf1459a7d56537ebd826a64b9350639237bfd80510"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 197944, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "3cfd5399da3eba0ae352db1de2bba8d1edcce1657c0b54fa5e47bdbea3b07121"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 214192, 384, 2, 32, 0, 3, 64, 0, 3, true, false, false, false, false, "65aa653ba809f15fd2236dcf79bd958dbec0dc1dcf461272af96d52adcc05ad0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 190120, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "1ffa430fc5cd8771c66cfa4edf4bf282e4262cc22fe14e1a9d1a7d4ab694ae0a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 190264, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "46c82c751ff36c5c06d35ad47d5f9fa00bd6efbf70613fba60b163564d5ba08c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 214176, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, 
false, false, "77a351e7d6e9dc89fcf82bd549eacfb7b40e35720b3a0a14ef6f6578f49f2c40"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 164000, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "7157aa4c9b33bdd3dc6798412f2577aae117d3a9c16f0bcd35ba5c641b10027a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 164144, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "bcff6517a455cf8dfcb420cb38f083946ad783fe5e9042ccacf89ae75d0d449a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156320, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "499aff184a9da13e55a77b0508e7be558ae5a42d0bc58086081b6ee05fcb1651"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 156464, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "3be75b7c9ab31561665f084bb143b828247117ef8b91e0e10968c27f818088c6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 178448, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "687b8b15622fe7e2d5d67e7798018f08d2b7102f2deeaa6acf91976ea34e7395"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 164000, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "4fe103a0e1527549c48063be1a59c8a49651fd373f6355dc2abe9ee99c56d94b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 176544, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "9ae2f0fe03c30f494c258437b38c40d5c6f1b7c143b241cbc9dd4570614eaf38"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 164144, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "80d39a8cc6ffb78ca645cf50fc33629ee16050e21435b7f065395e53e2bb4afb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 214256, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, false, false, "15345de9d91697184a7b6418ae780d6bb9967f46a2518728d0bfba70fad4708b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 214160, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, false, false, "edd79d8291e5865d652c14e034d0db47c6ec85bd79bacb216d0a1d4f55804142"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, 
DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163600, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "70b92392d1166186670c59a00b1b2f2f4b45a6d16fb3a21954da85b7fc684213"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 156320, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "1b1114725dca8deba209587cf1fefac03d30522d557f71e245368e9e77ae5b9a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 162720, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "bc78bf00180aff46fb4a604787614b6b78789bec91fd8cb622bbb1fd77fd1cdd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 156464, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "ba74aa27c22aa5e8906af07a4f2c1048dcdd51e69ff36a4eaa94dacaba50fd77"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197800, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "258f647c9c4f759cd0b7042ed2dfe54032340673bf13ae4bd00c6841be8cceb3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 197944, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "cfbcf919093353fc93018450e946bd965fd252f8ed23d2bbc6509a3a4b824918"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 214192, 384, 2, 64, 0, 3, 64, 0, 3, true, false, false, false, false, 
"1c90dad7c6a12672df751f008754a21b8aad2ef99162dc589e441dfa62d228cb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 190120, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "488754a513543b61e143ce41a52241b004262ae80d68a0fd2d18615d57384731"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 190264, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "7777a333882b80c7b7bf66d1c804833bdb43e4779e92a7c938aadcd63296e2a2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 214176, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, false, false, "c99300483ce65c36740e7bddf322d994867c50a04464c33ad9b5ecd77df4ce0c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 164000, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "9c65aa5ea3a30744e0ad089ee5727431e53f0a53b1a7d63f3107b83adafe9021"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 164144, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "52b8479b0f2fb2ed96f7d4401211c514cfa4ec3ab2f81ea4cac0cfc08cd8f665"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156320, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "83f13fe39cb1134958bc552b3bf2de93544f5b037f491720fcddaaacc9335e5e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 156464, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "5d613bf65691d73ead429a5229f8ea5805c5f10dd5167c2d33ec38faeab4fc00"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 178448, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "d3f97cf6d85e6077b0c8cbd64b672f3ed046034e9f9078220614048512ac506d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 164000, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "cf6c02770462bc02eaf74763981bf99f9e79847def0b9a3e03fbdc27d4ad5337"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 176544, 512, 2, 64, 0, 2, 16, 1, 0, true, 
false, false, false, false, "a975ea086287877ce86ff1c82a35dfcb89841f58dbb4d47c07b46d32b2fef13f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 164144, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "519df9b9c33419f16b892b93c327f50d32feebd8d6d8116c470fc41dbdb59df5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 214256, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, false, false, "1c21945203d942dcd0a4ec222bda0eb09a21e660db3ea185ad790a4bd3d6659e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 214160, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, false, false, "c907d74ef163a67cac6fd7d8c34c921a583dc1bf8bb882278188a2b6c0bc64c5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163600, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "bb35d20f7195bc771afec695c3891445d77dc7ed54c3a5fd1185e774abf8f85a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 156320, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "1e0fae748c63d041e4ed09675acc5f932a89797dac9cb480a130a94f3faa0c3a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 162720, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "7f0b916f8fae9898682f4ac150a1d952d53f3fa8e22e414a0a90aa1dd91b1b23"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 156464, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "ad56f854b5f768dc54992f7941f8a87ce3924ca67e479d88464769b87cdb8b4b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213160, 512, 2, 1, 0, 2, 16, 0, 3, true, false, false, true, false, "94d3a60e22112a8f7b68c9e1a0e96a1e68f217a16b3e974667d51f2a565333c6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 200488, 512, 2, 1, 0, 2, 8, 0, 3, true, false, false, true, false, "f7e4fb50db328d8d0523c0a84b27aa6d9d4c1daa1f22b6f8bf37f69335eed104"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 179360, 512, 2, 1, 0, 2, 16, 0, 1, true, false, false, true, false, 
"c713a37848d49741becb4e9212847954a99da2d6af7e4e7b4c5a1d1a6ffbb316"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 166688, 512, 2, 1, 0, 2, 8, 0, 1, true, false, false, true, false, "5f917e0a2a753042758dea8d25212cc98626c92755d08bebec5bdc52319ca340"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen", 183552, 512, 2, 1, 0, 2, 16, 1, 0, true, false, false, true, false, "2b8ffd4d5bc6de25444acd1406cbb58450acb9c2da9a854e3867ba1cb49268bb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen", 179360, 512, 2, 1, 0, 2, 16, 0, 0, true, false, false, true, false, "54245f850dcad05d9311ac025bac55b0a00cb402ecd0f3eabd265c77a1af2a5b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen", 168832, 512, 2, 1, 0, 2, 8, 1, 0, true, false, false, true, false, "f4e72324e93450693a68e43a64656f935656602c148a99defa9786c742e54a3e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen", 166688, 512, 2, 1, 0, 2, 8, 0, 0, true, false, false, true, false, "636957f3cd5d41aec7b6b2f6970104b1ce325811891ea3ce62988257152af533"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197288, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "df56d01526cb35800ca06fd2b81ad20c8f282ebaab0af8f75e57681943115fb5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 197432, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "66c1232121e579bdbc64101a2b843e63c48e3b4b623c8d92b05b475213484ddd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 207464, 384, 2, 32, 0, 3, 64, 0, 3, true, false, true, false, false, "fbeb685e6251d6dc3eb2cf68f35b58373339fa751a8ed8dbfef4993282134e3f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 214192, 384, 2, 32, 0, 3, 64, 0, 3, true, false, false, false, false, "86cd258d7d9b9ddea887f617605741c71eacfc0b0bccdca61904b643e3d323d1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189608, 512, 2, 32, 0, 2, 8, 0, 3, true, false, 
false, false, false, "dbb2619e32285cb82bc7816d9878dc2e33305ae86b3cced3a854859b067878fa"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 189752, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "945aa79d354866d3f303b7e033531a7fc603469d83792adee3411a036b9378c8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen", 207448, 384, 2, 32, 0, 3, 64, 0, 2, true, false, true, false, false, "2e950e2e558d8f60c2688af2422bb5107de960579ce157cb0848e6a95f1fb8ce"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 214176, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, false, false, "ace563c826e4ec8810afd9859acc3f38b28075cb6e4b2d6c5f72bdadad8f433d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 
16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 164000, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "8952f57af885b9e2e545b7993adbb05ef78fd902ff3fc3a599eefaeecba68533"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 164144, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "b2dfe5d3fbad83cde5268b0737f9f3f95ba48d1de6d38e5762a919bb95506002"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156320, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "3dbe835d02277753f92d585f62a4592bd935f7cddfe32ab076fbde41019dcb50"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 156464, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "3e0221fc8452a8bccf4d5898101e469cfc33d9793fde60fbe084d40dcd7fea1c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 178448, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "564af49c24a36d51c9665bbcb9ab057c46f7a1bbd28c6a233fc70d744a0ccf1a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 164000, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "b56561307b5d5a7f71f1e02394734e17857669717e18edc554f4ee2286c96fb2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 176544, 512, 2, 32, 0, 2, 16, 1, 0, true, 
false, false, false, false, "f4bc3e858c4c7043deb21eb7208d1ac8a316ce11b9def12a355f443da9de4960"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 164144, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "61a510dbdacfb0a9748ef79dbff768207f3611019f865bf75ca97c1394b1b015"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 207528, 384, 2, 32, 0, 3, 64, 1, 0, true, false, true, false, false, "aa7c2b8c808f2a991b4dd4ace0186a13f4b7ec50cd8613a7d91e7c6079cd7a9b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 214256, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, false, false, "37c664cff3334d5f544701fd30315fcbd283344568fd9e3d530e94404408bb2d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen", 207432, 384, 2, 32, 0, 3, 64, 0, 0, true, false, true, false, false, "085b140996be6749f8f4ad499b0464a31d62014a5e904b60f0922c35d2fea7d4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 214160, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, false, false, "03109e2c6d4368cefa17e3524fa639000f351969b8a73bc7f612857391de4fa5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163600, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "dcfb535a9982ba62e09c635849b6ee45b72a65cc8e8891c9403109dedbe2a936"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 156320, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "a66b180cf507dd54552a08e79f328c9b751bb0abe3bd386cf348700cbedf3329"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 162720, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "02385cdbe44f5a688d293ae99a4a6a2b7ebd396b2e864f05ad7bba5b04041ba1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 156464, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "4c24150ab537dfc7fdd9a1988d008bb3ba3daf71301f961ab4871966fda431fa"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197288, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "55b13cac61ca1c55abd74b79d2632a2440ca2a1052a3eed7da992058bb0cb90d"}, -{ DATA_TYPE_E4M3, 
DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 197432, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "2e4cc62d487394df20529f4e902d7ffa9637b06d68b5f7c3a584542845923744"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 207464, 384, 2, 64, 0, 3, 64, 0, 3, true, false, true, false, false, "e36734ab78bb7dcf3494f3ae1fef269e933583233454e866666e6039d028884f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 214192, 384, 2, 64, 0, 3, 64, 0, 3, true, false, false, false, false, "611544d3cd6149b3ee2c6b516844328e3cc318d05949853f6a41f57221672b2f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189608, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "70c9378485baf1eca2b36eefd04c6f91c075935ebea30393234f8ede03e0cb02"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 189752, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "519a2a60a773aa71c392d8f4629ac360d23cf0524ce467dcb97d0c86ba8a3a71"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen", 207448, 384, 2, 64, 0, 3, 64, 0, 2, true, false, true, false, false, "583c57af7be399ea8ba63fd9ec610c6f611858f22c97b077a3505f4ddfc2a483"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 214176, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, false, false, "15b5e75d6524980bf3f4c26d1ca88d03022dd882e1b2c0fd3569d212929d59ec"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 164000, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "b18cef681929d5b73a4fec75c580dffd40341d76e7aa0c9e3240e540857f91dd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 164144, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "c25491b45865aefc6ae26822bd31be0a397fa39a82c4bccf42e101a0787b1077"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156320, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "cbb92d974108193e2a15bf1860ce8637994f4a4d75b2bd638bcce413171a8fa9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 156464, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "0dd185284baef49b6b33dc016ba98c1ad38b351a8c3f818482814e968319f05b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 178448, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "6b1b317f2d606abf291b2dbdc301bc25b850102908bf6737b75d167d1d0766ab"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 164000, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "bcc6b6df6631f66ac53b15c3705fb8928e587d576cf766fa660b64ba6cee0f1f"}, -{ 
DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 176544, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "344aca50216f316c39d24289d30bb988522e345b17c6d2cce59f1351136fa712"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 164144, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "10752a8b24a0018646f0423dcfe96a82b4d372a8e4801a9ea2b96f8ac8676322"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 207528, 384, 2, 64, 0, 3, 64, 1, 0, true, false, true, false, false, "ddb31af9c08f03ff7707b87b297800f249a5a18765ee30c2a53c964e1e0b5b43"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 214256, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, false, false, "c02552d2f422af300ff9d8e4a598dc5f6823ef62dfb87c859ee6c4aa4bd15b8a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen", 207432, 384, 2, 64, 0, 3, 64, 0, 0, true, false, true, false, false, "6e46755e5a005ff7bac48220285c5963201e5db541e5a5b9d4cc656c677060d3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 214160, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, false, false, "8b82d3d3e3b24b85291b0b14ff56509306125b04811af7956d0b6610546d341a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163600, 512, 2, 64, 0, 2, 8, 1, 0, true, false, 
false, false, false, "f3cc6b6d5204e1cb45d058f28706c67f55771685617436651cc34d27fef83b52"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 156320, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "1c886e42735e91ffa09689bfce542a8c589d03c337b1ca72e4f59399472de9cc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 162720, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "e07ace1e916efc276447d3738e4949b29c8518a067fc906d8b3ddb8d2861410c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 156464, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "96786d928a3623e4692741cb885b3b1205859a027208702c2cc401a0b1343554"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 212648, 512, 2, 1, 0, 2, 16, 0, 3, true, false, false, true, false, "a581be3e4496eb0b073ecbe7c5f55cb4e46d9a9841126456638c23376c2ffa05"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 212840, 512, 2, 1, 0, 3, 64, 0, 3, true, false, true, true, false, "92acc6d7a61f0dbd86019c7138232dfab13b7a3a5cc4bc4d073f2068cc95f17f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 199976, 512, 2, 1, 0, 2, 8, 0, 3, true, false, false, true, false, "f11d0f4d541d3d433f0cf2691faa48da3777a63c29de8af8ac1f6540bdd8b85c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen", 212824, 512, 2, 1, 0, 3, 64, 0, 2, true, false, true, true, false, "deb94ccfaf1225457cc1277baef3126fbb9a2091514639c318b0f108c103a6da"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 179360, 512, 2, 1, 0, 2, 16, 0, 1, true, false, false, true, false, "6a647ae8a78c94b684a2417f91d6bb9732d697f7da8336c608945a2cf78c8c73"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 166688, 512, 2, 1, 0, 2, 8, 0, 1, true, false, false, true, false, "421c577b53ac646adaa8f0793acf1981f58cd7e702082c296661eac7733178ed"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen", 183552, 512, 2, 1, 0, 2, 16, 1, 0, true, false, false, true, false, 
"3cb92c40021c6838e3e38a3ad6e4192bae305016bb19b78eea8c236dc14024b5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen", 179360, 512, 2, 1, 0, 2, 16, 0, 0, true, false, false, true, false, "7788f7be41c77fce5e1bdd17f0f4904e79107d4bd140eb26656b9e2ca247edc4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 212904, 512, 2, 1, 0, 3, 64, 1, 0, true, false, true, true, false, "f6542e1ba3c6d1053bdb85befa016274a60092424a7f0b9f01ad3f94f94f8020"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen", 212808, 512, 2, 1, 0, 3, 64, 0, 0, true, false, true, true, false, "98084607f091a576c90cffe70c779e4c25de61e76a90bab07abda842891403d1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen", 168832, 512, 2, 1, 0, 2, 8, 1, 0, true, false, false, true, false, "6ae84a283f9318cac9a3aa9d0012e300c9484a5fa8b099fa8da87fdc6b4d43f8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen", 166688, 512, 2, 1, 0, 2, 8, 0, 0, true, false, false, true, false, "e969dd442ec92b20dcf0c3f5548a7e98d43a22d44ee5765efb33461ec208c19b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197032, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "e99263bb29e7cbf859fb25d75186f5352db885660717a9f7001fd3e93bf5f0a8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 197176, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "a5c7a324adf73eaf79e56edcd2642fb9bf89157b636cd4a9e88f77230ef2ed62"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 214192, 384, 2, 32, 0, 3, 64, 0, 3, true, false, false, false, false, "70caf635dabf07497b25d96a3944af97684dfc5e7bdca7ee48f8bfeab438cbd8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189352, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "0214dfdca230508552de425d4cc2d233415351902cfbdea6aaed26b606447752"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 189496, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, 
"8f4cc348c1de0f6de8941998b57a8252a1513225ef5e65566aa2fe08cb1f87c9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 214176, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, false, false, "5f7ea85f7f0878d84c179e9ab7162735d3768e18669d3f9c4798b3f60f85c56d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 164000, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "64c58984444de715917d596e3660bdf7ecb9a7856d4938f358fbf50451f90ff6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 164144, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "e33209dfcf2913e130df09cab3189477775741f2bf573f482efa4d06fd7a9e92"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156320, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "26ebdf04cbe43a1ec4a8663d3a66fc741ce804ec821bed6c864f57e3431d3d3a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 156464, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "13c36fb1f81c39e403c4d723702a3c5ddec103c7df5915a086175b2a7121a06e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 178448, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "777382354e0235324d2c49732eade91340567a936611b96db58ecc80164bf59b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 164000, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "04389080ab68ed5de32889ddd79a4cdf43c87a57edbfaeb013c7c71872afb622"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 176544, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "6bfeb4a256475e54075aef5ad92d45fea915d6598945c2f98bbe68289ae8ac82"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 164144, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "236dd783f9e219ee93803bbff3bec940cb3331a0ace24e5608a5096cb2aacd11"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 214256, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, false, false, "b4bd99ea3b325ce06032a45fe7f4466a2f3ce4c84c5717d496a20d9966c68c37"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, 
DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 214160, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, false, false, "c26e262ee0b41fa3be8eb612d8e6040e989966c2d465f4f2d169219c3cd624ab"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163600, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "24583e011963426639a62af2c64018a65a430798b8f303c3a54dbe497200f7f7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 156320, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "514d18d6b9502459829db28f9859b959d3e11ba0b79809a2ca5e84fbf2be9562"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 162720, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "93901d955efb0f5a0b9862f82487dde655e643652e2430ee492c1f0689c0b82c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 156464, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "2b194848c9efd08653385ec01fbd4d306a317dc34579715172c0f929977231d1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197032, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "cb56a1c2cc7473088b367c2c1b898da7f63e65a74eadc427acfad0224fdfb256"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 197176, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, 
"2ef2a1635b71f87e4fe642e236f8a592a05df20e48769e1614e964062f4c29a7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 214192, 384, 2, 64, 0, 3, 64, 0, 3, true, false, false, false, false, "3a2342a9aa460fce635b18e4b01611d025633b64bf23bc3c7a66c8d8630e826f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189352, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "1e4fdb5415bc215e0c003c63037034479906076900f7b8c3e4c10b70ab74f86c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 189496, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "a87fa598455d77cf9e0237399795cf4cec23d32bd9b55eca11e398fed85df708"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 214176, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, false, false, "0b51f1851350ad93fb2e3c800ebc5b028df7577ddd6dbbd2c546b0be7c0c3b69"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 164000, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "c415cb4fe3ebe120b5cc3f6cd0ff052c560da8f9de78a8e8f1aca1060ac0ce6e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 164144, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "1a0f2d23002ccb787b776c9898eae576ee9c1ed9dbe09339370faf27bb806157"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156320, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "74c261f3d2ccaf4d571dd5860ba38fdc524a9e13319f94890ea8a5c3facd74dd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 156464, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "7b6549edfd7030e549c4a66ee81e6aef00f6c1b18f7b789bc89049d013427163"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 178448, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "422700f1b56b18db6107a840b58276929ef59d2166f19db23cdcecea92386f0a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 164000, 512, 2, 64, 0, 2, 
16, 0, 0, true, false, false, false, false, "dc7744de5c15cef8359f433dcdc74191bc870d8db9d224ca286357a7a058b628"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 176544, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "510b493ce7a32041c6c4cbf3892447ebb440e930f9e5d42baa02f8f649545560"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 164144, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "fc5b286fdd283379f571d4c55e87a2c130c3508c1160be8399a2d4c529c2e154"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 214256, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, false, false, "078d855b09d3fef6d0150cefdfad733c29869e7585108ecfec0e6aa99db3a213"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 214160, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, false, false, "356f6047529c7958f7270a9bb2c6a926e064aac1e86bb4155bb09b21c8df121b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163600, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "e5eea442914f5d04de2766abe0fa9e1b9861324a379d5201c98bd88618bad8d5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 156320, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "9c5905634992913b7e3eaea53483df2a65d0d64a3baea96c9ed1e5ce21f09f45"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 162720, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "2f2281ac0349614c0d7eab879a85f1a5207a83ebff46607bd08e6170148ef06d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 156464, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "a29eb63f33dcfaffc41a560c1ca8e6c52b8d5153f544c3090886ee57b0be7917"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 212392, 512, 2, 1, 0, 2, 16, 0, 3, true, false, false, true, false, "a2741ea7047d8f61697b2522604283c467b6e51e3e1a80780fe596603c87d931"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 199720, 512, 2, 1, 0, 2, 8, 0, 3, true, false, false, true, false, 
"9c171e8042c35c76787863ef029eda72b553f1d0e8d10a01a837432823d3e768"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 179360, 512, 2, 1, 0, 2, 16, 0, 1, true, false, false, true, false, "40bf51c7aba59c878ea6222e0f85253ee828047a48ea0009c24a0d795895faa4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 166688, 512, 2, 1, 0, 2, 8, 0, 1, true, false, false, true, false, "d7db11d0c9a6fe95ee3544f74c73dcf5ce29884c6876825afdb345d16db331d5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen", 183552, 512, 2, 1, 0, 2, 16, 1, 0, true, false, false, true, false, "5967c05001796c659fc7dd98fe1f1af6b967dc02828084743815f5463be53bb5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen", 179360, 512, 2, 1, 0, 2, 16, 0, 0, true, false, false, true, false, "b2ea04109d0dfc4157c5c2e74ff16b4fe30fa9cb9897c761ae3bca175c4899ee"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen", 168832, 512, 2, 1, 0, 2, 8, 1, 0, true, false, false, true, false, "0a30b6015b3a35763363f22cddd022e7366935caf61375fc487ea1aa9057d646"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen", 166688, 512, 2, 1, 0, 2, 8, 0, 0, true, false, false, true, false, "42f89a57363aee48521d686b772d38b474258aca52cf321951cd2fe9bd6d9a54"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "0f9d463da43042db18b1d6159f2bfedd6d96e4b6c0d56f6d9373c8f63d90645d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "7515b857c3738b651a213fd13f83c147fec8c598656b0ac1eefa15f98cc7844f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "6ebbf868a15ffce5333c025fa93775bb1e0667daac2760b8ed00d8190eb9fa1f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "9018f30f357c89de4d3a1626468e7cce8c388df56cfbb8b446e4af969853f15d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "62e6fce518a44ff0a4ebccff7555a538e4fa9340cf9b4ca08d8b9feec6952f88"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "947ada2d7b4d780a5d33dae998ed1ecf537446b245f6c54e378b285ec6f9c6eb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "372e0dddba2bbabb7d030e36f21c4d11ebac5ed69939dd65561928c8c6116a16"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "f9d83780da37ba747846cc005712a311a1adc295b13f1cd887e759beb88eabe4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 
128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "3b3d6ae418e12794a44d592179e6f4122a85f669108df59a8379bceb6cf76e56"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "0b672a44687f28f2ef9b2970b470c1f189ee4fe0130c7f88adc0cb3dcd1b7f1b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "39b044b0e8dc749b385fb76adedb1ff9bb6ff8cfd5d4b0a0e8b43939f8f7326c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, 
"8700c3bef2c201629170b28b485d66abf8b3b55d0ec3353e82efd3ce9c808e9c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "a3d1eb430ce444f96983374851b567b47d5937d98f98573bae34363e26c54e88"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "80255d3b5760062f11af5707325a01943efed5ab61f4b2b96a65daf65e02676f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "49a44cceb13f5a7eb956ff3f13e5a57d30027cce788d14534d88a36b95a6fdb5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, 
false, false, "1817cb3ba25f501abb56fadf7399d38759318f5ce52fa9b4d55cbf2b8f5da8ac"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "d283babaad1d664de6c6879a92ed1fe613abbcdae1b6123d56abb63064c67b22"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "b58061f9a92de14a8d0df43a8cd5cb391504f1fe4e6eb401fa4503f421034d71"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "5fd4a543f6d150728ce36fbb931128dd2b2099a5282062a1cb768799e822b6bf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "99bfa4f94d554c484edfe5ec29b0c1d58657a02c34a37b39d3dde1db73490b4b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "6b92d0603fb5853ad7172c02de73f8ee58a13ddbf0df40905b8c6c047a2e9200"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "2fd7d57b4a03359ea5844a87c89e02ff1419becd5d3c6eab3e5a8e2a88564b0e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "065c744732984691608ae07bdddab736c4e9b8319cf76d96a94754b6bbc9814c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "1d7839b3a83e2458ae8df19269d328d0d5da2e7d625a87efda92accd560fd31b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "3922ccf73e49510f9e7479c3f8d2b039a572b3dbad00cd962654d8fcbbc23784"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "67d1bc2bda6fa91681ef0ddd959a90f4893c8f6e9cae8f22062ad13a6e12decc"}, -{ DATA_TYPE_E4M3, 
DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "c2fc74823108583839f54a69369ed340c87e7aa9b3466293e63ffe6bb8fa77fb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "22932d16db38224d2d77df398c584caf77fea75482969e2598367f632d636020"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "1cd28d9ddc432efc91987fab8e49b9f975c441d36f996fd106aedfff44e3e972"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "8916796be5dc5ae7a09494c6f3602f6f8e27c1d99f7cb367178e74623f6dd926"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "dc2c29ceecd0c2799c95fd67d770dac2687ed37d21e02dce2d6a200806a18d58"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "a10cbda5f83667c7b82bbeac45ffc7bd707eb4a30b936e47800f281413bbd7a1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "5bc3e8b0e258f8f1d73b6abceed3d9b915ae76d1aea7231529b426770631ec1f"}, -{ 
DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "91e84dd3554ab28831075dfa4e2bf4947d8c79937c9e558beb1d0a146ecb2477"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "ee042f015f96b460ec04f72bec513fed91eda58729e20b2be69aaad28621c23d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "47b111cd1e09ac79b01b73f2b3f8ac821601d0bc28d7e01ce95971eddc85bf31"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 32, 0, 2, 16, 1, 
0, true, false, false, false, false, "f9943539a9f35065eaab792df4f9faf7dc0b152efae95503e8d0938dfaf69534"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "7039ba625d9b60e9774ddefc65402fc984ce868d82a0b8a5087145dd56a80ee5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "08feedd80318d3af40910f455cd66aeda1b9ba2352259fb5a07d4b7a996a95cd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "6dd8d23108f21c5acd85240fceb66c8a6c32266d1ab90773b493447b84335e2a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "961abb7bb864be114ebcbab3e8b112eed6c65ac18c74e668955fa9b33850b269"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "8e5d6a0236f4fdfcffb9a241b286cc616d00521313d98a369f2afe9e36c5a7c4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "a5624a97aac9818050d9e98cefd087b7290362363dbf0ec3edd177c54b6001cb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "ac59e7427b2b6a7f250b90db4ad308b7b2c33bd5ed26d67ddf512e5d38011fcc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "ae1cae47a4ebaa497107034f8a97d0eeb6c3ed3a1ee5931ae946dce67a8ad930"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "b4b01ae25208617f4600925b0069f81e0fd228f806c29b01ea17d0b2103adcb1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "afac2ba0210bccdab15d1fb87c8737e518d37f77a3d11f108b5bc3183b05b565"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "c4897e3a1a5960567fe7ef457282363eab8efab182558481ba9a9e7744086dd3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 
256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "3ad51a8629cc95ad4ad7a01654a1b8f17d3cf09d62e2a752be03094b54e28642"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "8bee4d9ea20972af89a39fafeda43699e288f9ed1b9ac2585924d9dfac71bcae"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "d5d54560d79617227621f56e3fd37d1a61b6b480c663b4a3483f87c2843aaea3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "edc353eb045bb86be2aca000ea24bc1c874b1012992ed5c8ef5419856ac22738"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "4ff0bd2581289ce843254ceffb3a9ef18c0d8e72ce71917cdcd4becdc68c867b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "9ed3cd5ee7460bb10118de342a3bdd54550068f325bd892e74820b0920213393"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, 
"68ae303f76603ba1b4eb97eeb01922a8de90d038cf44c8acc24a036b5c68e9a5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "4d5874aec18f0312f83b138dfc2ea19e6ffa997323da36c3563c9224d36689ad"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "f4e4be7bcd66777903db196aff3897cb445ea0e7b1cbd3c709eee11ccbac3477"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "0844d1ebf6decfbf6e67d4cdce84060195ed1864d2525ff95f28c8c6fce44860"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "4da253ff550f0a87a4aac31500865e3e8edece45c408680aff18993638a93f46"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "b9c76f6b1446e934f8a33ea9371054dca1f1d3b338e46e298d83944bff7376ce"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "76a8e4103ed0e68866036c63ecd0002b9ea79b212e044b9216510d06b40ad2d4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "2e31a89b7a2aa0d5d6ecd86349ec07d06569428b3b7ce62234e9bb5e1f0fce6d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "037d2f2bce833ef171703157d84c1055b34a560e7916a17e123e5243ea14340b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "6c8ff4511ecfe862444d2f3e16b8024d409aab6112748e018af45f305ff7b274"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "b917257325c6d59d56dc178bf1dfe3eaf4f851f5b74c3ad77d18e72049dc0348"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "3cc5ab7d2b85df97f0fe7bc22e34d1d0a549c85f0c8ad2a7ef4e0d1370c66eae"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "e6813deb2323cc89a6a7c2aecfe53ba7f93072ea322cdbaba2732a01db16c572"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "142c3f8bbcaa2d78ee0e7aed8c6ff44d47edb6dc5c2978c334cc7854d8a71846"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "56b5d3203b455b34e03976610a89675845ccf410c50918ac9c2c16cffbbbb4bb"}, -{ 
DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "c4b0226678e99fedfcab123dc5807905c1cb745e476f2b5ba55749091a3fbe37"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "9243bd381ca4404b952f1f697f0e9e0b1c5acccc7a39d930be9d335e29aba0b0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "8be824fb869ee278b43cdad8c5421c5b05118e667be5c3d223fa280f35a6d779"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, 
"74f225fe4c5f9ae86921cdf467cb54c1b04f5c775b83a3af69359af6f695d7cd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "70e16e6cf837d367ab8e250476be147c0ef8ca5cfc1f287b0ee4341f68c1573d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "b64ab94a433283aa644439325bdaa76634b7791e7367d826a0bb9438d51f5918"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "50f92eac42a6eaae3a02b24d7484547c9da1ac759a2cd9475759767c9ea9fd37"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "0cf4553739e5412dc322f553d7cf3db075e39f1a6fa65b8817d15472363ea38f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "03a108d374b496dc8e72eb537446ef0f88e7d76340b2b691d6927796e2f8d857"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "b00134fd3ac31a01b380949225d6accc5479c671d9ac45be90b6fd02b0f78a22"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "6f7409d232f0f02f9dfefcca384794b25e9c77ada0e7cfc3386bca98cc0c294a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "aa3a33db06ed083dccfd68291d4befb42504cd8b1655fcf20cd2fb98fba4b951"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "84d36a01261b26077ec5af2f96b5e7e537061aff787e6f12cc95ada53d983f5b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "1b690a0bd2a1eb9a710687bf50897409fd846a96d6ca1f24202294aa5a060249"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 2, 2, 8, 
0, 1, true, false, false, false, false, "fec4814b43c69e6821bc5e92ece1718c8a3f929ac528667d48cce1239abfdc3f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "38665a7361f1ca8d573ce14bdb49a6cc7f958fc55abadde2276d4d5f9f1a5280"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "8093a64f3a55718de6b2d608fcf67b5db738a1af38fb7edb910e089e4ee22ab2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "46e8f05ecc4d65c8bb9ca8f5e6e31822275a975d4ebe69219782bfa4b4d6c106"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "994e8dd71822ca90cc22ec4b8499666c1ab9c78c02ec964da20d1c2567f3474a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "6be4cdeb4c72bbdb48aaa31db5ef8e2ee70f7a280aad2bd60cce58fb88bfb85b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "b6ef589358e016fdd7d843c61028f70b2db8970af0a2d65f0c9f8c0c3443aedf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 2, 2, 16, 0, 1, true, 
false, false, false, false, "8000fce7f628b2d3601a766d87797caf3579c3d04ea11e6ee6127d7e68c16a18"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "b91eea4f6b9df8f5bdab9dce02ae7cbd2bdefdbdd7366f0b3a3484f00fd124b0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "eaf6b23a45da05f4e410c1c6d37d49a0c1763effd48998dd6672b01da2fe57b5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "17dfc3e44f1c59d5b8fa78d60b83405217586d180c31672c9c4d1e9d7d9bcabb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "dac5707b72da796003efd688e4737c61c574c57149e8bc7aadb05929514f1251"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "77cd675dbc5d6891d8c258fd798113db5b232fdca7776d31c63cb4f9856b58c9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228408, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "0f9031f06f50e649255722c4bf76c224550ad9544fbd712b06ec6030cbe187c6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "5c73a85b6008b9330ee7196a6b9be23e772719e478ed22ef97fa9094e2cc652f"}, -{ 
DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "be76a3c8e63b046cc916e94d365e9248daf0274f617436b4fae1155726d7fd90"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "28ace56392572ec2f3aca760adcd335622b3b3c357b19fb834e6d120fdb5ceda"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228408, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "6db0703dfc1a3ce993a72d5d2f7392111c8067ea9b888b278e0486a6f0be8f4e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 
3, 3, 128, 0, 1, true, false, false, false, false, "d02f437b8cfcddb1bc9140db9c3f07c4180a8366917c15934c9e43ae844f628b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "8884e6fa1591d479610c3fe51c4fc0c9e6b583cf1fee30b9efea3b2ce39a212d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "42fa4940f8eb6f88a49299983760d412611eaefbfdb898e47b99bfa328a9e3ff"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "ccb7e4f19df774cacfc8d685fe7ad2536a849de80c9d6cdce319008cb10b8e02"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "7391de52d966ec527ab1a0dac43e2f788d6f2c85d4a5c46273b1c6cc6d8777c4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "773d999fe330bbf6bc214a2d1fc0daefc0adaeed10ff135d3eb707c5f5f01a9f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "ecb49ff6ab380c76bcbc3d2cb9848b1c4d616dd2a302a4e60aa6613dbc754271"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 160016, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "6bbfaba3a803b240d94a0579256143f49d2fd954a3399e71309146852d96b13d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "48176a2a68b0226ca6e19df8de3c44977c9553bb65b301c5a11203dc9d81e73c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 154384, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "b3ad35774a30fd7b1f606afcc24f7b34ed10ed65637ced8139a6a34b9c27e2dd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "f514d75d62d30ba8b1f6c152cbaf012b192311c9de820bbb212936c696b46c2c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "506913d2207f96153a5164e80fa433174a7f239ed59702714c27f108d7cf2d79"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "7f2898e863487e079bee86bddbd3a1239b9761c3d671f5a1f3af63eac4e81625"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "4e8caed0ac554d796b7a9f53e74bc7faa9cbc4d8282ea86e62e2975c89ff0f96"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "d08e6d93f5d6c43f3f99e07e62313e576e96992a8ce0a430219d0adae1133db7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 160016, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, 
"207597e29b0835cc3e22be0c0deeb1438a2c5eaac748ab36f25e7f38215c36db"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "89b6360b4a29518bb925a7b7c61ad5a071a2a82f0cf168c975112e15132122b3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 154384, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "0c30f3a6162d5e135cf6c65cfe04f828d8bb38c63a2b04bbe41d71b85f27bcb0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "4bdbd92fe49cced2e0576c88d9b0ae9ad3d92eb34cbf871d7e08466ebe64db59"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "d7e15a45df76550a43fe08e036b2f0e6df72040d626077b73a769919c89a78f8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "0444ef7854b4a6d18699bbf0f290a7c0a6d827934d8d55092e9576791195aab5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "c5f043f2f69ccbdfa3dc820474d57eb383aaf35b8c08f8fe38c0ebeb1ea5f251"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, 
"5b2ab718948d85a38327db730079a74eea5f609296187e5e159e205a2ceee559"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 160016, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "779ed1e2b63b8b1e5768807dbf5039399eaa9b779aeffc4194a9f74bf66c0e74"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "cf867846bfce71e5d87d265d6c22fe29647c89a09af183dd191f89a2914e316b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 154384, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "d70ff1899e68b66f3966c7f1cc47e046e1695b6dca69c3a227d95cb1f8e3c4c2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "70e1192c27fc4ea71cd49ac26fa0d367fe5ecbf31fe6d229a8ed2aadfa0584f7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "d0c7e21c69531cdc2cbd7dce182f41550a2b3c4144ff6641aac85e15c21ad493"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "2989bd6c66d3ec5769efd376982570d28a6f90bf14bda5dea5d19805f3127ebc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 2, 2, 
16, 0, 1, true, false, false, false, false, "c2fce81a636497268ddd461f3d81afa6e6523f039f62b45ab50ce3be1c1832f1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "3f9f435369534f4fc52c08f3256fbed13953d599fef5af9ec7b6385e3fcccf9c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 160016, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "bf9fc02b33b4ccd91ca6de6bc0b10d1decb7eda91a5b144fca2ba28117bc6168"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "47e4f38d66dec215a47d750e86c5af9d04dde6a2dd1f853d2748c4370c02a237"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 154384, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "bd9c2444f54ec29e574364a18c0da38d02f4589d247a5d66ce39967620bc8db2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "cde975655d502c715041ac43b075023500bafa1005fca3f608a1f06368a862d8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "87091fe99fe9b1d83d3d52e54c982d8913c1be51413e779bfef879f826e3580d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 32, 0, 2, 8, 0, 3, true, false, 
false, false, false, "97a3fb287753ae89d14a81a0ec2dee902568da717e35013d9cf02ca5bd570f9c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "4dd526656584b72f0e554ee6adda571c9c0f304a24c5167d0e277be279cfb151"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "492928386533e0210348fa4f90090fc1de709960e9fd46caebb720f070923391"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "f0dc34f9ca33d33b2e8e096670bc490148baab59c7ed59ffd8170f8f0afe5232"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "c8b3414a34e82be114d707552d7a9edca72810ef7b95fbea16c3bdcbcfa5e28a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "a289e6691924db819342597922fb79239879549dfe31727b76f0e78b144f7df8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "970ecd0909e417cca2341f017a9a92872eea8186921214b5d3cde0e2e756544b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "a0e773ab30d775a47e948789faffb7f3ea068c1e49bf984f7aa2952931d9b4b5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "1762b5e79bfed36d4ca331e019cc8d997bc6ce7cb875ba05e5f1dce905850506"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "e4a8be8ed6dc8c01cb538d3175b1c429b75a77976b29850c0bf79ef24fbf1c73"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "5a38cc5d610f04480e15ed15c37d5b5c046b71e251504c5ecf8e7f5045230330"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "81e8e654fe7ac81612fcf60ec83be0c337255a2f6b22f66896bbc77a32aef8d5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, 
kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "f61ee29cd7354738b5a46713c85ee880a4def5ffa7742dd3afd810f366bf33d7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "a05612285fbee811c2c611d9330282c22c9b59ba540f74dd47b2dbdd780b12f9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "672dba02306e36f8db13431f097ca617d809c44b6f0802d97bcd29c33e921a77"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, 
"bc931f7a13689c212e7ef8a232a787a4514237fe279b9b9366eb9e77f6704e37"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "bff85f1b0ef91a8e4068f78959737bb0a55b8cd96596b5398c6b6ad206d23fc2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "e858010fb79997d33e8870bd8f00f4f77d206d886dd09e360f328655f97da146"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "9c01e61d547a051444d4341bac53138f9fa51e44aa0fb5d6ab66479adcf61694"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "e8593420e6d518ce627c46f166f1b130020cda2a52ae7f46e250e36f8f4dd64b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "49c3ef0b6416179d0478833604ca0bd72a50d6c1f253d0dd1c93c41e8d7a79dd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "36cd8f4735665bb5183f93bb64b6d7e312921a51da743b4fae066f01e6473636"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 
32, 2, 2, 8, 0, 0, true, false, false, false, false, "cf5c1375c50bcd6dc55d89ed85426e35779c0c0ce789e4b597e689c8c0456418"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "574eaab5d096f347478e637144c30a12f3aad8c8da573747c8d4ab35fa1d6da0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "a1511a86a6670676afe5504c02f4388ec03d41f90c14cd22b2bda4db8f3dff18"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "04f9a39ae2c546c935a392a0d4b6eadcfeabea440a27f18ac120a005d33bada3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "1c3d8bb13da7a41ac33f3438a5ab6fdcadda559ea18b2058d718d6c7361db347"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "77ddafac68919fe54dd405a8cd65158cc0be884a6498ae92bfb42937f19047f2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "32dac93e3213ba5a860b26aaca6525e3f8817bd6e2f30b00b5eb8d853c53e909"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "c7bd858ef3d8b4d2a5d647f49470d0a7e557490ca0329dc91263844065593ff9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "b91c372eb01c1018156f9e106488ed5cddd51206a1ecaa93544d1489d10ff14e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 191816, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "47a33509ff63af3a6c879031a5d81313ce67db550e6da30744f064fe9d4f9f27"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "280f7795384f054f249520e8d8b359dc4e7a9ba68895b2274bcfd2c8e9a65a75"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 165280, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "8726a3d581a903760a8f5520f99b2fe3358ce2ee4caf5c2c168d5ee0a5b63673"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 156976, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "46fa22b9ae620b01d1e31747facb2da65832470d448c0f8c4a73c77bbefb3a2e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 191816, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "a3c571a9b1f4002b6020e8d6ff5ffcaeb1faa7c6d8921ad49d4a1e97f1655ca3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, 
"d6fa79b0ba48c2486334a83b9cc6d42c271de1f77ecf5d63f16fa29f6c8ea74b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 165280, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "6158798e97b58b34a5d69440530669dfdc429b94ba687e2a0bf77f071cab3453"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 156976, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "da238f77bfee8c850c9a8fef1a86b6a9fb6f39042574269a9910be9e028a31bb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "5e2f73777b1b210a8da1753f7f39fbabf87919a4cc9f305dac0c8390efadbdf7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "15f53bb2366159dd0e049506f5caa982a08a1dfb10430f7fdcd206a10db1a90f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "229c5e92edb3829bc171752d2ae8ad00722e1905d22b1d9e64dda32adb183314"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "d0f9746643343a5b352657eed58307b4dffdb4ae78a678a6d10a9a5222237ee1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 157088, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "57e66ce320c17b288af2e9c225e1e33b09f0efcdd52e643f9f6e53fe5ca0f7d5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "1fb97c776edaaca16ac8530d91536f1befbca395f6666d4110e4517d9da3f92b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 152992, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "4c1665b2804406a2b525831f894218c1274a20b8ead6c9b66061f612d19ba5c4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "e8d9b07a57e20b201b4bee94f7d36848e80821ade18f35345671d0b85735aef3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "cb1cfac6d43e653c51bdd2e6cff14d1b641732963d1bc68d86935103b387e078"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "911329ef822d173b2490bdb3833cfc02e91511bc4a21a13853c8685e86bf8727"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "b654beede1b758a89e2fe39f12491e68962e6f1523de02f398736a68ff238441"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "ac206d08e2ea3f9de61295340d63439792c2f494c467208e42114787b9d62137"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 157088, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "2696734e47966f0397046f024485ca8badb27580cdaf71d130891281ee62dc61"}, -{ 
DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "7b7f7b954ed9810cf4fb5f350bf8fb027971556dbd1c3f9f88a932c432f32de2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 152992, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "57ce39796a76700792041ced8c3a8be6579c43ca58f6a94cff60bb74ccd22793"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "e0ae9271bcc7d02f367d8284c600a8df4a4391248cc7c39a3b2ae08233b2b2b9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, 
"f0bc8c562385872eaadc485b84951a5b2dcb19aa7c8e277d10769c2ab27a9ca9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "0654f617b65dc9c4aaaae02b1c901415717969c2c7d554fdd4af34c1685d67c2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "29a41ffdc123e9618d73b01eaa40de644cbac423417835d51dc1d564b7431115"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "1372db24ccac4e5bcb18c3df741c273d6c43b5926cbf95c38cb00353e82f0a1e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 157088, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "e65cb37454148ddf6a6b82179d402bda9f4780bc203085d9a830be9be5b7df39"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "3cc52e81b7c6826e1e3763af051325132f703c7521cbfdd20836b785027253f8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 152992, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "c90027e6f9d6ac4b42c4c7eff4b0ca09890267876120361747a6b0fedd122cde"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "a689009ffb66e64c74fc5af59db14295e729dc625bafa5368e24964a3022c8ff"}, -{ 
DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "351c4a96cb8bf14ba398bdae4077f8499384e120b19ca6c83fa4a99e2daf1411"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "616d693b5bb52f09977c7a86c59602a66d0fc0c6b96400928f4f2b96891b771c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "5a20e54df026020e387ed6d5fc9fc52bd7e8fddcbe68f37c482b8ec8ded2f0ca"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "e7c5ff31bebae50f529b4733626e2df4a9832ff11d1e1f9789eeebd80f24e9a6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 157088, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "e7395e6f7fefe41f6f1e05713b9f550d51448dc2e6d7501f260fda21df78180f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "10d70f39d19e0b2426bdbf02d8235b147c474590abb6a9086f661121f2cfb352"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 152992, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, 
"8d50129fd8d2ffd356bacd3fa549b83d2599f98ece751e47c6627eda80b4cfc7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "35bf526d7889896e05ebf3a9bff0add26ea230b52a841378fe9cecbda30a6170"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228408, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "9a593e2787ef174e16842e3c30d66291beba19cbc81c254891e9e01750c85e3a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "4aaf10af173956f70ce1d79879d9dc121949f96aa832bd30398efba791c4463f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "a0d84d5884d159b3aefab9c1020eb942cfa7c1f0bc3eaea30d66bab7012dd7fa"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "35c953ab1879599e511edc084a712878c9b569a125861edef8abf3895b8e9430"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228408, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "7de8d2031dc86d4601d379265004e1743b540d07451cd753ddd4bdb1d6301f0e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "fe13fd3d1a3c877ae0d02f7d9e2ec6a9905da95f2f40ec96a33b6e9f851a2829"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "0b287fc0fbd5225eb5ebdc9f6ed605070092de09b9c8aa3e9f1a95cdb161a218"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "5dea70c2fef1dc9aa3cbbb5091f30aa06234a49890a8987eb9cc6dfb141f5141"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "c6086ff240ab61c1e2c5986a1e8f837db6a8ee90bf7ad59db0d8f36b3af85a88"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, 
"745f7f2a1fa6756e9a6ab78a001b6254e1fe826801edbe907353adaa0de3fd3e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "f9f991de183c24e6d0cb99b7e3eb5547014407854a35d5bd95a20cf442ad6ee6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "b3746b991b75f4bb2adb8546f03fcb5213defa3c2fd041e7b3901bd8fd6df20a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "829640287b90e55f50f92f8f5beedf8737704d283a775b49d7628eca144e5412"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "0465d2fe415c1efd2021f535b3c949695fb04a707975ef824d8461e2c20da649"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "f83ffeac813149fe7dd17c3c491ebe42941577708956a2ec29d20af981d6cdd7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "7e4e0f497713ed1ce05a5ff458e4609c647c329a80c209d60afa6499483cff91"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "ef29fc92592eca6293c206f8dcc494fe089dcb82ce36e380ba87cae1e8d19d50"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "e1514341252cf147bd3e6c1b52fd0f2f3960479a1dab295343ebfeae4ca35578"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "b2f9fb67d67ee9bc9ce7b04830356d782b4b0e4d1e651bc0f0ef1e8cfd887d71"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "c9ec017d9f88cfbe2c7ee10d21b3d9580af985aa7d23f1e52c8040ab9e26b403"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "0f1ca4faa2f8af64aa427b0d18706ce5e31b9c871ad87c86943493be26396e2c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, 
kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "8b55ef5ddf7bd10404c2d9f5de0a141180bd0ca485fd36b561a7ac9c6850bd43"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "babdcfaf02e4d81ae7685dcb3b5f3d352cb785d697f789c3e3060373f729ea54"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "0c41abb476bee5fa86cad59c368f43c40bbb07428bdc7f928e36238226fb98f3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, 
"c665bf446173bb9ce894695ddd1980302f739c13b0c1716f6a5e02b53fd2130d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "ea7d05a941ed46ce5a1b78042d05ce21add1e4d564f4301629b1d0fe69ad1068"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "69a8830e991f52b61b3a9bb8170054ab0024a8410bad3e85fc5df23c77e617d0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "1b87d92d237710c411be97d0dcbdc1a87b0b3ef363883905d08d6a884899ba1c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "2076c0ba691bbd2d700bc2b08bd666ae55eab33e29202e5e8c035b5da18e25e3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "2a067e422cba939e85d4ac9657da3f935e04018d1254a69edbaf490746db64a2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "9eac593ceaf5c7bd3b1289029ab7517fc33b17438ac35ae9ad17c465548344ef"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 
32, 2, 2, 8, 0, 0, true, false, false, false, false, "04e1be968e46748b0f86a4a8891f9e054e4b22c652e8b49eb3905ef01d71bd79"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "c69120b6cefb940096fa1b279d82f5355ab552e224af41d8e38f92d25377c26e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "413288f1ec73d371829edb7f95f9c9abee68811f24cc573e0d498141709d233b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "05334ad5cfb0d56b4026178f1c38c1719ff7c28163f6e3860cd4148dfeddf835"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "39bd4993542655fae5fee04854eb9afcb2ef9b94e5c5c97764ac6c0e12ee00e4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "3169aaa6154965fd807cf90911cf88cddc36e72436adea42e89a84d487b47e12"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "759b956872fa17b2598da8dcac44812011a79317c6779bbe10489f4b0c57d38c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "df3bff87133d14995c91f238cf4484d5506bd431d1833fff09d8492b56ca3568"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "c95b57e92d94a6ec0706ef0804f236fa67bf5dee2842e4d3548fbc92b361685f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "1142e6fd9b7e1f5d97ff687da47e9cee14582cd6af3c15b291144a2e9678b596"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "175bce13a42244f85a35121818e971faf638ff63b70c7e4127a68403953bf6e3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "4bf9bb93e81bb32c7dcfe921779c587e51a7199b5651643a54d1495201c0e628"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "e7723009fd8c86a0a90e59788c88c5bca090944cc9393204c4b40757fc124539"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "655beb47d274d598b6582657ff3404524b71ca32a180926f7cd8c248ef085132"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "da07f4faad108c61e14d67436659578844ecf16e42c115d1ff51e7fde1b4cea8"}, -{ DATA_TYPE_E4M3, 
DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "f5a41139517afdd6117bb49f7f9e2c5dacc70839be702e9037ae9a35e6d3984a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "6d9b2b46c49b16d4b0190254b30b9f0166a49207f0e8451e2e168e511286dbd5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "2fc2e93a1c219252b88436229422211d26d19b0d1cd5fd11f5705d46224bc413"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, 
"d2291a2b4bb907b4046cfdafc8308aab68ec57a7df3f744d44cd226ee94c57c2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "c1abd9090a6a2bea51a51f662e8aab361ffc1006a81438a1b005b275d1cad289"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "bba809cec1d6001fdca7c561a499e2570702ef4fc503da550a788331155c592c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "66d87f504b9fd4994f6521f7d9135281f015ddde2014e58abe6165ccd384c6ef"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "6a2f99f93bc77eb3483a9cacb38fe9bb8890ab7e8aa114b61b2599da5ae10c1a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "0c5cb40fe2351b5e8ffdf8f1af983d2037678dd7969dc8a65ead103cbefaaba4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "dcbc1e316ca85bc78b9a3831006bd219de21d53bae289d5316ce322eb3ac0325"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "ced29a292586fcfe7dcee3fbd8e8c5eb89a339fcd545a8ce3747382ca3355a04"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "a4b6e4a166822c345db26a4a558e820091000e9e7ddddd4fe00641231bba1cae"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "359fb5ca70498e69e01b95eecb66c9415a994cbca52ca442fb9ce07d42437e85"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "21bb240a896dac0fba7c1b8e6d4cbdd6170ef5f9a51e3b2ec09b532a1f2be01a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "96a4ef981913d74ddf4ecbfe51f08cae9c1aed9fb4c82e2f9afb20dbf9ec3dc6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "84a4b945032d31e475dc085db8025c5245e0090b466a885329060ca812dad353"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "91f9c85053cc3ffd90ea5b2dd95576554459703af8d6359ca587659e4c766e79"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "f90b44bd2484f29c16d72b1dea7b58f374a76961af688d5b982708e48f6e721c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "1ffe68ad641aaac4f99b9117292cb622f70cb6d71c692135a344810f9072516d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "e8ecbde5cb2ac34e51a45199472cd8106fab5dcc927a0a2e7a35f5ee724aeef8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "923402943e6a0d9953f72d2be05b2cad2bc461ab25f5d08822dcc21d16cf8a26"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "f28299393ee62757d45afc0d89bb34a6081edf145536c58bae6f972702539513"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "c9951bdc490ca7486f15be1babcf335c0cda531a2374ae74906431c029204938"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "904f49e63f771604c7a25ee423fba448b11e08471392b6f1645e25df5d76dbf5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "7a3b57d178dc844d6cd683fc28ab15e1560c825b5e8f79f45b33cadeb82d0520"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 
256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "82bf8a008db21465f65fb239267ee7fbac4c8e36309984ed4bde36233847d1fb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 191816, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "00f404aa8e6f22b9e1321c8d6799be053b5ad63b9673c1ddf38c1fb17e9f616f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "fe3b3b6708cd7fea14600ae9e0449459fd20723238465107c5e30fbf8a088a92"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 165280, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, 
"20e04c086ee5e5fed2a3f9515788827f82d60b5e0e56392e65ba7d8b1a6e7938"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 156976, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "4b3b33bd8a447f216dd254993c8274f38b218bab25e4b9e06215d1492a0bc401"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 191816, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "7dfcba17618766c55963d6b9015d0459548153e89f7ab8b343c796a9a6ff4a57"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "d243cd214f051500b70bf5b7a56e0206426ebb60c05d532bb3468d8b32bc8f8f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 165280, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "b499c75a23d02aca5a105c9f2e1673b65447e95d2491e389547eb2b12cd76876"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 156976, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "467c1881a58806b237ca344f11e3eb0654c9c40770f45f5d01dc92c9d0f2ce3a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "7c59c1a02d9892fb20ac03acb805840597efbc78fd43642b4cb72ccacbcf0b29"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "ee9227755d93c6dd94f15fdde432376eae796d36ffd751ec041f7c4363a7fea7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "bc0d39b087b217fae31c136548a5a281b522789c94bb371313dfa1999dab2011"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "f722f38d1df8782517ed9e43f559abfa796edc2841dddd356811a6b4ff37a773"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "ca11cf6a40b1088e08a37a51632ae3272a50a19b8b16cba20c15407b6d3963d1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "19d5cf6658562fc75f2f3e470b3e2f3de975d85ec8c4b7b80351d5ac59a79d27"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "8521f940733220ac7f696b8680406f729710e6844a367ebc11c9a4359da23983"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "8e1a1cf3ea5a31ef67ed481186d08c47514553775fb332e1f41ec02e3c4f1105"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "1fe08419c286d2741d6443fb65e4e584425fffef98c9aa3ed58e87c2d089d060"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "f0ad7ff555c13de1966052e7fdab1a7f5d22bcea270a607b15246e1c07632772"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, 
DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "dea7ed6ca8e07ad7b1fa5e370640c6427b64e03cfd582d73c5c79fdad7d8899d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "3483665e12a21793da6bdfc2d58f16280042c18d3968cdc44fc970d602c7761a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "1a6b8d98fe17762b81707b3e4f6b1bd3a1da076e655c73347aa6e4b1cee3fccc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, 
"8a5c0537a1d9885071cf37442dc09886b0bc61ce9d53eee6f774e2c97522cd3d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "4bcec53860ebb754e9b7c502b5daac14776a2c668665556c4bfe792fa5a64d68"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "be9e928e1d691b9d43defb1515c243fd5c23e348e4b937eddd5b3fb1e65bd0f4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "844b5fb80ed24a89dd738d817d5553eb38636cbb66db43633461027938de2c64"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "a727bb1ab36c3c167481636fc1585248674f0c993ab3ede709c5969f541037da"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "fc9101650f8b959358092d11149ccce8ed09ca0e98ef3b1c811b57da197adbf9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "19318ef0c226c2a9aad25c3d1117334dadc91cb342b39041740e5718b66ed1f3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "c85987eb173ba5ce91661bfa0880685cc4ea9bd8022b8435b8843344ef61b041"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, 
DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "51d537c23ff4b698b3d606c4ae5899cce814fba9d2df41103217d99f1c7515b0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "e7e1f847e71de01ecb42d153312245a4ae22d02f5966648016a8826d6b712354"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "9a07c373229bc37601635318f6bfb1010f62e497edc199ed376890e127f4bb96"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "f9beb00c84dcc243d656962eee4fa6f0725cc8f784533db40b8d0de93bbaa43b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "a97467b9710ac7ee167539bd2977cd03f806e31a56f301f897bd74211d234ef0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "57de85906b65ba2da567411c098fbabc9e3755be197a51aee6dbb74496fb55af"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "657cb29b41a7787927c0d298fd4c0de21ee2295ad77a0f0bcbc6cae15469d2d8"}, -{ 
DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "22e88ee1665ba082bb774ea29b2f029c295765426dde0f95110e62148e94aa29"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "98ea42b2b963c3f9bf6d77294d343aa2518cd949b02bd678e7d64df970cb225e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "c89db0f427feebdc94b137aa672d8e7d9f4c0793df22498efdabcd35e9dbd6ac"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "74dcf2996469e3444d75c9eeaac2ea2832612d440f6ab70bae4650ebbbd11739"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228328, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "5536485f6327d530feb1fa88238b0b2094c292636089bf282a07b777a5500c26"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 164960, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "b8dcbc1181096f3555e46f666d88102cf418a1176a7d5e28002a17baf626b78d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 197824, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "e23d2796e0e42e4f568a0144a4b1f25e60ed214c6616b3627ab495dc5256a8ba"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, 
FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 164944, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "c113ed85c2f12c5d4303f8ec9e91e2ed6a8036e7b9b3c0320a8b22b6a74d3dc6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228328, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "68fa9371b0c538bc80c40da2db491fc49a6b8933d327e2e577568b43fff5b270"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 164960, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "820e084aea045496beebda87d32cd2ac0fb1380b5a26cf60d16bcf55ab5a6e96"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 197824, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, 
"d36bedcc062f7291fb4a554de82abbeae75dca6e89e928831e24b21220a67772"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 164944, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "b5631dd281d2477f9e78e6a73b4ace2c188c8043c69af80d17a06e9a99c1d984"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 179288, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "0e23e28404fb2a66e1e350ad14bb80fbba3d7652c0a57e2f2e41135a2dcf0f3e"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 172632, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "020027dde8f37a23eee83b1f27accdb442f014fd3eac8000b08478a9845994f6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "830cdf1a37ce4e83a59e2bb3540ff44d522d4118411ddae5c7bd756ff730d985"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "db915e953d8bcdc04b460f89470d47b29a9a9b51f6fb4ec60143b2624d5a2739"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "894daec9c920349c5d6e38892377f928968de11319d467bfa8356fe01d9bd38f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "d8d69d5583237e668f1b8bb68b14a003bad07750e05aba4af5b7e13e1a426265"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "5fedaea0b855a0d95b57a458fbcfa25fda8a03b3edeaa582cd5de75b26281942"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "f90321a131b1fe5d3d169de3671cdc817999fd368ea917b3b27f16a117a788b0"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 179288, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "a10015c8e9dc0c5bb091e8d607367f0bc8a8ea37bbc8a70d05a7f63ce022ee51"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 172632, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "5b6fd142638364afd1b4cd328bcbf9f3a9f27a1f68ed32c43930933128aed253"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, 
FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "70ba3062de81d679204aed3e50d5fdf58e3a8f45db605250b6c9ff8aa6310da4"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "d2463f9f33b8ac3a5e10cef90c5c5eacebef9c49de1f7117a55e5f837e680215"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "43922235cbf19da2a93aaac3464b347e5779c6b6d98f8804df706ba016aea26d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "c65ed412a3931801b4fadb948d7c40ed312986aa2f07a0253dd878903b26b4ae"}, -{ DATA_TYPE_FP16, 
DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "9fd032bf0fe3a0e7e92e15b43f7f54d92559b986ddf71b6dbad1340670ea96ed"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "1f1ecda99235c7397c2f6cc02b9ae9a6529b1bedbd3528722a418e12634e2b34"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 179288, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "8720fddd3188d65c0cb6d3eb031e7913a0cb83e4bc5471bbcad9c749f607f5cc"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 172632, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "7b1b571040968822d3b6c3fd784b1623134864d3f4f97a44a272179d7d55ca0d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "773f50029d09317a354f04a4345d3111886da34e31d2cdc252cf5a6aa60ad3e5"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "68019672f00d4470ff17c43bbc9a7241bf2fbf28d4f7b095fd578dcb0dd5fa28"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "b3bb2a8863ff3297de7a435b555e22ebf12bdef85ea8a5cc19ccbc5fe666cefa"}, -{ DATA_TYPE_FP16, 
DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "b40490b3ba80ae91682360a424a3088eefb46668c658ea3b32d3f58bd8e52da8"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "de6e5ae633e44640091e6c04d22b732612b24b153f7d8734e15ed12f8ca2607b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "a99c66bf528c9a99d490c29c5dc3eb5e0bbb6339c8b0b74e3cb85b05ffaced66"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 179288, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "6a6e98367a3b564862e43e9542c74cf9bd7db4cf10a949f7b143919cee290d57"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 172632, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "34fc2dcca49a4374eb05d14a1ef4b764ffef77b8722cdf0879478d4618df8c2f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "2f82c02b9f779d6ba192068e21519fde11b62f8270eab81985e86e5164eac284"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, 
"0abfe23a46df8634ee0844de9b1b8156283b20719e669845f39b505686d155a6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "69a050656d9814c438b9702d9b4c024a460fdd38de85c8ea562b07c78cd57db1"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "e7c013afaed252f8b3145e06372b14979877295248be04a4cae9df4d729fc21a"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "c116984f34ef06f81bc31a525a847e190beca6fb4bb1fb689151d9d9a249bf64"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "504dc590a0d6a629f91b8c80300276357c6418e2290cc4468479b16bd89c88c6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182872, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "e79f1ae1b14ad6f200a3433e8573a56f4dea3da28ac11d2f2902220625c57f85"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174168, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "e753ffd7f432e1f4d6f518b210f00eac25a9c697d9bc51fc9a31b5b41ed1dcf9"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "232e358b749099e59eab0f2a0ff6e6b2f84dabd1165672fff5a99bef03d692ea"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, 
DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "0fbb47376c7ff58a32b0cfb27ca4fe534707aa9940a40ee253b28c28f6b2a3a8"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "8f60b48fd1621c22e2962330b216d60586232e65cab8c7ea907e68737f07e47e"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "b642151f06589e96d7abcd05cbb782ad31333727993e6907f6f877cb69013e30"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, 
"bfd5f2c880df665e095780697cf6a09dd269c7bcced6112a6addaa3ac40d4362"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "5024e71ba89435f3c4a008e049388dc53fc536d58a7102e5dd68c71c00327642"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182872, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "786d164b3fc7dec539645b4894f2842d213b0352771d4a19440ca53b9aeaa88d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174168, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "7ac8297a085c5fa32f2b45d42c1911c20a6c22293345bc552df88130f75d045f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "686346c25229c7f0cae00b2527618e4ba282fab7ff6450e7c8eb0502a31e5c62"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "a2b48d8610399aefd7d30967f1359535d716ffc3c4bb0eca911e012c2786a2ca"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "5a442f1d6d9576a0fd71f38030dbeaec0f0aea1e7d7e25969dad8dcc447ee102"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "4da3e8bf3874c603a27a23fe9907327e3419a40bc512802fe5856deae850e08b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "6dbe776b00fcabee8ff2aa9e9f1d1b40ad73f7ffe2acc0891c97721402d59c3d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "5449f27f01e72ef317306907ce717aadc9ac81028ae7da308e4c3b5062e19089"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182872, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "83154d7f0521f08eee5a86514e1e8e1960d925f657055df0c576b1d30cd4c354"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174168, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "9ec527b5095018c15a6e5e7f5600fca3350d428ad593eab6a72d812019ea2e6e"}, -{ 
DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "9ffd388a0a9bb90d4642ced426d5db365f601923cd2263ebc512da3aaa85abcd"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "7bb4f8b2b59d49faaf1d12867205e6d78996026cdcba7287b9102461e451c275"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "8478f05e5c81ba25f39368154219d580f6c96806b6f4889623da763cde1fbf82"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "8a2338df6a6a3c49c614de19512217165c1e69a86eb85926ce041ca286d627e5"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "c252a412128e4ce8959249b6e944ef142aa6dde0c57ddb257222b4f469c0a87a"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "58537bbc94a3af02a39ea160e65b0e09fcdae0225482158576d63a4a39db1f29"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182872, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, 
"760c73c5c9d8a70973c055e81dea27851f2c6ab0aeffe5ec3be444eb5aba58f6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174168, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "9c80a679c1a23f2afda7e29a529cb0fe44db74e2465eded6a4b0986b1163bac6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "4bdec9c08303c9942cd1d3c43a28cd57e9b0530e0c5a913d5c80fb8059a80230"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "8602f087248839dca1e29d0c50939d5e6c9ab77b93c0ddea47c75f989aa8ab6d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, 
FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "69a4862ae6d82e11d08debae3cec9373c2ab69eaf3ae2d1280c1efab8b7a0c4d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "2a31ee625c38eada41542a78919877d019419055c00dd04a73aa13eca0c4e569"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "ebb3b82a1977b569f00613e066f59ac16634f9c0268c1f4cef150115ede56cf2"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 
64, 2, 2, 8, 0, 0, true, false, false, false, false, "9c2598aa1f9af14b1c8de0a03ad0238627a9deee83f0e7545359430c5fd5a0c8"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 199864, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "47428a9deab838a015d090ee35d4e10d872288ecd90d33f7d0b504bc74db913f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "b4258238857fd8e345f54823f94fbc5e6d73c2bb7f8735eb9b3a56586389c623"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "75162ac2e90636cf7c7da3ec0ee7d33805c167efd5403e200553b12ac95b025f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "7454807a8dfaae9cc1ee8370d65c8fa57bc3c514b12baf612d1901eb95d7b3ad"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 199864, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "73d4d6efdb46466f2b6508f1851c44f9c3619b2553a6e5038f2ac3a6013e780c"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "cf281e74a6c849302749ea7afceafb793a4548e67ee118a843ed01e1150f8bd5"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "075e389fbd5c8efb6c9b4286f5eec9d628ee72553c8050ce64d8c780e753f35a"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, 
FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "39b08df0caad410f9e43c5844bf1f7a1cb62222d880c0fb826eb12df2165937d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194728, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "a5c28fc09f111cc7e04df7da33a066219588234ffeea47f54aef1921878743a4"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189096, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "1d11f71772633b767bc991e266f630e93a8eb9d852b154cf40ef7eabb6e96991"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, 
"9508b1667b68e4d0154b72d30b0b04dd7cd731e68619ab032fe550c2b4f19291"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "3c8ccad9c491d8017e1afec361bab18f1f0faef6638e1bf8ec1f49dbe37bb824"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "053cab687c4b6a49606a3ebc99503dffbc7203ebe20298305ffee5b66f12805f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "cdfead07fc5d350838dfde626f037bda0e1176c520ce2348dd3e077ae4172eab"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 32, 0, 2, 8, 1, 0, true, false, 
false, false, false, "3b5bcdf6af628f79f559b0432e6cd46cb0300a24431b3b450f11f745e0336589"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "5755dcbab6faa4a82d1e491b1f1cf4109da84101225aa1477d77b34a19ba778c"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194728, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "edc9225bb7a61196896875ee0b640b6e6151713bd00f2f0a6f21a74ed8676363"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189096, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "efb554a15a63c089238fd7e509426f9d98be717d5be940b8c6e39cc9a65f64be"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "823a6c7c4787e14e3b08b8b7f815bcfe8db4b6aecb2d584b77fb5af3499e6b77"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "96229a1d00d79085ead687e7d4a157cde4cbb268f447e86d90482cc0129bd171"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "2d4ed636bdfe4519c80042fd059be4540a496808615b6e96b896b6899798ba73"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "f095ca41caf64b2c2de85ea36b7a4789d735d87b82c1e1f2ac3d77df5bad376b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "6d7b5220c0b50ae191e0cdacf57bae25ce52c568e191dafd522aaa0d3fe7f902"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "77915272c848a7329b5ac87d8c257ea6a396c91410471b04546f055f0c434c29"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194728, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "eea136c58daf22f7bdf3ef96f8a7de5c30d93c730304a246510ea88fca01cb47"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189096, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "c1e60d201aca1ce6dd0e0ece56c7fc12600bf89d691899553b61ee0414a0abc5"}, -{ DATA_TYPE_FP16, 
DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "25ef4371ea6252253db63d941e45bf7b3f6d5fc685e40700d696fb69bf9922ab"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "1a6054e4f46999ad23e75acc87c256e4a895937b9a1c0f87e0fd32ddbda24a1f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "ec8c24db5894389f5248ee10fd7f066106ef2eccccf13476639269223fb63f85"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "f88bf29b63d84217ab04e9f1c22fae5defe72ddd0f73e77a4d35a8cca87708a6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "35c6be21a4f2fd60bacf3e0859813bd1ef898863a0ffd0b53eeb84c345dfdb61"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "1e26d8f787f36ac31b6d1b51bf6d74423df33c66439a14b9d7ef2a40ab6a3e96"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194728, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "506d16b6a6c4d3aa8a1b7bee6a2f6aa34ff8c0a09ac88bd3590f2ada8e4ab121"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, 
kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189096, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "41837b43ea573d66051f1c7559944885b55f1d3a9ac5528441abe683160e795d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "8e875b0bcda9280b127f2f6948d19d7c31574f4198b5fc3006f3f305197af8d6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "bf0ac35776c66025c1e6170658f24464a5f2368a4d9b3cf0831a450fc6bbfca1"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "32336b8ff4aacb267ee9ae3da918a8b0a129e2cb96c9a55dd9dd648209730db1"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "e957714c98b4c0297be43354586b290db5aae57195b939e8fc9c52d7df80d23f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "5d20d07318d24326ac05e47f53d1ef239bad24f8a45c098603fbcd1799128128"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "e451d79fb4c79216e7540b70012b4964464be7a912f1cd309e9d7297a5e2da5c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "956a168012f30083429215b9c1de94c760151b6ae1f26c8af7ee758dc1075ed7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 127120, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "ac6bd5c9b72afb4e8f292447fb14e785982553aad645b099360120939a2e3583"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "55670d4a33b54bb60f6336c31aea121ffae88538ffd55cb87a09b939d3333ad7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 127120, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, 
"43f39546ca0a3521453cf2c76f605d90b389cb2ead7ceb16d93a979e896a8afd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 198888, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "61af1b0d3c554ccc1c8afb314ce3f53fdf516dc21c5dc540fd38c9d56ad8f5f2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 195304, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "ba130998177449074defa6b1df6387e1deb7b39bfb86b24af98050df17c872e0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 165088, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "0846de615453b38254cc73cf951ec7304dbf426909ac40471573799dc079ca12"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 161504, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "44bdd46ceec066346a2df60dbf902900fcc62328147f66848d4bc0b9a5dac7e2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "b495396269d4498937ef75d009e289d8eb3fbf685c810328866f6baee5ee6bca"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 127120, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "cbc295d1987b7247a2633e442c2016d1d986f3ef11b696b692231a2376fe028f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169296, 512, 2, 32, 0, 2, 
16, 1, 0, true, false, false, false, false, "f20971c0e9edc06e741e7c01ea932cb0af3eac2f2bf83c17631233c772d208a6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 165088, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "15ce4a0232ee47d2b936db98ffaeb303a3e8aefeb1513ea7f1900cc78ef7f8da"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "fa57ff69ad706aa455e47d664b1ffe706f8826301338ff5a987794540bf00627"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 161504, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "bd8506e3558d425e055d5f880a00f913c7402be298282c807695a91653f98a9c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 198888, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "4d032bc49816fea27e8a1ac12283b5a0c0c447a6506f29690720672e7ade24ce"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 195304, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "10ca4b8531e720d6e570864bfc7165144fcd81ebedb232dbda3af111d27d956e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 165088, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "c42f25f5c360a40d008521b28df5f09d92f3e28cf824d7346b1517a0b5816149"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 161504, 512, 2, 64, 0, 2, 8, 0, 1, 
true, false, false, false, false, "3ba666c6b4545a7ef08584ebaec5855649da7d683ba16346fa31db8d8b646127"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "0fbeb5d6f1bec5540036b2750118e1b151d4114719e859843c0990b62535f7a5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 127120, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "db5b32976a5e127b8d143a2ea55f5f396f76c8bcf2a27816524493c9166536fe"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169296, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "f7bacccaf0deb1369a06a57eab0e5cdb4805fa774845e95554d49ea0a4b05dd9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 165088, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "520068a9b51c64ee6c8de1bcc3d4c1b54a32aa477492cf1a99361cba3a44c041"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "6160d6822b761c3115f3435eb0c5ec822ddb5c110de74ae242e3fd5f69583fed"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 161504, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "cae0ed4416594bfa58b90cb7fd78422b04a0a0c05f181a96cbc964e131755b17"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 198888, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, 
false, "61d35c1bca40171d293c7c7286f23e5a974b5ff4f0cb6cb063993fa871f476e4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 195304, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "2ce0ae77c01888a486af9f1ea6060654d78984d0b9fed8ac629ff986e15461b2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 165088, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "3cab149d3f875a89b28fae08861c273585e6dfc1dd54b8f9c5f89dca53ceb855"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 161504, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "4f3b8fb716e53b99869ee0d82e33700cdb91cf6c8a2b79c87a1fc9db5e78e3cc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, 
DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "dac3dd65b2ee0030dbd4c356303caef7f1dcc991c257cc59493274e41b3eb35c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 127120, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "adfb519f60f6b699160ef1f3403b2a9f026f5c44253c6d938bf2cd74ecef10fa"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169296, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "ed5737512d0420bb58979385986104f8085d0d55c131e4fb26e9e32b6becbb0b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 165088, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "3188baebaf8dd13a1bf1945fce3ba6c4e4e2db403f28057d9623952bb8cc9f47"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "270fb7ecf3060c50abae9ebc3845c253951519d38261b0aef0d580bc6d549c2a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 161504, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "115f2105a46e353a6277d4f2c01257a54f1cf8c2e2559a1e4cc644717826f381"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 198888, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "94814c9f98cd9e14861f1f326c2590a7de0e57fbaad74dc4066b0b82f76a76e3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 195304, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "09b20b2fcabdf6ac36c81b4426a9dcc4dc695871091d8a681a41d7eb7b4285d3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 165088, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "aa6756c332b8170d56b63b72b2a758199bf2674f1136713953b1a5838841fcc3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 161504, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "e5fc3ac516817a94b342e4ca4028944a417a1d8a4d5ba1923ed01adc4d043d29"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "5cac56635df2d86831ab66adb05dd226a67d75ac42a959b16446a2d18529b781"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 127120, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "93d3d9c5bddc21be9b34791ba7631ca39347ce3a2b81a58d2ec45df3baf0007b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169296, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, 
"f40fd7d250c6de3263dc650bf87ca61053aeebdca8a2891d8957723d8f5c33ca"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 165088, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "97cba282bccc498faeb9b5901b88e8d360cbcb69138d67b4a7bd8c3165206184"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "3be50503372bf7c9a1cdff7c7d6ef7568c563f05e7f52677e5991b94db0e2565"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 161504, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "dc708b336ace6f2c4b139370760e16b8b36e5604f6ab201ebfb87dca652295f5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "6cf0c118522f8794d1bef4a61830e9b91afdf95d7f032f2c2d93bcd9bb897004"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 64656, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "b52d2e383436cdfb28398d5792b6f8340233a00ab16d54fbb3e1d9328624ba2c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "5925e9420220010a5ff12ac587a07e767b8fbdee6a21efd1ae58d9ea87d02413"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 64656, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, 
"bc1f438bd6e8582d6c129bb84933fa8b3cbbe45b4d1c1a50efd7491d94683528"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158072, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "abfbc7fc3c5f48b486d7bbd7da0caca31899b94f66072858ee90ce2fbc643f95"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155512, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "3f0fdaa347d0b69f3e087c9fd998937b14df0e3dc2571e02b22aac34e132f772"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 123248, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "9a6815e3ea018138c8887af86b2249c0d0e83dee8e095f9aedddb1607068b38c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120688, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "6c9e0e896625cb353dca124acdcb7950e74aaba6bd103ca090666e2ab6713b4c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "bb763d57e7ba6c93ad905ca9e853a7b93836372f07e5a2ec70a260f8d60a62ae"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 64656, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "5757f0a8319708f6beeebcaab33c38e4534f471c0df03926360d2569af2b3d52"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 125408, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "a91898800295377d0cf7b1dde8539001b50f2fb0375affe8d83be7edb10fb839"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, 
DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 123248, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "3bd16b34616709171102c1b34a625157c696b1bd24b09b54555a8c11f455fe8d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 121312, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "45f8839e3ee237e239d38a87ea49cc880283f5482f5906696c78362c54dea0c3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 120688, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "57893f617ef649ab4e5c8a76718a3a76b738b5783e7843195ee0c37a49417746"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158072, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "8b154775b2d2cd7e7f748c55fdb377a258b5dcaeeb8e58774d6c194abb56b582"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155512, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "3bc66386cd5bf95a35fdcaa78e59e9e7dd9dcb11f1c5775eca4edb2dfc75ae9b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 123248, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "660da2b7979fc0788d5fab5de01e972bfcb94044a4b72358f9d1b3b1be3ad491"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120688, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "90a902c8fc31a0ef8e7b96c09d214008aa30fd69342c7fb5eba37d92ad80a6bf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, 
DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "9e46fd879549891a20f75e4dc4f9bf66489d60228f57a0fb57b56bc10c5b92d0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 64656, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "bf8c88c8f20eb725b6330031f45ce0fe65919db19b9719828ca49cf2a8f2121f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 125408, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "58eec15308e80a967266c8d965325f4ab9c19b50e82499e9e61404a6d929d128"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 123248, 512, 2, 64, 0, 2, 16, 0, 0, 
true, false, false, false, false, "029bb1c7025597f3da0c42ee4b5ddc11d9bc26a4591cb931151ea7fc939a1fbc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 121312, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "b81dae6bb41d1eed0a9254e5a65e0a8f7a75058bcf286b3ed51868195756cc4d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 120688, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "3e558f3ca7e212e00832ca9c73fb1ca6a82e8b0214b30706f5f25765c2947f0e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158072, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "e0dc194d55f03548e78c588022d1119e869948c7e62d649eb9a5d8f74c7a911d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155512, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "e9869bb52975cf88684c6d96602cb8903c1065b8d8390b4e5fb1274c15c159dd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 123248, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "ca01e3d1428fb84d67cbe61cb08e411006b1a79683fbee4c54ccaff1c5e52404"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120688, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "5aac29c4bc13783663f68bd76dd6ce7768443228210bcf2c02a4707a2eea5148"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "d7361f02f465b9d3dd77f4a367aa9763a120986ee0736db40bd98f7894ad57a7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 64656, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "607ba74de038f00ac5a1a95f37fff79ca24913e199ffada53a8f42cb63e8a1a8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 125408, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "dfa30a4de4a4adaf1c93cdeb5976d586cdef76a743f45bd8ea15778e1bdb3e9c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 123248, 
512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "53ff5c8c96b3e3aa4d1b09680328888b51c88c861750baf4b6010c165d720a4a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 121312, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "3d00107c4bb300de3df721bf6dc386fda4de780a2ce1651ce4a9d64890f89ed4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 120688, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "f0e0b76bc3482fbc1545cee56ba95bcd6840ceeb098727a6aede6a28c92c5e6d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158072, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "e75250abd20d96820547f51a4ba9cacd4a7674848e2e09c6230131ff85d740e3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 
64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155512, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "8f2ff8cf2ce3a73fdd44da7cbcb9cc462612d17e57dfdf037fe2aae7f2395126"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 123248, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "b667494b5e4e16e475d950357b43131b7466e90c716a272e050e0a683b2d351e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120688, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "8872d8d32d0c6b3cbaf5937eed687ac57b478528df95c035c8ffff3ccb1f2c2f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "a998afd1497be8bad4d520e7fd596a83b5e5a76994239df7cc7f21b0890b44d2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 64656, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "835481a35b25bf07e57fec8c722b861741b4fa375b6fa7080f3353fa475fbec6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 125408, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "ec108540fee9350aabf740b74d4c65343af53e0f1e060c8a18b4a9892deb161c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 123248, 
512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "56e9586967ed9c64b43d3721580119013c479cd74b43dbc97ec51280ac8f0e1e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 121312, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "bd08210ff86e59f3a68b26723c6ce6efebdec561177efe3097e55c5dcfbdb4c8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 120688, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "87b30742cf41bd7148e7679210be7bc8cd2b950090969f6241f7d8a85b0f2cc8"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "3a7730d3a126b1ac2c7ddd1d61c11a91bed1a90a5c5b80590e478c9aa2e983f6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, 
FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "7b99787e1c0ca2aef7f89fe468c8cab7a0981bfa3cf426fb484256ed167f0d9c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "d0f7b0e00e5d22957837f4a65c23cf1918cec261a6eead1e2ab45d157bbeae56"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "036529c284e57f92f0395fa5e79adde2413acf227f4d6af88166264b942d120e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "d88e48fdc1ed2191135ec6aabcde74bf169ab6219427adfa89e0ccf3b4cb126c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "a8d524d080d1bc5d7ab9ac9bd221ac1ce46074ee7ccf211890f2fb16c1d39f53"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "5aa306803e686e4c06b226f01308b44a5ba34a3f6158f22ff588cabaea6736af"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "14d8f4dc31ece99846fa29de49a3846521061b26ded91ba971519ae12c99d1f2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, 
DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "bb3188916c1e5e468df5f00f28b2c3db7e645cbcbfd3fea1cc260ccd9f79dba9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "2d3c1c5fcd0f23834b3b1b848039cb7bfe236c97559ea2474b92b6c547736130"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "49e13e2620fd41b4fa8ac5611f0ac979fcbcdf348558eb1234fc983d623d1701"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "7027ce4ad189042c9817fbd2dbb80c8f7bdcc05538982890f637275775f2be95"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "0a1338db21b71d576930eeede541446b3674878afc2199a4dc55b8e1ac74a477"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "f700c17ac87cb126987aa27ceba63915722232d2f6d3dbfdf06e04de73f057f8"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "fe5c64848d8c86e70f798818bad5ebafe5e25a8da699df22a0f9c772dfda27fb"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 
128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "cda71ef080a61c339dd51e976a0b7fc673fbb51c485657aad584707930f2c93a"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "48a3ad8e95ad608e37dca88fb1c6b30ae2b264190e24ce988e31e67f67379b4c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "72ea100dedea0812b5cb74686218d92b7589e5a5c02f2014c165131081a6f01c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "4d16193547e1c4a285f34dd8839f90af3d78d08ee106efca37deebb2ec47b567"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "972c078448a1c97c4fc541967fa5a0a0c5ab06924a30de1a14863f00ee68334e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "00b8e2323acf2e11167e58160f060b14f3e67477493110f92b18c1b4451e27d0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "567600e214c0814f8029beffe44791082033f34c447accafe7f33d63dae67911"}, -{ 
DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "096d8f511015767668d4fa6eb91e95104aca7f51e92e36ece7d0bd0e3e5d73e3"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "9c89caa10dcbe90fe26cad1bbccdc22d54627bc68db65d985d93588d9ec2b880"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "7eb462f7f71aae495cd3290e1d8b9a3436dd2aafd6f0b467b4bf750a44615883"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, 
FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "b3c0a3e4524065fbc9a0ddbbb5759a01537a093772709a886757929e21a8ac13"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "ca95595ec471ec1b809166877a74b0ab2d064dde73ff79d73b89c2d833e750e4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "97a086b5cfad9be6ce37af1176dd76dbff25470b0bfb8f01f946aebef307a1e5"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, 
"b95a5b18855ade442c6b0fbe74ffc1a167ac81c5a4a31a9831acee14dc607e2d"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "684cbdd23ad5ea93d8b93f32e6499f333bd58add1601a7cf93e5aab43101578c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "e6990e5c38b2bd8ed990f5ab145f620fabc10d024c239ca18853009d250b5e88"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "3571dd8621b5ccdc1a639c0df503c0dbc91f82eaa7dad998200ebe3c23b1f226"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "e8d7a8dc072cc9de232989f39c808382e9ff5c7f1257d476a4de6e66248e9eb3"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "2a493e44a09589f99c83ae2ca599464743f562bb0fadec99792330e14ca1e4b6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "97345cfbe12800c8df7446264032fc645557336ea611e6a8b2f69718e170f465"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "b5b67fd3d26d4f759d837e9b0d8a42146e89c19e7f0ef068b9bbe489505d9988"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "1835a43ec883ee12d5d35ae387fd188d8b05b4528d939fbb46f9ef51fefba5d4"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "8414e857ee9ca7622e4ef86394a0e0b04dc6d79f8a39ffd47af924d7f031fcad"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "8731973aa592c01dba5dba7e7ec54e0a84bc05db02042f6aa25b8f342a42f214"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, 
FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "db99f594e6c6979ff7cca9f61ed3459ad92bf17d3aaf371f43e5592273462d9c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "b7793507fa78e448a4c03b18f200538f266a4f6d8bfe1a458144aff369c9f901"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "2bdd9d78cc183656d52588cc9996829e3281001a751cb2e29bee1704cc409f74"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "038fd7c0cfcda00fbc0dc9937700cadb2f879f7c0656ed5debcf0981d0d841c6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "be1e0a2303fe30bdf8b7b16731b33a8004edbff3574232340190edb8078faeab"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "235b599ccb001fc981c5b48a648dbc5afb8479944ac7832e81e88c57409157ae"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "9f5e0f19ebceb3774f6f90d86205084fb4add88fbadede616b6724ca720583fe"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, 
FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "62f1d858081c90b21082e9dc21e1541641e957592148ccd08270fa03f1a6a56f"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "298a2df86d495c24a0ab3f1609b7ac272055ddebe97e9201724283f17a819356"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "9feca43e65db5b9e97a2f80a7a3a03fccbad7a6f6e85472d4686c37b43441dd3"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, 
"ff1a3a2cd0de046a45183a8b5b556216739c15563409c17b1f1e5572c6af55ec"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "ac4e6257b43c4c1e862833d6b711bb9eafbed372dbe124b33567c6e9c07e4a48"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "ef64bddc3b16651ff4509cd859d64576ec1262c691927916afad9feb743336f8"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "6db508d58d7318f1c16c73e6322b18dc8dd6bfb4a882a5ed1470375cea82ecdb"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, 
FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "097271569eae27a1ad291ac7c4a4eac9438db1fc8a350895272ef688de595c19"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 196976, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "355cacd9f817b5dfcf3d1a57f06ac0ae855fa20f8e60c884efcda7eb1a67dce2"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 196880, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "d8dc714bee3503e8b3f76aa7a6758dfe46f262584c4da50fc2da1b6f9201d0db"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 196976, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "bb50f2748a00650ef05e7354604ed6e02443b6cc0781acdfa1e715d16a361bc3"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 196880, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "5bcab9011d85870c5e72dac66890e1d4458a6fb1b7bf57b0124f040d58f0ba9e"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "7eab9d9b4f1bc86921e3973fc9b7a0ba4675d55a1a06de4fb3f12b9555f8ad27"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 197744, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "cac71e169204273ba0b61fab7393ad4ccb1d6a5e4c72e33156e92056aee1cba9"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, 
DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "77ea3cca4bfaaae06498c4be2afd56e7d8c2f92d72df76c264a397fdaea54ae6"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 197744, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "55b1a6145132b78dbfda15b695db2ef19266aff5c8ae701f5505baf1ed59f71c"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "e7ff2a55eff775b3a0b27dfa3ac74a93be67e3b5d501db7c99bc598e95385218"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, 
FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 197744, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "a6383367069918d97ef83c6694360a6d54894ce1b43763df602a00dd5a3d8c6b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "e60f819bcc001c256bf582e16176a33cf2f1983a84213dbc1b5dc8897ae1e385"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 197744, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "dfda65b3fa2ec3405e70e1ce5b7afd06f6a4829b3da53b05d96d08f8b154eee0"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext", 196976, 512, 0, 0, 1, 0, 1, 1, 0, false, false, false, 
false, false, "2aba52a9fb7df2944104136b30c1c964d05c6b6ab0df9cbd260085b3daf72c84"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext", 196880, 512, 0, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "43e977673d4c1ca695a172c00f71f16001c972b7479477d403b98a3ef8796970"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext", 196976, 512, 0, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "bc6499c9ef96501004aa890138e38b41e707d8efd0cb6a76e1249ded0c0ed37b"}, -{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext", 196880, 512, 0, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "d01eecbfd3e09bf9e8e19dabe9ce8c8de79bfbac07247d2077c2d63059730ecc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "9e45a127cece4f07a224f226f9197019aab249809f408228100ffde719744471"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "08d92efb4e6ae4381f43e5d34538a69c57dd64450b3d5375247b0726d345a2b5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "f828ef9800ab0e212c871cb57c0636644c61088d515c7ed61d5341a99870f5a8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "6419c16760726e6f1c82bfa96bbeedc357c330741f349f2e186e6d876c557c43"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 
256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "0e0ca54591d094c379162bd7f62dfb1cbf7ef0c41bbe189358002ee4111bd050"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "4a291c66ee9bd7435dbca482c5d26c83a1437ec9fb0901f8e60c625c1f11f5ad"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "3c3201c7e08114258f207f032b3a9c1c82d54db80eca4f61826cc4d8b4a2f3a0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "fda516b3e367a31b5cdf527bea20885d3a8993c69362d3d130a7df1b301ebbde"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "4d4a98bd5386fa9ad02b594a6844a759659a448d6bc232399e637d0d5016e08e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "b31cff13a92bfa8db826e18915b80326faab5817efb6e4a688804c42952b3b13"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "3d1ab3e7f93dca2f5c619dd65738aa05e77e8cff353a9b10ef7c24678f637c7c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, 
FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "9c0482b2b21fb35226ab3dc8a4d6ee4347baf1d419f4e08a4e5cc47de2eb0cfd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "86a37efae804a7146dfe2f24478d08b125a1885bc79e79920c73131ce5b56aa0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "abaf267af9c9d0b83a69453a39ace2c8982b56c630a2f2768abf86e706f3a671"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, 
false, false, false, false, false, "b529c0c1554a4a981b553baf8e4b77309b002e6e19880622298aa79980d0e678"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "6dbef01a96b9a92cdf1da4992b77108094ddc823d01e14b53f8c2d66fb8488e9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "73e3c22c54c208674aa76fa034464a9dfa2e5d90ec3f6c1063c1ca76c68324de"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "5974c1ce9e08c0123e19261644a1bf8349b9269a5cb0a9d977eb074f5a09c557"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, 
FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "6ff0caaa975c20e985d9877f1d621e3646d6703b6d2f4dc06237fb454989d6c1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "47e302dbe61af6bb81a1f7fa1b81c7b930cc01cadf8d914151cce28f695733bf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "fe59f82f7c2f48ceb05edc7372469d6804b618fd0642440b671a6afc476373d3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, 
"6142736a270dc7486592ab46f29d948f0205ae8fc725d756d2e2d368c7769d38"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "06a98cfec34cb2f1460e182295b194a738bfac9a888926615b74bf536df20e50"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "289a7f3447e948896966fa2cee06ec458d274a3c3bbebf5eeac690f7e978cb77"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "104af5c1b942452dbfbaec71ebe9825932eed03faa1e0c2386aba63bb1072f8e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, 
FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "31906c2fdc5c3c20860cd5952ee82267c92c43797349958d9c9a201a97996119"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "9b710febb85c2cf6436f2d9d8fdd7ec77a0a58ad182fac57ca1e2264366c25c5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "4bfc8023a4835c3f5c027688f7d3b0ad9b485082f8e727768e176c223a288163"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "df2e333ce214658309ac6421ed7ec5ed3db30113930b75b58791c3b562aad34f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, 
DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "91797ee5cd2a69ad18977cc2f4e0fe2b2103d0a60769c5a528921bf005892e74"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "14798794d19ed7247acacf6815ddb1db7935c45cb7fae9023092000f3727645b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "88c068371b3f3ff387dd8f67308ae231a0ec59e931e41ece115df6276641f491"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "3aa2b797bcd0ff35df1745071858241a7f1b659bc6a99acf1f4a4b6be701de37"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "9a97d3e4196bb4cc1c1984a46ecf6059cbd0f6cf686d62f9e808c982dd666b87"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "0454b7865b457ccff21718eefc934f3b476dec15d1fb196883b55d3944e98455"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, 
"eaff770aaab94dc462911458fa89df1fed25f0e95d622f4d5c7d61235110c5ea"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "03cdf0b36d180836d7463ecf2bb26bc68a10891def14f22ec03acd6319431aed"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "68da31a2b30ffd1ed29bf41b38de2cf835559253cf9f6e55d5e1738c93a77bbb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "d24f12b02850f32ed01aa1d6f113c872ac4cb0fbe963bbcbfb7b62b1c21d0369"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "b8e7a0efe27203673735a28fa2bbd960f702f47116b7e981c3dd124ae6e06097"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "7629fbf814ea84c66ddac7ff6d535bb3cee6667945a69807a5b61441e039c295"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "74a1813f32004bfd2e92652eb58d666aeac2a623e15694285ea14bc2b01b35d6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "000c21540de8745718e4653a743e6a44766869c69bbc627b3d8b71f44ee90951"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, 
kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "f4068929d5428c8858032236dd9a4c7c70c43250de0927f4f124689b166716b4"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "87f797983af20e8b6cb53b654ad22491f099175a78c8e594088b4a66d409c4fd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "012d800e8822a6d200f5dc0acf174a6f2123995091cdf15a88106101ff36f7b9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, 
"9d75c8629dfb6d2e39a9f940a4a6f70c08700a0d1c1390c07cdad684b302a813"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "1dd0a37b907e5745d1b54a7338bff3fa615769b12d66ed37d39677363477e1a3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "f7b994fb97b8e1e071b6a4e6d40342d96843f08db074f72be489d51046a425a9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "7efade80923aa1178c4189f5e3d65855f20f48624e67ce40084249ec3b9559c0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "3394ad311796d3e3311ceecb97cad5bda2ea844b7f4947d9dd4a7f0c1debab77"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "553a87c745347f7411b18454f7282bb44e9d649527a306a2ab3f0641916610ce"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "2e69ed80450cc7f7814f959dd3b0419552d238a51b6779b429cd5b73b725dd76"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "0bd656086add8adb437e6730292f2626947a5e798a8cf8426d1b2defe41bbecf"}, -{ DATA_TYPE_E4M3, 
DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 115024, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "a45d4a2b6707a2126d39bb214ca128dfe26701d04065fdd6d2cac152af3d188e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 114928, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "bed1630240ed73f78f61cdd32725ba7bab19571ae2e5121aeb99a6b78936897d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 115024, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "988370ec3a93f4ec4defd97e27d903df9bb70ef4a97f78cd4a030c78bdcdcd8b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 114928, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "14bf04758f07f0b3606713c63a92e1d2e57160d8e9167aef3a51b3ebc01d37bc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "39a9f8d7e5ca6ecb242cfe415e779f32edeecf2b5fc2cec7f23a4850c3ccc8ff"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 115792, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "fc124bc8602aa19479b58d9806174f9abb0bb73f228baefb5b57caf230d62c6f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "84af14be0401bdf3f77f47baea8cf418ece2f7a4a5f31490eac6d4ab93d615ec"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 
128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 115792, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "8cefddabee1f7b3c45f780bfe3c0287e4b42a6d94a05f1845850a48e776f39ea"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "5b85c674f333ea3fddc70b7e64175c3acd521d7a0c6c304811c38e6843395452"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 115792, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "2871bf9b4279365154f7cbd5c545741b89fe27006f8900655e044bc7871479cb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "c8e446562338bbf4ffe038b4061a63634dea0030f9d268a2e38a3efccfa1b354"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 115792, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "873b75bf805533daf7cf3fbd696d5d4b853b83610c4b543bed72c84f7ac1a348"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext", 115024, 512, 0, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "50e58ccb6e6c4a67a5669222def072125b06853788bf203f83541ae62529cab1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext", 114928, 512, 0, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "b10dc76f5bd7b49bc270ee8eb0b0dccf441beeb3ec8e0a53e1b000c2537b6693"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 
192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext", 115024, 512, 0, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "648f74f1b2974d8c9c12e930e4a43177551dc7269c8c6c6ba04fd401bda05bf8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext", 114928, 512, 0, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "4c4a115ed0fd00c3ab44dce3dd6fbf8c7d09d0a56547e90d1f20d4d8a03f0599"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "a3af20e5efa2096ef5235f475c35fc5ae830b1c4c388831ba910ffbe9369ce02"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, 
false, false, false, "8a8ff7af86bd3c398115ab4267b11d90d3416841b14e5e1caeae475a19e8b9bb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "cd1c6775607624fcf7d28a1a04fca0cd5d444dfba64136c7524995be3cad2966"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "a75e7af60d355bd9aef985bf2f138b6042a47d2dbdd2201bcba772a8beddf0e8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "f8a2c8849c1da38fc11b41ed288abab724a22935e6e69a7a3a7e94326fcb7142"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, 
FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "29ffde304c12bcd224281546a244cf5df8cadf9531f94fe0ec7a38ed4d8e6fc1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "00671d3a1d1c9c70ca590d3b5ebe15a8ebc6c15c81225db2ce8a69400121e321"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "f963ad3ea45a37db0c2f4e40ba9c3b241e3582118f28b4d591302c3649251843"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "d0d450ac6ebaeabaa1126057407327209869b3ae69da70d77702fd47903700fa"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 
128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "70aa8bdb92b41a1356f5fae50cf8fe549c3d889dea82d22c57711ba58d717d63"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "3aff369a0f7b8fa3cb928695f9ee3ecd895fe6ad526800f7c0c131e79c159e15"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "3e630419b41bd223e8a603e46a87722bfb383d12ec1b6215fc3248e5b7b8840f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, 
"033c811f5724e42b5351b42343f9045143abe8b1452bcbf4812e5a7fee2bc914"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "d1535c0cd5ccaa0474bfb25dab64156bbd043d0737ca4416d90155cc1d84918e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "32386bf4bc8714da619e28b5cb28fc580bc76bcc8e317e4c4d193bb4f630c38b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "731da348ce818d1e7a4fd0939963a020f2856a5cef07029a4d7d7b9a00a3b8cb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "b900f2725ddbf0719786b5d76b999a91a766802eb37940bb55cf181156001252"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "6804d0da684bb77cc62a1f1d514e621302388a006189bbe3a9730125a1ef1f40"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "91308ed0c068095f54413d8e6b989975cc0cac382781174495d5cbcf1cbece1b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "db942c4f13b31f0b9ee0f89a4350028ce798eff7592ee2e660d411ae208247b8"}, -{ 
DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "6a2dcbcde7ee37e049a40f8099f4d31cd800bc843544249880825936baf975b5"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "2d031a8a85c37e522f1680b60c91459a95675a69e25e535dc711f62366fb2241"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "9370ba42c54c35b6d839234cfc4466f2cb31792d627f255107301eed7696eac7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "b78e0089800198b2bc72c032ab39c6afcf4b8a9f2069684ab6c4bb5cad727929"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "afeeb154b882e360a1a662b3840410004b4400363c056dce1dda4f06a7c1beb1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "78d43c085f677ff990e1100dbeeacb3b5085abc6f2991e07a4209deeb8dc2749"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "241c29e95cf99b8b5374e63ea4e457469e100f51458c645a38fed0f7b0863fbd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, 
FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "a49f6b9bad1d0537e5a9d44298c6be21857fe489f74ad5c1a1133ef83b9d70ca"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "6d8daeac940f6611cb14ef07a64b2ab1be8b8e790010d8d2788b2e3c058cef48"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "2618233d255db91a85b176f8f0dfc157108ded0f6f0f868755d189c23f71bb6a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, 
"21573b8ed94bed9f7aaba657c5bff73574ce60af3d163bb2f38c0854e34e7528"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "4a1ad14d3ddc9a599880d5bc441df1ec9ebf68f448e9434d86d1989785b7d619"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "c160757154566d4dba7d61c29f68f2c40aaee55cdf4a28786a3179767f39a6a8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "f9924e75ce02f405f84505322d6db26c18e0a818404c7ded20469db41c88c628"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "8b9ed106465fa19377a94fe471f704490cc0f52ffe76e2e053aa3608802f103e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "99e7838ee678a237ce253d1514afd70260d8de4cf12400ef6f2b2f1b2567368d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "c52236a883b40ec2d70d12fe7604600b7f4fb0139eca502bd11bfa6609b6c3f9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "f1baa159d6f694c0a4862455cfd4d61ef98e8823fbcc1a67b2556b16d8148304"}, -{ DATA_TYPE_E4M3, 
DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "f04280bbe4a60842fe5f6098bcc521d81f459e8d7737209c82794b4acefbd5b7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "c9430de80b57ec827d0273b24e185a8ae5b312d54d1b6539eba5d376aaa0dde3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "e59a32bba4bc0807451a372161674de9b0e90bbb564527117c0490054905b3c3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 
512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "a9939c4625a4f7ec572badb3cd357164eaadced46d6d72abba58c839f5f1d8d0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "b0ece1a2d7be10b64513a640dc81f5f03a9c8edf0fbf95d93ba3a708b3dbcb8c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "e8c6ffe4ea696a177d9150cb7af5f2e4ec9b3f1785d8afc85602f261ed57a653"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "01f96513926c0307ce56c5379e108251059c8dc0d506cbb09e14e627f9a82777"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "5cef78b5d1ffd6019fdda61bd474a417949ad5d4e20e06131889fa88e9c74b38"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "f1e33d9ad417539cc990e60f1a01e6fb1c113cebc27554328b1f3cc79e5f821f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "38a57c5d8e2e2b986e6ee9d188b3f21223f6267da33d894b9fad8c2cc92a43ac"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "bc3f933675a680220686fbca77e91b81f87300e19f6e5f529d636b30557353c1"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, 
FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "53cdb6367a2213924d2ea45219476f0187ec6066fcf144af3a305e073cc2f422"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "9836cda5791b2551e7f3875b444328840ca73277c5646c7e73cdd88e0304518b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "da48d083f549f651b4f7ea1317ce65a2725dc04af9cca31035ea9963e9b42f0c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, 
"f6c7ef64d17ea19bd29e85d4ae11bb3eab6d5d7e0bc586690d48c373730fbc31"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "36438d87770d84235de26002103ec9ea5208bb5cc5d940f6bb9d738a41045019"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "d0860dbcaa1051ffab9004aecac5c22e378e8583247cd702a66642851199e0db"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "71b6c65b13540c8f6b15a553e3fb68c8dca5557d3845018ccc92eb0a9c5a0ab7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "82051346bd5e75dce962795af769036f2b0e0c9fb2509304220e0e1db52eb5be"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "c8f1997205f0ecf1975f8014d19330efc0afc78f4737e346cced03b29eb9574d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "0493b204962500d40808ec76c2e303d2167b35272fea6a5a3f79df0a7553bc68"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "6658b313910fe1a146e5c7e733c30653ceba4a59df5fe065f0a6f462c9286aa2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, 
FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "b76b15dd71baadc1822f6e2862430feb0dc02da6c47538ba0012589b5e3bc264"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "46b45a8dc87ac63435056383a5716a5f2d4786174d3e6d47714a7a8d7956c5a6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "6aa5182edf458608f1a6604c1ce92a82fb5acb4109f5be26c7dc6ba5ddc3a9a6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, 
"f449e21d199286a4cb618b82c5f405b57fe1cc4a0336dd70e5c1e76844c7d32b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "b50a3d8e3464c00ac5834788f79756aa2b4117e93a46768e441c58d69d9262fc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "f44b045becb30effd48fe6aaa48cec21d39e25cbb810ec9320dab3d933bd321f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "79b2c45a0296db90daf4d32d3f19c46134538a94b1bb6513546ecaabe8ce6f72"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "1cca5174509b0ae9694da27e29644fe28c8283a9fa8acd0345b6e23b53112196"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "ee4bcc6a4958363a43a2b99425852eee4090073f992c7505ae1a76de820f3dc2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "1d0a05facd89598da8675962d1c5b0b1d21af2dba566a87e5a7132a8b9a47fd2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "1cf6decd07d8c8fe639c26c693b0d0cf322783369add9cd0e31012ee4d377440"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, 
DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "2911ee5f0d00074e8d9958723cd5488cfceb9854c24e5c4b9d8da2ac79df06bd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "498eb36a9e0e1a7d8c23e72c4767d442dd6140414c2ec203facd4271b93e5231"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "6645fe406239ce8d78d732d3484b986c66b51b4f5686ccd2b2c6431132a442cc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 
1, 0, false, false, false, false, false, "a4fff8ac521e59582a722d3f6a1de800ac2e8685461569321e2b65752a7da638"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "310107ad9aee0b96a141fd620a1a91ba83829588f224d9ec0239cd3483309b8a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "1c790e9fe27173b901625d306c4d057fe275c9ff6c6f6976241f9a1c05c863a7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "e50a127620064f2b0fb99496ee4b5e12512dc9bb6cf9227929f8d77791a71f53"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "5375cff90bab1c598069b2b0dd9b104103c30cb25ede73157d0ada80c705e49b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "3727e3d2839e5178bade82d7403a55ddbb1fc2d80db58cc20939d1014bb400c3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "5803e9c5958440bc40af838da54bc9aa2aade0ead210b48824b8527903593bb0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "889f97d9a53a3cfa225eeed20b5338fa1f203c431c801435f493d0273a8c3feb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 
256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "1fd0e34da4ffddd43fbbdf6f630597965ad43d5c315574439376d0e08a75254e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "1c6477208b3cbdca9a081422ba505603417695751e44318aa4d7a25deecd4928"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "94362ddb86d89db77abd5c07874593a0b5f9b6a347a32f30148585cd6d55bbd7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, 
"fca9b8252228dcadf78a7985639ab2890ba55e859676b92351847f243fc1227b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "0ff02047e34d43e4d3ee0947744b44be4f65a92c64ba0ea39a75fc99a889b8b0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "6f9af0850a43949e7e6bd315a829d35957cfff016c6b9eaffc927733981d40a0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "1d5b5f4ab789d92742a513d0d96fc7dd060ea2d70e4ce55c50bc48ac6c363858"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, 
FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "5096a7f4efd59785a8977b6a5cb8d58741884eb37a12a204adc26b258f26982c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "02e0425d7b638a066a9d5626bd1fd1dbb3d882376ed9f2d9b42fb61460b41cae"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "c02ce572d8ca8ccd0a267e157f048dec0cdc2b904282068ce3b5d3c09dc35918"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, 
"0f4fd9480fadaa50a50507192e3d216416b4032ca803d84ee7d8b5f816cbc7f8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "a96de57ae73668856465ecbbe515d4b3d4f7b52984ad6a8461637a6514c3b62c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "5e3019c1a363af11d1152f5483091eadc95078fa43f8a84aad4ff38629296c14"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "038bed61668bced4bf83b3dd6a4b79cd82bfb8df320fdf7dcc3abeaf140e0187"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "5b2d1dcf9431ad61c9ba830ef800dacc70ff771edb035844c95b656dc8c0eecd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "2a3319be7332054b25038e8b6511545463a156ae9651326975febf6bd1149968"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "067902bebf802be826787c3a0fa1227c4846d3a49a6a5e47e9a5c47ef6254f3b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "154a96222537e546b9b9fd417cf4e22c014dbc2e9e8f05934f1f6e4bc5f56015"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "9cea90a0693e511a75c3be015b0077de8c19afaa66860d5f25862d2f631f6fb2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "2f1038acec58c81f8d6d8282fcd1c1170de49819ab83a2dde37c73dfdb47cf92"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "c9740ed576f1cdff44acbf634abf02a08cf915d5630d655a416f90ba1ae459fc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "121865d859a1faa7d6773f5d96034ee9d869128ce7443ad55b39a2c353881252"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, 
FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "f0c28de9301e5daf9184007a8f7cc3291cee14d99cda50c59dfa0f0ce5df6f38"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "bdb178ef4dedab9675deaa91596e620a69484ac31f13935011e3e52db5ad0348"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "d2a283ca561c75c25ef5b1151675c28a6f33b85d23e70be0235440ebd63997f6"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "be2b42db6b51130c7de3a85fd865fc6543e2acc8f4b7b7758485887f9329063d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "6d17600ee67241700e183646e2c8f928067a3a5fc7275384a5007f2db006aedc"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "3ee6a48cbca906639c9b275ea204228f2bcaab68c5cb50e9ea69cd2e07c77966"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "bbe51c6960fbbd4e139b0e2d51578d5a0b7203f24b3f1abc475b4d5ff41bf65f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, 
FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "7636425416f534cc0a9f9663a06213dcd5c56791cfe278aa3f39894b3f5889f7"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "d5f9d1d53eda0eab360945df691e52282bfa2c041b7cc9e1806932add0f1ad1a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "5d2a5c3c37c4b1f27d75ca39b5281b26eebdb2bab1d3a3b2302e6bc0eca53fcb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, 
false, false, false, false, "981658ec8924aa3f42f675ca1883c8cca3880519c98ccf7a1015fbe39f9a609e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "19d0d6699c02e89d088b2987ebc8e929871c224809315db60f2aabf5fb54d818"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "0df04d14937127f5a99e860bfd1ec3676e8c9d38157e28c7b6d7f3f9867ce5c9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "7617476c710d000a5c0d4fa6850d40c2313c293868c816f163c5593ca89d193b"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "b667b1ab74333d2f33936efbd53f3f65a44cbc308d7ddbfb48f236f02917bc50"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "d3be9bb8c24c0d8a7431db7855813536d661a479ef3d8deceb0fe5fcd9bb2a9f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "f447989b9a9af544f6b8c7f0620f12a4ae515a396b21883d3557da03ab78a104"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "f0f1e73a11f971fca9871150d54289bc51a0fbc1bc515372629378b9e71a7058"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, 
FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "36049c4dc91999bc2bd0d30b26ce779a4d82d37bc8a03c7b519cf302fbc5913f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "06ce117e9037d32e50fd5b3293cf2589aa7b5167dd246facdb254d18fa67cd78"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "ff86569407aad2ffe02838b67ac2d4582a6f52d7e8c1c845af412a506cf34b0f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "63d4bbbb63ed0e6832fb27ed7c533cdc7cdfbae1e2a00a5d42008e8b85183be3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "5ab0a9f32020c007bf039015ea612a76d477f452725f95cc50319c9ca9f5278d"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "f9172f7168cd54b8d9041d64b88626bc05d1667c23da8cefcf25e6bf8aa0f227"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "98323df5b2869796981aa2a8419866fd34da06942983fcb270d7d7b32df7f0da"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, 
FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "744e31645a44cccc14373757dde8ed2715f904edd131eca143ad29a203d3e602"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "0fb49407cff587a296f79e0978d46a02664d10e69a8f8701682eb4b1ac80db80"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "9f0fe85b20a43b237a615cde6cac2a88046b58a019411352868d969aa43e2e7e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, 
false, false, false, false, false, "4c931628006cd46d3da5366aa6a7653ebf92bd2021ac2cff8c3e8dbaae401388"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "ab27b729205247bfff269b3800c2898f0932c291846c983e60990f84583598cb"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "306d4a768c0bff2236a8b74dbeba0652872771280a7be69334fe993b694a527e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "663539f952818bd0ef75e837d5d09574ef3387d57dcae61cc89ddbaf847a5c03"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "b8bad5d2eb4f50d53757632e08b7109114d94a47e1244c47b6d197c72224339f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "0bb466e2df0b6ce424165b4e6b1f0328c8aa77a48ae3ed607440a542b36a4fcd"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "be731f316376e9dd58d7b901deb608005db5bebe0372bdf74ad7e6ed523ebee2"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "16d64818cc49a851de574cb8e2bd75251c24c4d3555c2ada2dc71daa13f7e075"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, 
FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "a9fa5c4934e298dc1cee6ffcd854851323755c730fca43750aab16c5d4f61d64"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "8b5345ec181a003564e8b9be6e7318b735f550c92196f2456731b150020b0807"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "c8377883a4fc985437e20555aac7d02120f5f39fec060b10d0a747491801304e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "7c9fc79359d2529cb25aab1e4c8e5117e535afe3e1adab52ae28739a19d34641"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "bb1fa984e609756818d3cffd89dfbe6bb606e519c988df677971f0e6fc3e3fa8"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "eec00ee7ff221794ab1589b45a74569b9b6eba508cee3715e9827a122e2d923f"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "b8f0cb9022b48e12687a3fb4cb3ce8606dc28d550e47f4e4548f4a9de8c762c0"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, 
FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "097daf094f9c04c48572cff7b8725e6622cdcf0daade056061377d260ae5db27"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "6aba07f04b1c9be8c9f3c6336e655da02ec4ae77d66f64a9749a81d83d8d926a"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "46a756a3e90e542b5a5bd091e97e0eb6788e1832ef33d7c4df58ec512fa6b665"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "ae09ebcb463921cf2ebf39bdd5a6a0769e66c8a24fbb8d17692806a416aa6074"}, -{ DATA_TYPE_E4M3, 
DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "ae3f0aacb01e7bca5c8e3e956e972a8000610acefdeb7676d84d3d933565672c"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "a8328d933b070ac00d5580dec5491f58a875c1b468b0548aa81d727baa5f9165"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "450c5016dbb4770695f4632c610b5ec12a60fce6d752495a58798c225c839062"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, 
"deec4f9b15d025fee2ff19ad7783365e5aa1b215a7d3daca3912f17f5cafc483"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "aaf990c6cbd1ac52645311d3658ef3a530cf22c80e4c9571dbe582b552304ccf"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "3160fbaee16deab617906db6c29989c0e6d4b696cca63811dbcdc1166c590d74"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "510496993a140b42b9b3a4a5542ca60e92dc06a83e8d1c2899405b3d5ff2f0c3"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "2d78ae944911a9937d0855ff18d120d0f88fda931c2750681c22a86ab212ef76"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "e88f3413ca5d7977c7fb1797af592fe4816cb9d72f6f2571aa558a7254896dd9"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "0b7e07ed455298d760c206d124d3c8c5d154f9af8912d0998f677d5fdd119f9e"}, -{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "4b5ab9f02d4a37c54a5cd89de7c7f0aedcf42334380f41b2af0807d2aa3c2b12"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 
128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "16396892e79d306830d82987e572638ab907d57ccd0334c49b02d19fb3166cbe"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "0778969f046859f7a85bb740327b7b5a536de785fa398211f1f330f0d88e5041"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "f61d84006c520bf63f10f04e8d1f5c91cd820f01a91526ce3de24f31bd3fd89c"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, 
"9bcb76606a57144fe5976d9725f1ba3f8c915679db6c0e1bd5194f4d09207994"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "7ba6536af5bd7f1a0b8695a7653562c8d13e314ff916117859cc4f89a6fbc4f4"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "005b45890da326bc90b33eeebbc64d89fae48d8d9b5f4a8338be2e0c8b515a26"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "4cb87673671f9f3dd63f2e112504903dbfff1b85f2ff21ebc9279f146e3202a0"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, 
FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "ffa43f5631bdab6ef6363ce0bc5051d54ce1c64d62efc45a7b9c76e4192ae79d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "f6f65fe6ec360437f401de283afe524b99e16d6101d4a7649ec4aff340d0c363"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "c42af192805830c9de8dac1ca60995d6755fba46b20a72973c775f505f4f6d8f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "b36ebff25fd64e0e65443065a7069a721bbc753af8b73673754e4f8b2dc87240"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 
128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "70244689de1f4cf5cf35bec968473fe3e489b6a41c91c15e4771793469a0ba54"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "aec52a6818dc4f57f5f3a247e89a03da56bf83a9fe35463aede057a6915b02ad"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "c39614bf2ea4f7e8a0c576e79e1296d8317ead977e076ae9aadb3c4911c3b5eb"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, 
false, "cf6a16a5601f3698aae5ed3ae31095de0a04dcc2a5889a50291d14a9ec39bb01"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "28921830b32422ee8fbfdceff51b2ef34d97e9c941144f60ec4e4c5c15fe49c1"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "e6945b129c53d75559b08e173774326361ff870a9711eb281711e30742b241e6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "047f6f6b83b55b51077832d837f1f0dbc6bb51fd8e2826982cb7aa40b04caba5"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "932bef26be60a4c213c7d4c23331b15c9ac4704cc989e2ca51fe44fe803997e8"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "5cc515e0863673be4fb84c5aadea5606bc3edd5357c9d561573008cf99d18f0a"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "ce5ba8963a1f37c51ffe9929e3924be67a914ad26244804150c15a977cbe4178"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "2081ba9f40a6838b5ce5decec1d90bbbac4525fcbba546b19fb31d55a58205ba"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, 
FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "1f6c0c7e922c8912e87c132651d7c68a93a12a552d271912795fdc64194209cd"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "13c3f8e0afc62292139e8a0824638945184d443a6b9aa9fd8dc6896d64002861"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "9144077d713542bdcc85bf554509dd0dbd8ccbb057490a5e70f68fc8fe7ed3b6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 1, 0, 1, 0, 
0, false, false, false, false, false, "8c7ea1cde599a9800f307832c7f3caa86285fd421b711f2256ea2b480b280b75"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "0aedeb3531c8a44742ace621fa754e4959c4ca9e46c6bd43d2131ca8e1e09ada"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "fb39cef3ca92ac7dd0ab29ef31a69278fb125c7b2a170a02a6758055e2253bc6"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "dc2f8a14f9a43a138884cafbdca3097ad42c53692d8ac8e3041a40d63e1a073a"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "b5fe7af2fb1288dbfaaa81bf5864e838c70274497ec82d46c0ea69d6884c1c57"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "91d665e5d665c2451f3e3174c083f386e5edc3d31732d48c40e2cdfce16b3464"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "846795c079478692bf0d6a52ba34698045524d1c3424c125914c480dfa23b017"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "827778ce58e8d6f5b9041fc8f151995e7e3a9d35628e5ee0374cfd6efc8a85a7"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, 
FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "bb503b8da68e79d088af04cfd1426307e20c9fc873c8299a1f35a99f900d3f6d"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "c7feb179b5c664c119f3fed0d1b24e9071c1921fea3d303bdc264aeb7611d481"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "500092fce8c9eda623ec18f33465a51a0c13e1a24c1353b142a63c9206a72f4f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "234a85f4e8548f61844499ae1aa4c13544c4cf91d053da47473baaa6d53fbb75"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "d7ef622c90ae3529918dec1d0eaa21214204e0ba1d8d066db61dbf964c02e3f4"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "735fd89315b2597b18b8c1e37c047861ee0e68f2a033cb5cab39fb2a38bb95cd"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "bf256bbeb69a1bf7471f11d579f346c10cc4beb28cea08fd6e567d4d0ddf4b08"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "2686b668ad6069098a3ace79dbbc6b98e0ae229aa44b3189f8635470d0d8f647"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "9924a98694a0899aa85b7ed260e227cc83369ce5b0fe0c4c253f0790750fd101"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "b14c04deb6f2d30e772ab9884c1b3869f3f09209bf925ca4f7d973abe447ca0f"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "60e551951883df56d23e755a26b04481d67e162842dd09cb4b4cd91bca7aadce"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, 
DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "aa14225141744af27565fe486565dcd2e7df647f2154b93426885fbb2dcd88df"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "850f334b6268e54cc237ce35f5cc27deca188b0226c76ace06c37f3d2d996fcc"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "ddff1eacf6f624d7dbddc3c46de5fa7942fbe3fdca9e216599e9e39a0e917a7a"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, 
"f1e5e5cf7628726bb0348fcae657b1ac913b0594de44d2f707d32c13ef2d9931"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "cb5f1fc7ab4affe4230b4b2d7d518f28a94f7f8a859c7d38d2fa4fe899c2703e"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "c26253d1006e8c29d15c988b8611ba8859d292f54dcdfff30e69756f689e5a4b"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "ff16e453eb623a929cef701a239e128aadee25801810b3fff71fb385ac3b38af"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "ed836791ec043ecec8516cd87377874d0958b22560fae2e8c81746cd23b34a6e"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "9c9b94fea3b73ba51b0dc51a099c098b1d87e875e1545a787f84464dc490fb66"}, -{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "c5919d7b138093fae11398286a7fb02f8a1928633ed102bf96c033ac2dfb908b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "5b47411e310604d453cfa6925047fc6328b2f28d5c4dc048ca68eff94804ab7e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, 
FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 127120, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "b7089366bf4134d8ce6bfd4b8489b393142bd5c4df2e7b4343339897c514b08e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "9df75a6c91a18780272dc91ed956102b43d0fb68d9e482f28eea3f85fa4e8631"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 127120, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "f32468afeb2c8e000bff47972756a806ba913a82b941a95ff168925e78b58c77"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200824, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "7b124e29362a0f3af40a36c20475dbe743e1c3e62d9ecabfd036cf0b49a76555"}, +{ DATA_TYPE_E4M3, 
DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 196344, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "86996021692f6ffbb8009a6bf6701fd201292983f9f32aad10d9bf9df681543a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167024, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "a78d401bd217c9a4f82aad21226b573d06d7c34eaf86b6787fc6d431f0f829d5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "20163ae8b3396aff4456b85a30d3145502670a72596ba9bc59ddad404b94f059"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 
32, 0, 0, 1, 1, 0, false, false, false, false, false, "8b8d32e9238e23fb77d004cc285973d0c7d0f3be9ca48efe96f60c9c3d1e6110"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 127120, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "10934daa088f4e17844135e0cead75d2061292a547de36fa9d915ef0832cf4d6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169168, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "c1dd6223ff53db8ba2d0d3ec9f662a1c050b1ff6e04e7c4f90551ada566cf271"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 167024, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "9ae4bf2ded85ebdd05dbc51893ea3eeca13eb73e7e9a19d05e9108d273a66eaf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, 
"fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "dd89385db2d99df26d8c2389f276818af07b06c19641fa14c24cbd5b5dbcef55"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "38efbcbc54503d290e02d7b3a7738192ab24df642aaebeb51482a5c3f0ee54aa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200824, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "adad9bf19df163b3fb197538bb5393c007bf361b919b46086588e2749defb40b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 196344, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "aae998233f8d0f88555b0daeac83c83090e39674bedc7e426bc826dcb1a6d1cd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, 
FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167024, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "9ef253110abcd3ce8d187b76b90ef9af876091dcd932966ead161964c033a481"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "bbc673a2ba77c46135253cc85e857f692758b0c93f5ec645b289ae919cbb272e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "a290a662a5a525d4322be227e317174244f2d72247409c739f2d7040531d4291"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 127120, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "b30f3c26c639d655b797a75436a8ba0903ed0ea5959d1182ec0121bef3090e60"}, +{ 
DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169168, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "8bcda4264867768febe73fcb5d0395420b8184c25eb1b80e88d551d9a0059d31"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 167024, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "16eb6be8af27df03ad4a5dbc506d4fc1b58d05f15f90fb64f59db0de06aab883"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "8798199ea55ac3e9ac1189503b276e06df34ebee40a803a3f59230978c4dca8a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, 
"07e70c9e937bab61c3975b4585ff6868613abe93fe230b1eebf44e22bd93e774"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200824, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "38660c709379abb5122ba6723b321a6c5d980adb487b7589b60e1e1e7ebaf7e5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 196344, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "358d019b8dd71d48ed73b1c2d3e1c7e2182849668f5a1ab550f439f5f0d64f86"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167024, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "ca83926194a0a4bd8084e7839bbef33a1fbfd76e995a44110707f5ebe798b1a0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, 
FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "8fcf2f5c380cbabfb1a8749a375c40b70b5d17914333e4257157ef6faa383f1d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "86448a78cbf6f94a64f0eba6d57585d7a154dd10a1f5969749b0dd77cf48d772"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 127120, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "28c6c0ec7b942136a0c1ccba453d844316fd138e3c5c58519aadcfb3f71bad34"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, 
"fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169168, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "5243da5d5e083182089b9c7c21f53316a581e71a1862a3ac0102fad00e25447d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 167024, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "fb53334a6551e74b91da17bd3bac70126d7406af64cab369f7c77baec28ff60e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "b258f5a91012907b87b6f43bfcb636bb41d264b093d76b4ede1587720e92766d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "ba573b29d91ba1885161c540b38b3947bbfabed0be8b6b3cd7e68c538fba364b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 
128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200824, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "b2e0107ac8545b65808cbd0f2b809c975a35d1ed65cd0caad08a87230557f108"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 196344, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "7dc632b606ba942122bc8970207785b91c797ceca1be42a6d68dc3cddb3d9bc9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167024, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "f95fa8c512b22fd447eeaa569f8ccd8e23bc7c20043e50cb653a117b543e31b8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "2515f7e8f3acdd7617ead6785fe4c38e447cf005eec1eaccf64ba7d2d17e6bd6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "4de702a902dc3134fb668b1338fd489a86f3e8438bbea67ec3fad98b6737a548"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 127120, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "95895d582a42bcf05e3b269caa0070caac04aef5381ef9d612529939031cb88c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169168, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, 
"0dcf52013d31369ef0f981f9fdb2fbbef0af73ff60b3bf09ca7a3c3c280bd340"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 167024, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "6e2671fd0d38c82c3a1cf9cc26d377fcdcd610221f9ae246845ba78fb588660a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "1718313339b18d6968fa3d9573f093a45f1ee0a14e9cab9e6e416e32c2d75641"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "b9b639782d3e1145aa2b840490f038796138d5a3f6ce628db2eca1a7e3c313e0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "a44aed72d0af6a5f61cce1f54ebe276c187b3a510ce75a6539d04b9beb1c4a23"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 224480, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "be21be03aeac61b732c0c9daec9e6340ba998238f9aa6ea24935b2e93cc61b0a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "6991687434c6c38ce0c085b3816f50050cec305452a20a6969735bc11675d67a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 224480, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "5a0c9cc5d47865695233859c43abf7a500ed4cae224e8ba9061add3fd31198fe"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, 
FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213624, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "c9365f26b1d650daee7c8239d227ca481c715bfa0a59dc7d3ebfabe8b82add13"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 207096, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "0538ac322fcbc3375fff6cc41b6f378c868177004f8c214fcccfe3fa4663f440"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 180336, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "effcf544ccaa83d6dd99927e4b1e8b3562a0d913fb98466d9eee7bb090bc93cf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 32, 0, 2, 8, 0, 1, true, false, 
false, false, false, "10cbbe02e179da155d2f6c73007c39c6163aeb4f2bea85abd702cd0f8b518f5e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "579fe774f2b8b4084c9fe1cef32b5bc161741c7c55891de7d3ed37716be0e368"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 224480, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "636f97ec108cfbc82f77f0b3c7d11b88f8223010454002440edbbbe8fa62ce30"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 182480, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "62ca84248cff6588c6f9842e084debb23a6bd5409fdedb564ac9aacad88e871c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 
180336, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "2c2e864fb73d2ab76bde3ff62b7cab1b77dce3c7a8b590715a541ffc1db617e5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 174928, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "92776e4919c2714561875f78fb69561a1111316b2349da9b17b012579fdacae1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "fd2736265d6076de0c99279f79126bfd924639b66d506a2c197eedd03f41fb88"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213624, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "ed0646d6f753346d8fc52222159ef5d64bc5fb2ae6c78d4de1db9eea083e40b6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 207096, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "5942ce27c36a201095bcaa096fb118ad44bf8f70cc3cafdc73cc63395dbc3de4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 180336, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "d983a647022cbee8f27518b2fdb8c912594322a6908090d62d67358e8e0daea6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "0437878dcd346eb991f12c17d704a7208d1ad76c54c27c655c268a2d68ff7ef7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "6ee56274774fd1945fbacc3e3f6826f7503cd6c33cbe14d776591d6b3ca89711"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 
128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 224480, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "518c554f014765bb146eedc665c73d940a1ff9987660f89a89c813462b5a098e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 182480, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "1fc0c3f18883c2b576c12246843dd524b796c25ee543f8fc5bce0621f553bbf0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 180336, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "3c575020c252fa5f67568bfc83013ad54b29ae0c4979ae80399fa44cf9ddfef5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 174928, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "1d36fabf4dde0a6e1aa115e88a815ced312cd648fcc75aaa85e4d351f6d31995"}, +{ 
DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "fe7a196d57fd3d48649aa532a43b18cdb1a69e497bb76a529fb65b18bbcd0623"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213624, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "fdb3d136480f05cb3a7cd34f1ea103e8ea8701ba66219287304c8832caad7ba6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 207096, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "60950a921ab7ded09ff8e6a17a265f50cdc9c40b814bf41ac8214950e6d212e3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 180336, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "64ba82d2842303aae7ff93c6ab3c90d1d1faae1e77bada5bc211a44beb1b7587"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "5ef696589e3b8e05c044ef8ba2afbeece8f667eeb593ef2f891ba365312a366b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "524c159d7a8741d17e7bcfdbed1dbe0d050d0719df1a568c23f8005c9f528887"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 224480, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, 
false, false, "17d93d9bfc418dc3522c2a03aa39cb8c6341a6ea4304314c4efa7535d036bc35"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 182480, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "8095af26ae7f0629966cc3dfd7e20b1e5706b12498f9a4883a26fa709a5c0e6a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 180336, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "2e90ef5eac304de569b10217b7c750c299eaff495bcfe7c375a9768fcc7e2784"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 174928, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "f6aea2015acbfc1c96bf8deb895671315f7cf38b581bd8ed2f98b15943a00c19"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "1ee2005075dd87f84bbc940ca7b3aacdd45ecb91c610484e609c4115aba32d14"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213624, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "b222da066d17fb20fb7f139471e2a1131b490e1e2ed5ed84d5f2e0062ca05c35"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 207096, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "5b0351db998ac7420d7cfa411ddee1fc9e9a9351bc7190f11df046204320ffca"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 180336, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "8397e53f09ff9c3ac4b120158321df0072562acb4f56ed4d2a5bc68852c355d8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "828f625b7714562cce5f3c9d6637597bcd5bf3a9f6cbecbad0c625767ed186d6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "46d8677c271b486b3f4dbf894d47764ddb1aadc8daed907853233722ac976294"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 224480, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "e873324efb13a266ccb8ad96623725ccea53b45cdb990709e15dcd7f6d018a93"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, 
DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 182480, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "79ee5304232eb31cf98a4d15e45257a61c04e8ea8bb37797c2b74c22de92e76d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 180336, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "7e34ac4ca2daa6f66bd958537033e4a510ba34daa8a21305055d7ca17460376c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 174928, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "802cfe7ca614a007fe21a85e9c640963675addd8a502f85a6da0da83942b1dc3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100aKernel_QE4m3KvE2m1OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "167f80a3acf7d725e8230b32cce19175edc42ae72f61ad1fe9a8aa65963ad72c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "8b8221fdaf6fd38f3bc7ec734ff9bf15186f0155d0a2931d30099986f6d342cb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 64656, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "fc76d3d43d56957d195c68eb5eaa7c46484ef243385b213958ff7cdc95e2b422"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "edc88278d79b1b55dd4d92e9c07638f33df6d3f1d4482682430721c9a9793a2d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, 
FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 64656, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "8ed49bc6f3b98c2c262a352c31ff66aec095dad031fd59203eec93804ed23141"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158840, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "c2519d882253ebd921c1aa547ae8881a098386018f544b22bc19efb245ebf34e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155384, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "4f25665b75abe1c1484bfb292739476e1b5b963079628d7c0d3e32c78c275e84"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "193e7639bc0d1301c36bfbe3aa90db9b64aac6cd8f3e9e6006afae22be2ccbfc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 
8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "7e7957b828a9c65350ea28a8119686b94baf79051694546b5ac2af93a5890873"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "22d5f4ad84b05d17ff9a0e6c937ca3c5115a100b18b79981834ff7dc1ba68c15"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 64656, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "8161590fa51046fcf745655e140bcc576825fd001aade4ba6ff72a565dcaf29d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 125136, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "24b0d2fcd2989467338f8cf1b9f6538da85cb4cd196b471718c178f79c6ba756"}, +{ 
DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "c190bffef51f34ebbc3cca535ed73829aff6b3903123253985572af49393bebe"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 121168, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "3f39d2d7a36ea50e6e9588f3512e6d6aa000e86a1df27e4a99656750ac11d70b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "0328f2640952ef215a71c410c69d6f04df74070e12ca71bd7946da3dfe0c2048"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158840, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, 
"bde94473a82675369806c48f91d3eb168011b1d9e9d2e8c55bfa8b198b215c05"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155384, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "ba8ee754bb9a51714832adeba89b60931bfb803b396695bb13b0d6bbcd8a856c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "0dd2ed3daff3e2159b23457849ded014afef21d25a31d49e42ed3a29ee5ccee8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "8a519feecc4f95cac683792e6644acf7276b3e47b9c8187e3267120293f62db8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "77e9a05ae49524400de173351cc540344626f889e52167330b5d6af9d9d845fa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 64656, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "420c40922c626a4ee1aa407c66d20b460e64be56b36bd4c6cee0586f7b59f442"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 125136, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "996f4361436447ca1ee740b864aaa940186e217c56d2e099f82feca9d2aeca26"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "00c6c39e04b123ed7942e9f67a77e1d6482d6301736cdac1065dafc5fadc3cfa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, 
FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 121168, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "d50a6f31139607aecfb12838c3f97ef5879fbde91c2809c74d1607e272bbdf7f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "ca064a0615ed7f3c835e93191d27419867e3174274a6ae4f09a5310cb47ae5ad"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158840, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "70dc49fe4473a64c777cc40aaad768d3a9faefad5a96c99011b420e993c13b39"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155384, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, 
"66f088cb381679aaa0881be1333e8fe959c456315610ab068fc0794092473abe"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "7c4f6b5b4ac43d6654e4118ca793732023063fcdb6a4b840ed1986fd9bbaf761"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "fb487c51a14d3c30c9376a37e28a0f01358ea5562307e61a342ff47403789889"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "dddf40cb478b37bcc092d561527ff36087882cfba1757085f67ee522e606d30e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, 
FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 64656, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "26da7ebc464ad98148c2e5ffe8bf3df0810ef3c0dfc962e994ac9bd466aa711d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 125136, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "5ea4c615884c7546309a392364e2266fb597b8aab6a87a0fbfc70b41e5ec10f3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "61b60e1e29eb3336951dad2dd824017b14ae85b0eeb5377ea2ad7a432a2c5886"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 121168, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, 
"4cf1cbdb2b5fc7d7fd2b0b29b83101dc1d860211af7aee60423102e7a479c98e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "3f7a410b5fafaf52ab39ec0ed7deb3638ccea26892e73ee418638859256ac5e8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158840, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "81aea9aa8dd530052ed40b5907731d665bc37b44526732391cea0e7b9f2c4e11"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155384, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "91bf9754b00cb9336e2f87362563b866c6d94683598abec081e8d750c650ace9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, 
FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "5540fb93ef1540841e1d940b9d84254a048049d8cc1b9eb132190676d209962d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "ca83d2ba56bc7cc93fabd37de657760583cec2f6d0aab03dbef76e980dd695e3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "6b9bd206188c87ec7018d6c45bcf982fe2884127b57e12daabe0cad0cb01bfd8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 64656, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "75ea1fee8d1e352ccf6b0c43abeea70bac55efb71d8d15b5ba4795edab1efb91"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 125136, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "e7c2e111ab539546a7c89a05b8a54526ac769295a1a5b28306be8a1a2787e488"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "021fba9584d70e8c658f4b0590d0d58edf6940c10b4e80b2dea8c1ed90b77398"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 121168, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "21e5b9c8f7eae472620af87b4ec095d762d8ab8758bd4bfbfb0f8f3a86a0df63"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 
64, kSM_100, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100aKernel_QE4m3KvE2m1OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "1f49afd519e45a8c3d122887e2f75ac863d9fa16cbbb2fb15872660acc05cd69"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "a7d22360c4177a9e943d97cf4e522cee3b668fd93be6783a5961517e1a1efb15"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "5bc59edc894af0a0613e2c0fb6926e3418d32eb78fd8e185a7d54697a2b79c89"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, 
"9de6bf8f47856e3bdd5262b6f8229f44bf8fd274c6967020c7f65440c7ab6148"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "41b50122598771e0b37801ab74006bae5567c3a8a8866f5816b7252dcf35aa37"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "904150933b5d99214c7243e16625bc74196d5f34f3327fa27ab0a618e543b2f4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "4f78dd320ebe8a3349fdbc3b5802f0c4f699de0080073b7724e5628153b16242"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "2e84a469796f97d78c1defe38ca0bcb64583f4c6cc12971df9516b31bdd30f07"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "dcac67ee44de1efa866c87f9f95d0cf53232cc799d91160725db84bda2593313"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "8a166ae2fa5edbc754ef9a8da8a9ce309c836b0f8ce4da6b300ede9ecae94997"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "e6a2579a90e389c1ccb6e10011c01aeb960b3fbf4487bf8bee0f25e48dcd6638"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "5d9e623ea02a92f2aff11798892024eb0363be7dcc83f0fe5e5d86b8a623494b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "1f91045371232ddd59e6bc3a199593b6c05a236fad4f3f17feff63cd2b3c625d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "efa83baa0b291df32029888acbf8527eaca58c64228ff1f76b3529e553a56357"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "9f20b7ec82ffd51066b501d6d296c795952185c848e4b57c601de8a38da8f366"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, 
FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "80fae3514b2b9d532188467868510c9a7fce011d1c36494eeb9f8a69e4744208"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "76305d48ff41fd4dfe333554131a606ae5829fef043985bfcbe44ffc0ae205f4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "ae2abb326b8508f8e6bc44f530e25e70777bcb6eb53e866700216be7b3f30e13"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm100aKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "bf4c109f34648376192f6b659f8da597e40c817e5bd55bc9884c9156f3c257f1"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "06ae8f612c955202f285161e12265ff46604e5afcdf98aa60a0fb272d61f4559"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "361afc34bc642b5cd4b8bf94a24b39578accdfaa6ac8ab526f52c87706c9afca"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "38af2450832ed2d200d7ddc3ffc9c067bb19b7cf6dc9951b8bad2e2c32530cee"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, 
FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "46cedc42635b3ce993ce00e39d0f2ce173846c1cb81590459f9ec7b0ed86a3eb"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "9db673b129743fad5a1338abaff0dc46b611daa263eaafd5bafb9d942eccc279"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "197d009e2e938d3b41cf9089fd7a59fc0010bcfbe239db25bf4f0fbc7e2fdda3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "e8716fe0843a121a911eacf67c74198f67db4ba84b2dcc7fa70c6bdff4d78bcb"}, +{ DATA_TYPE_BF16, 
DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "33d90e7cbb3f6880e9ff52731a699570262c87aa61670a253dc45e606d079ec5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "85003f236af5d5a8ceb8f5dd45ae1a01935c85d56ff84ecd799b532438b43ded"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "8aef07edaf46638b6b13080f89b1e651ff2d503885f8a63ca30d5420d864130f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, 
"e7422a5b75eb14a7d0ade2fd1a5638dccfd0b8eaff9f1c58f154de0d88d1cd48"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "52595650dcf4cd06dcf4e67360757ab2ca81dfcb9b0b29c387c39422437c6fd6"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "de8b79c7359b6ede2013e1e4236ac646b3209ffa291ec685f7d35abb6674f5bc"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "29b9a0927d9b18c43fc4cac05ac7d5d61e05dcee72d62cc3b16973a14b71137c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "a2571a10e34968f91ed7e0fecea59479e111666b44aaa6429e3569516d8b2576"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "2ef5eb471b481e517cf888d55684cb2180b792669fc28371a3b5f4937dd0faf5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "2e7ab27ebf3ae5f93d9a16225204573972135d5d0d912e1792bd69558022c2df"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "3af88a609f9b2ba23e00d68073d0859d291fc8be34e0bfbe67b9bcc2b6bbee6b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 
128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "53f93dfeb190bc1471fef6276b5aed198b59f94283ab4e27fd1cfff2f9cb96b2"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "f9511381c2c6169ba174e1c40fc11def677aaaebf8f7efc700d3f59ba064fe93"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "579027871b5790b4c385318868455edb3d75a031bae02ff3563089ee5db64bed"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "5effc658e14b8272c9b2102c264768cd2457afb2ea6780711f44988d418c8a40"}, +{ DATA_TYPE_BF16, 
DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "f81d59d7d0fc5c847f3f2af016a0ebabc35a87289753a1bdcc89d1747e4ab03c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "d191efd39f2c9d2d70266d90c68baf939e10e2d1da565b28c6d4703190a79566"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "95ec5f71deceab32bf168f14091c218044335bfa80cd330c8417ac310d65b57f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 
1, 0, 1, 0, 0, false, false, false, false, false, "f212ddd3b667f69553e79bda97e02a04184ad977ef570f9a46b63d081dedcbaf"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "082b38a3733f4ddaa3b6f20cfc8d03f5ab8ee8fb06999b3d5a5d222a67fd46bc"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "37c1b0a520952303f4bb94376ff0afc67ca83af4fa4cfd1cfaf7f0245ebd43f5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "29bfc437312591710498b0ca6fcac9801d2165293463c2c9e3c7e0e4d2795e07"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "4f4782ea8c900bfffcca95398d5abe9793d8ad246c9ad0199cdd3b86016302cc"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "f152939b13bb7a932aaec1d268c18b6446a57d734aaccbb77ef7af2ebf923c2c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "67f35e6afa05177dd96955c3a75271e9fed0122a17f70259f0f0314177aecdec"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "e7d9b245f580371b9ab723130b416a30ff7b505b90d4dab523f2303950087f08"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, 
FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "b75d12e7a21e2cfcb8667fc3677d64451289fe77e697fc9d5a18fead4f24c177"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "c2d30c8fd06fa7241bedc8259f17a42a4b7636cd7d7c1d1fac8da1628b980be8"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "74e0e5ba4516436140b12edb8fda74eb43f0c34ea2d72d0f5cd941df297e0c7b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 196976, 512, 
1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "8c13cc43896a58dde2d83db32b8283bf480b70487a57916386a4463e37bfa2d7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext", 196880, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "ca8d89903cefd34b918b647492d08698fad010c1e837895a0cfba64480a18027"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 196976, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "94e5ac504aa7bab41f75f327d494eb55b6a7cb1c4fd90446ee13346eeb0f8f2f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext", 196880, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "afec6b265db7e459386758d7772888aae6b258259eedfc80eb45867e9651434f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "0e28a0c9a5dfdd3fe334fa71c1c7702d4c576a5101dd103a1d4c28b88b36ccd3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 197744, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "07f8cf4d6812509698c668d0b1a8845bd9ba04b5cc7d71ecd63be4c8def30e6d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "28c0555a6a9c9da8a33f01439022e16a1b8fe331f05a91e47ea804bf06b33167"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 197744, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "50ba99df1b60621d973bd28bed28d56e7a0beac1e0554c19462412167d12af0e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, 
DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "38c2fdd89c53ccef4f829d65a45a033bad3ad908ef484792f2377a65efdcdc4f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 197744, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "9d2a5cff0d66186d00b20ff6eb5a18dc6e742af07929fd75955f5c98cfe5accd"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "7c342934da232c453c98220d993db7a16be0a0ca494959347a3dc69ad79b5d75"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 197744, 512, 2, 64, 0, 0, 
1, 0, 0, false, false, false, false, false, "7f6288937fa89ad88e286653f751a440de056e06495a2ada7446d25189531c0f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext", 196976, 512, 0, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "19c19e69da7a6ccd08ef9bc3e1ca8a2203b60c1f2e646e730683f120f4c85787"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext", 196880, 512, 0, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "889b7b8c407a558d4491fa5c198c022a46901f934fadc8d76657c11c76b755d7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext", 196976, 512, 0, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "302cf679d96a70ebf2c76e1daca2f9ef3077f5afefdb46d8d16e7a7c3001b61d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin, 
FmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvBfloat16OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext", 196880, 512, 0, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "1bb96c7b43e5a95475b9bf5d79d9284652ed92110bfec004ab95c3dad82bd0e6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "e7d1b275523884e3c7679ce7ffef231f98898c4f86975cef9032174508a902b6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "86ff442411224ff453dace8d88c751ca18b1c47c4a2e72876d8603d8f543e762"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "6fd9ea0e9cc06c7ce910ccf50dd664e01313e8a05ab10fadeab52fe0209b9b48"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, 
FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "501e1c729c7e90ab6c5960ae1a63f5cdc0c36bd7468c5b6c10a515240aa5ecc7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "1b8c83a3ffe081ea4af76413876f8e3fbee3a4f83e34671750b0e417d4493673"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "bc5c13aeb31365bf140fe819e09e7cf763b43c4ac477eefab3245cb83f21d95f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, 
"847c61d0307696ec186e3b0279a3e39e713d17074c65d88862df75b0a521d9d9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "f5e805f2dc18c7b65dc031a73b42566ec7c8871ffe22c3438edc2a7e246c3815"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "22ed279b98ba2e82d0d3723c4136ba54787652450aa7d95353a45f16a073b091"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "f285cde6af3858a799c21e41cabe8e4c2e6ed3e1c651d9e63a47f39787149fed"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, 
false, false, false, "a766ff33970c05167a1781dd6f4cf90cc3e75602c6ee7de25f847fbc3d4a3ad5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "95420dcc40ebc52687006db8db030e6ae1691a6f1fa69dc87937b6519cdfa1cd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "07ae7f2bb2336174a68f3fb8eebb6430f06221eb440e6fea290716b0319c915a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "e24e929800977f2351069b9a018ef40bf0ae852f6fec45ee91fe347b0128bf03"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "3809bb51089d63d127d3bcce37d3692277b99cc91893bb393f32f0c87c9abf3c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "5c9a65079c39fc1ad2d386686b7f75296aa8dd6a1d6c57990d8cf67f37a9b8b2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "6e73cd31506f2bd8ad56a1420342139968d29b45bf5a05c4f70eb9efe94bb1c3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "6c527f9407dc7e9180bda2bcef6458747f695014d5138b9c49e667587224270f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, 
FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "8b00a4b89aadd47237d189837eb9c4dbb73ba840621fce85dbf8597537224a2c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "295e8c1fd8a0c5257b6c1fe66bcac2dfa3e8c1892b97a176974e64a82146605f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "543a495d4ac8d6eb20813bafa08245dd42123c31d89011cde3ddb8f13790a411"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "93ce0bc96a070735a07db092a0dcbe5f03a983ccdb987d9ae384fcc88f315f79"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, 
kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "f5b82c6a0319851b776d4dea4a31b5a6329fa034bf4a8a2613ed93c028dce2e4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "f49a794dab421fc1fbcc8f2b8f51392a4836924a16c605cd8f62fbf3e4f4e68d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "9d664f84de3c65bd12135d7ad1b5fc46742d3c71c7a279d45f3037e36d1a738f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, 
"5c63f0585691346d7bbb27f15c0055df6fc18c34063f1d382f38e60ba6e48945"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "3d916b44abb5b7a644546c998118f8280ba32fdd96daa8e6e796cbcb09d880b7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "9a8ad47686589f0d509ba98af340fdf3c10b7048f3d2aa7fdd839de6068d62c8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "c430db1dd88727dcf2f1af52b5dc2825acc6b289e8eebf881de05981b03d9f67"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, 
false, false, false, false, "7739cb22b4ad40fee6c2ca30762046c58dda5397d1905c765922d2ec160f3bdc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "66ef8ebdc9aa6f40bd122811eb8cc5bca474d57fe3f7c639b462b584ec437279"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "2d12f7b099b65e9603293ea41a493e08eebb846025b0a0f78e0c52a1404b1d1d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "b2971e46fa47543a1c31a6cbdf17e6dde2548129405cedc95087c8c0ee709477"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "90493f0c2043297c705cc967a307d3a13f4101566e36022bf3a98e1e77c34f0b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "6f76d193795a910e968b581f8af850cf39cdc7c8895de4f6caedb0e02bd3bcff"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "c43185bc36cae27061a580e8a8f8441a38a3528654c8962f9d561dea07743a3f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "5fbf3842dcc2f4cb791c62acfa28bb8a3d34f355f666a73e541f812f08453886"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, 
FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "bb8f2fde10196f4d688f29db57c1202668332da6f768789778f3bcf50186e9e3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "62abc6e8f8af98256d6e4a7ba3768d1f490947cacc950d6f00ef423e42aa23e4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "3d53633b0d5635a1af9251385b13df8bbeecea5451440abce380281557832d8d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "eaca321a26d07192cefb80e159cd63ad157976371b1982f6acf2955dbfa98d75"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 
128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "679e5d30c0ed420ae0a396e78e439b0dcc0e045be2cdc22e4b1f47a0473c59e5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "7cb8b04af366c4cf8a0693ff5fc751fd5de98f37e9db6bc508e59cdfaeee1f71"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "23d011254ab47f98742a861b10e8d028fe5bf1190ecb2be9ba5751757282b151"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "4119ab41c8d1f948d02c525863f623feb6404160a370de0b64e6aff1bf126f7f"}, +{ DATA_TYPE_E4M3, 
DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "8a0aba78eb20c6ee2c284279311251263dc88d4f410f0e855fed4bc44b46418b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "225014da5b4685ce6f004056df6d7c85ebc2c63c858702b86b365c4bdddbb333"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "70ed84028fd356212086ee598d7828b15a4bc347aa0e5b9dc8470362f1cb3406"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "827a42264269acf5b1ccba88f02438dfe94ba1a41380d89771897f1834d950d9"}, +{ DATA_TYPE_E4M3, 
DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "ccf19f64203259ead7544d8b3ca4acba222837917c86b18d6e71162ad8dcab16"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "d1336dd490cd288053aef7ba9f543afc7069daca6a446f924df8b3961ca225f0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "c35eeb4dd418237d08fd1c315dcaa2681c813485710b4e71b6a5020372d4cb8b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 
64, 2, 0, 1, 1, 0, false, false, false, false, false, "df4f9773c7fd35f3304b7907ddb78b5b06118b1f52036404b50f85961d6c0a4d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "77a4fbab839b7acda2e52b3ae3095a09563dd69993c7b812f58105e9f99af421"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 115024, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "e86b17c8406241ddb41e3440f71a3b7fdc183dcae3ebf5efe3b8eaac37b08d2a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvCausalVarSeqQ128Kv128StaticContext", 114928, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "8a76cbb460ee011490e1a1d07c2665799b8e076e1b5bc68fa70a68678e8b78c7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 115024, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "9ab35fda78a1ccd2337b34bf7004cad5b94dbd036bff3892504cbe13dc152450"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PackedQkvDenseVarSeqQ128Kv128StaticContext", 114928, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "a7fead941796e9ecd564ed80485c0d55b1399181cea15351e5c6b46c0f281db7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "af8a0e43fc46d95beff2a4fca4413d4209a6e4f9a7db03f9610fa1135a3d23f9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 115792, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "b7c0bab5b220129bd4781c9ab59a99ae1017456f53a7d8c3d403c6d117c5df15"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, 
FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "a50e8bc70e19044df365f04a56cb76d7c0456f9ef21471f9aebaa446e7e6e8be"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 115792, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "02d2073e1c1bb327bc0d27979816e316f97986ac0f83358e079d5e19cfe7eb3c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "027a1158c5be075fcd09d563455cd4bbf01225bdb683910a4cad21b0a8714ce9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 115792, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, 
"d9f2c81bd1c17e078aaf1a195964725a3a2862074e931ead958d8ed9ef5d6a5f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "717e1753371a488a3f3bcbf09ae387812dc964e8152bf1a600964794f6779d84"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 115792, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "c228e917a69a254e1b3423cf156fc4e4d0fec9d4efeeaf4331dd139d90f6b1f9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128PersistentContext", 115024, 512, 0, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "306b878a14760d65d4be45128946dcf23a34b491e752f31288bd331e70fbd2d5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvCausalVarSeqQ128Kv128StaticContext", 114928, 512, 0, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "e0fa25d29ec805ff590a1152094c750f4131f53a6444887ae5c2269fb0a1698d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128PersistentContext", 115024, 512, 0, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "954769b36cfff21b465b63adbecdc7efc964398710d6f371a6dc52abe21da50f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OBfloat16HQk192HV128SeparateQkvDenseVarSeqQ128Kv128StaticContext", 114928, 512, 0, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "b4a25391cf4f8f58b31ccfc06b9247329087e6ae68bdcdeb63f39a79a9028a95"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "dd478d6bc7a346e9f459b110f3b6a24d0d2250ee4bd6d9b9ad3782c74fd055be"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, 
FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "c833b46b17b69c78ea74fd2f7e5b7ecffaab95604939b10b454e6e804d695824"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "56cb78545bbee665d213d2737a3b45e124f286eaf71801d63f3d5ba62ec66c0c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "3a548835d50aacb4f98174957e131276376d3185b5091849df030018be4419ff"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "2256b7371ec2720fdb1c858793551ee67607e892f6642e79ffab8af0c8eabc25"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, 
FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "94b82ca9f8b6f255abf72b8d46122de3eb9553d5caed3b4fd327349bf6897f4e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "799010918fa5ba58f983ae8ca53f4e60af4e2deb678654a8568c2b93ed8b34ee"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "4573af5162b3d8a24be4244db8c56420138eaff2e835de70bc46d5730c1f90bb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "27c98430ef3fe9445e64f00ea22b27fc980809982658d383d68f55e9105c2bc8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 
128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "33e3b1d4c792a01212f118fa6d997a189366b0d8ba093a838c5d5ebaa7dda2ce"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "b96ce7321f01ba6dbf0cdb0eb0e8a2ec1b544ea0b4f02abc9eec8705594ceef9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "55a9edc750139519eb2dd5a47548835cafdeea6cf5ee597a0c5dbddd3043cffe"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "5f38d9ee728b38ecc23868f7e9bd080de3c10654f74625634342556fcdb428f2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, 
FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "a45069c3911eaffbc094bd4ed445a714d7ab22511d9f05b5cbed3f829cf8e345"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "6935f91ed8309992a7c774395b1e069bc5009ce9811e9b9bd444ea4b42f38d5d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "a2337c27906a152692485962fd957e42b9960a71712af7c0381b21d85d34e37b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, 
"1dddfaa7f2e673313a36d49099cf54d47ddecf26d3e9b2d10e49f92afbcf5a45"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "11931d8ca84912aca9c8f77c5dd130fbaddaebe886314793f424130013012134"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "7f3550675a84b7fa7f8ac65c09e2fbb08b198f080fc697650602f64a73f0a353"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "0a56d5bbe19687ee697a0fdca860779960d45711417734271e9f66cc75f843e5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, 
false, false, "15b89afa10f83397e0fd0389780df85996f3825917bf0a9c84bb2c0fc9f8c922"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "9289751a836a0eb6feaf7817cf1682542bacab43262fe80ab0ccce7b1463fd6a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "d50f0541a7b582387fe4a348005633f1cb450d7a23ec3b45173fcedaabbad746"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "392b7e135b664e9ce6e9de680e5c4bbfcc12817cd93695354c8d1c29e6cd0d6a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "e6d1bcdc8037c24a52f5732957c7f77f036e4016bc09146b51496561fc1889c8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "2112997932ef19648491910482621e5df71a4cea088d78a6aab80ac101fb2420"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "e4f3818fe077c5bdc30438954345890db8ee239b9f18b3f8336cde5bc7dbe91b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "7c478299e5687308661b7bb3ba1b82c1af916a5c0a7c00a54f69ccdada82d59d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "5daf04d71d9f33c96b5b4b6129c8c8e2d506ef1b077d9fad92adaf34102827bd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "954b436544631eb553fe832e1db0d2b56b85d085a228494b573d15484891e5be"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "cd8d2f53f25c06859d62e5fcc34a0826276ebf3ed7611335663e3d4a77cd1461"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "99c4de1194f7951f4d9d6dd3a3874acf9d18f63ea53daa0485d69ab9ca29488b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "3189f4597e84a2894829f1727222758e3d1d2e7ceb8f45ba08047dc69ffa1df4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "073ff446f4b5338974bb21a788719b7d4a51fa3843c9ccf19b3a311f7e9c8989"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "aa8a60fd48004cb228c0f32b42854040b59db2c20aee3a588be2a78cb234d079"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "66391a0f06103d623300ab27568d5352ce8ea1600b824229722caba030b792cc"}, +{ DATA_TYPE_E4M3, 
DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "bbbd1af9b155bd6c1a5a26512746253d6d04f25f440555e862860fb9c09a03f1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "1dac9b072cb5f61dd182bb08fe985e96f2b1b6685e617c3f260dcb4d4c61da1a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "d63bed985c24bf05b567d60de7180e37964313db7972f3daae2c454c4f163017"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "501acfd423627f659bd8102d2168d9bec4a263a71a2c28e267aae69e8f0d1f00"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, 
kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "d9c54541e4c67369760458f2dd8372406f74b1da4ed46f3d4dc9a49adf494735"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "b7520a4fe355c7d204643726d3d2bec6f600b8df2d24af5f3695c646d3393c3f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "d9abe39e4d18a6110bf5cc31c0be2c13f308d58e7cb5f4dab4cdcfe69c3a3c61"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "8915c66b34e074696cc7dc35f2072b5646ce0ec43e09969966c0a570e085ff5e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, 
DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "4bb4736bacce4a94d5d79c981d8fbe18d8181ce3ef4bde942b3ad3d29124ec2f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "fe88d5cfaeec634602d1af1448313d12acd7b733ea08ab1dac3f6cdaeb947036"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "767f7dac85828ad298c964c8b94198a22b441fc2757fb1b4bf2c49e0af74a24b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "bdc863c7a856bbfdb451f1a56cc9b56f488db9ebb9843a2d71999a4caf5369b8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, 
kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "ef5a83628bf28720f58775ac8738991d841cc99202f1a889476b714c50bd7215"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "330b2b7a2231c787cc1e562d6d0a1f1917355e9e81ca026a7f2afd6322c0a703"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "e68bb4e8857f7da93bc2ea5804e1deaa1275f663236fd2e21ce9f474776fbeaf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "8d0f24fba0794c37ededa81ae01f9eb4484c178ee546a8f984401cb075ab9aa9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, 
DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "23999a71b8d498c5b21d669a8f79ba5a440b9574b6f32d7b18e21e584b504d75"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "46d5ccfad5734e1041f47b9980549bbc8e255cf14ed8c9f26f9209b688fc08e8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "7daebaeafd5f55c643a50b8ee3ba3538f3e9a252df7977128b712a5d81056ba3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, 
"b6a9ea8762338494b7621125dc42759fb78f7dc64d327f4079b084b9b9f0b045"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "de2dbab88d0123044b93b9bde38cd7733574e14a59d14ed3bfdaef5edba9d1a9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "67bc1f58f53c8c8ebb472975eaaa7c039dc36de8919448ca4f04c11241fe9c9c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "e288f75816bca281bfe6e6edd6a94ab5c5106bbfe744d7051f0f5c3de48a0f17"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 
2, 0, 1, 0, 0, false, false, false, false, false, "6521e4296690aa5631408bd8d720a8dfefdb02a56cf668adadd35d2d22dc3c93"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "7452f8578e23fd69af8611abd13f70a10a069dfa1ba5d45c0d7edd24bfc79eae"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "6537c6beafecbe61bf6692854dc40183fb72b1f4965e30e1651f7bd70a9f710a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "9a5b563dce82ae312b764fe694acb8c8bc57992812a7c0221bc0c64b0f34d51f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, 
false, false, false, false, "65341ba308c98e5adb0be9102711ecfd544a0963993c50401abaf9709d0452fb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "992395e22c3c285f470213e04545c95b065cfafe31452616419107875a2cc030"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "d43ae33ce94b9544a1d8b780086beb9d4956ebdca4baaecc672494cdbd5ebd3c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "96452d7d4592cba1785097e4a2cfdb5971884a91a6fcf2e8c58e04c00ca539a9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, 
"988ed42b6c2d2c65cb39829835c214bdd0c2e137c3410ed733a81d86f06c213d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "3d4d8bf3d94bda5159fba579ad149b2b421f0815c10559be563e74f7440707f3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "3a517ff4b3b4f27b6d77a13c44d01dd8c321b79b7b5ff71aaa0a7df6afcf81cd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "88b7f48ba475ed959a22dd37b58456625786605d56b49246427117f911b3445c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, 
FmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "c00bc61203bb2f0afdc175ca6675ed3cd8b7d19249348f10a5d3cecf53403c0e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "217f6ff2c089e93b8e61cd1c3120ce0c479bdf5fa05024ac4be2f2b8be068e28"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "38e424b843cc0d266f2d5fead0cd42c31f043f95e9a8af1816507e076ea563d9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "e1fc724783bc64d19a5915358a20d419cbac9404a3acbad1c7f29dfcfa44ca2f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, 
FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "2ad276b7828ad9275d95fbf2030949ee0b44d2fbe1f7ed0ce3ec8a0a9f1afd77"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "df5bcb7a22e7021432e34b51d12ef7c0097cfca5e5e939fdd885485dab6820fd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "3099889df0a453c25724d3cefc1b630dd450b478f7abedf4dea718fbcc01211e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "8cfe21718155b0c679a2589f15bb6409c3a71a06df9cec3b8a3a2521c63f91b9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, 
FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "4ca2226dd419d911bf7f20bfb6bd7c42ba50fab24d279c3579c7cdde99c9adf8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "d1952b9354abd02696305e17acc13466c887ab224efb958fa15950b4fd0fb8c0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "22016da35e011832baeb554ef0a128ec94f199c2c82754cf67a6745d2db24667"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "abe5dc5a5150386f8fff5706e5eb54834a59c1c7d76194e123ee0682394a1dc3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, 
FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "402a08d66e7e203ba931bb5b797007bb767d0d73719bdca0c578f8e7edee8632"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "52a5cdb19e61164290fcaa3be987a01caef54317bbc7e7c8c1ac09909adcca34"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "69034e00377748c5002b501ff57699dad672241954de86dc51fe4eb865beaaf2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "8658b7ee552dae732459a3dbad7b02d80d0003b13aa09d6a4b7776796ff16394"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 
256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "927af08e17361e71f6893694a9f065a8f089014931fc3da4a9f785f4b83de87a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "59e754dcc78980ba6dea49e0c5737a502d2b66ddbfc2a72b7a60d693fd15eeec"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "8945c1520d615a75328c052bff95a9e1df42c1c9486c9cb272bf8c80429962f1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, 
"0d0048adb0207c44500f29e25c844779acf95a41e12292232659226c303886b7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "9ea2d6f49af9747a328eb0d3a912827b3c9636f3ed7f12f4e09629aa22ab9d91"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "11adfc33a92d3a9d1f5a566c8a240cdbcaef7820da873397202f4ff630765e00"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "1faa76d876c4b7b15c28e568f28914d21c86a13682e8610228adf5ea3346641e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, 
"0a66d2924211f4577d1548f2b396d1993ca5a5fe1fd8e4e364ccc31220e6b038"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "fa9f01b236fdde75a05c64dce25538d5406578f48325e942078bbae16ee09f45"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "f7e6ff1f7d5ecdbc0d62a97007c5c1d8169b88d855d093cd48a8c4556e8ee0ac"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "12fd137ca0808760765722d925425ad9cd74e828cd6f272b52a5e6ec0c97c255"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, 
"0f2202defceee5568028d261d7faa7309bcbf13527a6a785781da527303d108d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "1d49761891c5f4bce6be33b28aa0c10f23de5e5dace1b97b21baccd3014e0db9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "6cf049e099893fee38ce1b9e9eb50722f060f9c1464fba77de40d709c91e93a3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "ed2e174028cca706536461eb37c1e5b300435b5080ba6774068e58bd2078b072"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, 
"caa2a035f6ed04eaca8b0f55cab0ec3a680b4aa33bbc83954036fd3a6e4d36ea"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "3a9dd28177e228b60888abdfdde7413f32face4e8ec2457f677ff4c98eb2a638"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "352b333b73fb60495d3e13156d5a080d707d657bf036cd4b20b50d9fdd7bb380"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "936318ba98f7135795c07b4b0c45fefde69035ad59a03599d057cf1060fcb1b7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "302bc34e5cd05f4f086c4f5178ba0097c1a8294193e6718165f8f153dc2ca9d3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "0f4b2649a01fd669f4d9cd69046e013313cc3fea7ac6cac67d373b9c2f53c882"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "2574d2b494abd3f35c226b16eea5419f63060ef310d08b1c33c30f9339bf7402"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "7008671c6065695bbae528f451c353f3faccaf8053e0c536857fecf55db59fa0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "8b2c57c9f8459923cfdf283231c5c2da9da2b9397ef0acea7dabef306b9e0abc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "27237f30bd090ddd928eff0270ddc43fd0240412f4cc3f6253704522874fd0b7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "3a31649561aebc6f21e84ee6be9e9a4586980a7a61bf8c42b47c6793956848d7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "c247c5c4275e04e635ef55d00f4f5c39a64ed6f097bead0a20c2fa868ce3c51d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, 
FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "570daf81ddcd94bdd68d25adddd9c9daa9d984a404fa8c1c59c68ad416753f88"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "b260ccdca25064d939caa56af90722f7988317e3fd630960fcb00832a5e4661a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "b60aae0759259752cce9fb615d352e7a0f66360ecd383bcbe42e3e1119fd061c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "4734c3647fb0d0de4d6f1538af0b446935de5d092d3f1c3960031aca5b28e90a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, 
FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "350a16f68da892f5b9ff2515fa5833ca311b062992b5cb08a6b0f6fc4b055a11"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "d3d5cebc9a9e9e5523e7e7d4ed353b5ab7a6d3afdcdcbd682bc71c8993a1e282"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "079e5a7f85d1bff814127c48ba5b68e02845f21b0dc1c95cc56e45edc20f3e2c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "5ed049e22562971799b96ce5abb8e0d5c82973292b08cd0b3605a2933f403ef1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, 
FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "48b21e3395d279f120f7f0b7b2f24ae78d6e110cbd913715115e9e37611780a5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "0845aa36667ba6c0e6e82a8b86321d39ecdf67bad09d9087edbd6b83b09e76b1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "899b66147d986323d1f518750cb57e63e35053a214cbc88c5bf620695bbf403f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, 
false, "444dc520df8827b725557c3029b49245ecf96bf40fa367b96e009d6eca19c0f7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "7c41edbe1eca9d90189252debe52b8151fbfd2d70e2d54327692a6dab1e773c2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "1928b8c1cf55a496f5ee7fa1088479b2c6b956240e2649de26959ae8e548b96a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "d1e85e2968f42926f7be2590e9e33bf5197a820775ef35465b7898c755994b17"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, 
"f4b564b65048baee571c028ec6c63e0bda1b9ba6fa4f7852b3d5ac6d56a90d1d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "4f165f19690bfdbffdae981e5fd8e1714691f3ff771538fe2ea461eef4bd96c1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "95a7dc5f0a5e0ad19a31c33df899be59cdd4ac6204bb7a3fd6c64d4a0d8426f1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "08d650f73a2c5944e0db7cdba04d99f9289982c79a06bbc88c9dcdc4bf773d0a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 
214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "0e630ed07b6c6d1a02eca7014c66868e20c937ef096bb74aeaeecb127dd715d6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "f8e9028ce59d5274f5ffe9e9b4d7dd287b627136d488f3c7420d7761458a26fc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "6f36473bde925ee276368d9cbb28c7ea021691254588a958a9f81a3dfbe2f959"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "b1ab1ec2aa1da0453a52e64ce7c186ecbab442f0e98cd3b5f451446a49344f60"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 
1, 0, 0, false, false, false, false, false, "aad49e304633171218d63fe0537ed762f4046c23b1697c4ad2ac6cc07c3188fa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "e676316197dd13d8c282b17c36bd65807bb7aeaae2f54d2eb05bb2042ef92462"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "106b73c452cbfc363d32175a976ed67209fa42e6d35807e3b5bac9bfc040dc14"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "b047a83fc5ed649269b6401a54cf15e57ea4db079d8d47c643474746469bf1a8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "0367fa74d8d1629e25c82067bb9e41803277e51f5507c4cfe49e9c8c31c2032a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "4ec410a84b4b4906ca9f7629e8905f6404c7e27c44b9fee2532bf7f5661ecf55"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "e25a0f7c0877ef5536f931c1610cf6d7072bc96a992d42c04734bff862e681e4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "db927063f4363ccda6c29dff77a2e86e7a470ad82d9520575a5c421458f840f1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, 
FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "e6e0f186a085fdf920e29303bd6554aadadee43bf1b96d05dc1216545d0f7495"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "d69054c2f05035fe4bcafdc3be9b92f10de70a21fc6279d696007114635b89ab"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "e3f253e5fd1cc669f756cc0a2d3bb17dcb625a23e4f83b9a103181ed0835d712"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "d47744acb8a440f5c2df1f6252b4275e3a1e0b897ab4a7ba54f75a9a7bee533c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, 
FmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "9c20b7bfd4f840dcd4b8c58ab2aff654d5566ba83b52722af5c07fa47f692743"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "27ad96e6e647c17d68ec66d84347166a40f436d6da6af275252dd142df8ba3b1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "f33c6eba31d12f356a6b7d24da6237ac2694faadc519234e0a2f44598b70b338"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "3be4e43931724a739b1c565b05a7b8475aad81866e2eae4210f86522ed61d2ae"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, 
FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "08907a0f44e2afb3e0e437e27f00e71ca55dbd0555280d2f1f2c9f3e620f896b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "94f4133ea53d0b591ddd42746845054b51be738ae8f2bd0091357d3ec7f78b33"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "36a7fe126247f73684ed55a8b2b172259847bd33a03f6d2a15da12fd5a99703d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "eda1a3eee17e7da8bb406a3a4b5d151135571b008ea876b3f6c89c8ce38bb7c8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, 
FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "20ba73a3ba774ce16384f67efed4cdbe9f82740be87d96afb0b4f482695694f3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "e1714bbe14e0d91e9ef4f4c50113efb1880af147ba660f92b79d1faaee75076f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "a112036990ffc48b44ee9ebb9df4a4ad5c07ca2436107777c21fa0b446603a7f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "861d5d39293be9e51e60a8d8fa55cd6d184a6fe600c3acd43c1a2813169ec93c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 
64, kSM_100, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "c315224dfc7920c25ae14168ae24aee7e9fd399a7d3b601e09b13a1c89e1c15a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "2a23695591843a17f99c1ef4e454decb300c8030209c63f1ea8eed4638a8bcdf"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PackedQkvCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "89e2413c2ffd8cc0e60fd395adb89a952bb4533b94032750ed4f0695348f10cf"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "42b89fd1dde74fbdec9a88f659944e697a086de64946cf2e3eb07305f82f7e56"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 
128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PackedQkvDenseVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "94b8e8d178f75d4c487e3263997226bc5e274a32aced385aa8a57d667b1d9b3b"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "4fa19f3cd155dab6b8282414039ec490fa164763884b794ecd49c4ba9977e4d4"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "210a978984649b42d085ba9fcd51f6207464d1c53faf2afccab787139f9023d0"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "6c1c9b4a094e42dee4cee3f1e73b564b430aa813a5940823ffb790890d50bca5"}, +{ 
DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "ec758644aff66ebcb777d5a5d572bc8866e44f968e4d644dc862cae21de3880a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "948b4a15b03d1b0d9bd79caff82b3d8392cde3c29614e31a92fc8d88d5b9880c"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "a29392c44076c08511a806dde93cdf5d04bb23347a96be79d9f4abe72d3d8b0a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "eaa105ccb200312834295f5bbb7c0c6b0ebd28b2ce2793461c011f64bd3a2750"}, +{ DATA_TYPE_FP16, 
DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "233319dee83c3898346746f88c72b7f294b9fa41c557d2e56f287bbfc4d745d0"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "90ee4bb2339563b443965f5a93a797a49443e4089b4e7b0673d6361fdf726885"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "a29a8dc953f6abd956d44239fea9cfc718dc9d3ca7bc4edfb30143be104386d0"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "5c05358ea7fbe99f44a142c38799aafefb0b1e3d7e5b91cc383b0e1542a58950"}, 
+{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "11a18d6d8de9c30723a3a33b13c0bafae3efeb6036f6c4bafbe1db2953709e5b"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "1c00913bf879993539ef72353977b74f9dccae7f7d0df33f205a68cfc9732044"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "46e8a621aac27b4532c7c2267849f9122f3792e3bdcd93063ca1c3679b67d840"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 
0, 1, 0, 1, 1, 0, false, false, false, false, false, "c3a613cdd1430b634c44d5f5a74d3c4e62afc96b8547df1e4c96a91bc2fede8d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PackedQkvCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "4a32719337a4fea2ea5a4946b4715ca3b6871e791bc947a33f9fcd73868096f4"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "9d6e91a1c09e84ab904aeb06e2118487fbaf69adfed86a2c1fe502a346e6b70d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PackedQkvDenseVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "bfd5cb7968aac2ad4fee38240eb1e0a030c3706591b6e4c1e558022b8b5ef0b3"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 196928, 
384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "60d0df213287281da1f482a94e919aa6a7915d83c7ad2b2e8cbff4a51cb4eba2"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "a3bce13996f7a15d82668959474f19eca4aa86cb1fd069fec298e2bc7da01c32"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "3892ea951b0e58c647bbb01342e04c05d335cd2c44e2f96ea97bb7ab4d939066"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "56a1296780c9f5e7b4fccf94be374154686d09c6ae82592bdb553417933a2f5f"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "35dd9ede883de5587b5fb802bc605262f48bde52df1a3af627e1d3700d4cbf90"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "265e6b02adc419b2d524fc7dfa676270126bdcac8ee694acaafccc2d9f7ceb25"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "ec87b8b5ffda8da6b320f6d407b1723b9718c80f24d7bb768a25a89fb31e8a6d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "7e9dcd095ab3a7bc17439c6bfaff9ccf22692635d06a23c5919466c2bc206fe5"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "c73401e33479508830cebeb2e0245c5af42b096085f858613f8ca8de1eeb34fc"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "5550b05169912c56630c07ad52f5ec19bd50603a858a2105c161e4ccb5ede2b6"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "81b13e079642aef4808d2c9f1690565844fcdd47acd1ab98eb3fe2f129c58820"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "e795686f2abf2518d3c21ae1333272ca9e7cd170bd774628abbd1ee6e84f397b"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "34750157f4b9525561951e28ff2b06f03b7a13f71df340d33249cc2b6b6ff95c"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "b24b5adc271c26b153e69b10970981afc4be986d8d3c20f3112dca33d3a2c42d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "ffeb8c4ad21146765e8eab13287ab0b1c82650a35ca6bdb5882a4487c5155e7a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "c89592228435e89ad2bebc941c2392c66caa3bd4c01bfd54305ee9e8fd322edd"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, 
FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "3d3357e807a0999f38e87f51014c331f36d1f639dffb9a53ed03f11b0e7a666e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "920258ea2f505176f4279491b1dff7e1d3045100f240b72f73726f017d70a7b5"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "7d26030a63204e17b42f5d9ebb42998fcea90fcfd12903795b8fb8c2616794fc"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "70b1b3453fc36fb0ac4f79bde15b139d8650878a1075f8f41af023f708bf5a81"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 
128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "977242dfc724ed6158b1629c7a0cc7042f5f5896394fced5bf5ce9d419e97275"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "bd211e5ce680c65be00ed106ba6db1f1a394faa25d5e27cf6c19f94cf44c1668"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "eb4f77ac7151d4bdfa635db2278ded6992e57d551ea48c452e8a46e9beb53143"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "465294086722f8939488f59b03190e97574da9e87d9f545c9d3d34a23399cd7f"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, 
FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "f49256acef3f5de04dc3692f5c366eea0bf4fcdfac212fcc345121f7bd7bd2c4"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "e89ad49b5a8eff01c5067c9e60c4fbfc83d866aa8c829112eb5224d4c8dd2649"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "c239e9586380ea6f117b417eacfb3132938ba432f4878abc7ff9667f7d223404"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "6a9ad79c88667f507e75d2acb2b18c777d439c481324af5d079cdb6961c52604"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, 
FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "2d87708903d39444b6175b27e0501d4087686fbf3d80b7fa4b2f20648e0eb677"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "b6e19c057872f65d3e8970916b30d035ca6b9b1db081d06de6ce3325e871c5d0"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "9eb488b41ef43d2dbf39a1fff7585bdb3db22cd9aeadf4a187cb0526f596a194"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_100, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm100aKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, 
"7ecb8530d470593239eb3c22809578940ee0057b585c0c3296218d9637cbaf17"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228328, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "3a28ab9f97c3b755a55c988beb933229c607d30f447786aa31ca44ae5c9140db"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 164960, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "ae3c7d72aa4abb0cd00d9ce11213109165faed46bf11ee24263a229918e340e7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 197824, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "c9391aca869b4a77346d94aefbd49bdd8946a9b99e6e2f7f7602ba5a15801dcc"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 164944, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "5ada733ad05d07af3771d2f17ab8c9758febbd73c90dc320f54222217025b0a2"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228328, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "34ddcf964454b5c36ac396a9bb57192fda5aa317a25598595f6f83fd2d8e397a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 164960, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "035bc6ccfef7c29432ac2640149ba2f8263d812c68c3dd4a6721309d65064e3c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 197824, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, 
"46c45759f03b2b5dcc5531f0f84bd0ee5f71804ce2395c2abb9523c58bf73d33"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 164944, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "d4644d09a7db1835d829a017aa893d4054a401c9d747a7fae361dac4b1316a71"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 179288, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "9257890735ff4b0b1aaaa7c32cf01fbb8ea085ff37f7243de8ba9f6b0b99a32a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 172632, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "65525228d2abd6722e1dd10e8b7c90c3f6c085ac0a07cc4a84be4e2e86c86417"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "4b77e12fef88bf23455d9a1000bc85f1df7f621e9e8c1e42cb1c90fbb3a0413f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "301f0cf7d8848a6481cfe37e9a2b0c64c49663347604b7311497f91907d5fed9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "daeb1caa2322bac0df581eb28ec9f586d30db2e715aa4f727642d7dc5049c4aa"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "5c8d7cbd5a8eeddbb237b7e2fe8edbf3f58a300f2ccc733cbb86519cf40efe1a"}, +{ DATA_TYPE_BF16, 
DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "e4e91ecb2c19bac1c0d30dbd12de926859da6616ee4bd5e73e632e6a3aeaaaf3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "89ebdd6b702bf63f291f5869c9f59dc8367cb38053aa5e28c2fd5df97665117e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 179288, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "1d7576049d08f0ac4c4b33b0361c4a4334fda7ca83bcfced97a6ec275889a949"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 172632, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "63b59ee22c93c49d49f9b5e96854a8e8aa9e5955d0b06f202f3ebf334d61cf85"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "4f1ab069b40ecc76b0bf6f8fd0913de0a4c186f28af3d9b22ec20522ddeffe83"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "4412f82acfe7772d8f6d25c3d65aadcc477cd100105539e75ab85d678b91f1a2"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "b2c2766370cc887912c82000d4209d97f70e46eacef898b5bc41d4c0acc56646"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "6a7a87ae040efd362a541c5110021402d6350fd3fee4869b26861be977c8a5ef"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "e746e974ba98d93dbf6b9b71e2d17746f59935a1368fd4a670894b3aa300a6e3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "0562f2a45ea9280e9f2c5b06d9e9c8df70cf9945d1fbaab73f6c126d1762df70"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 179288, 512, 2, 32, 2, 2, 16, 0, 3, true, 
false, false, false, false, "3b58e7113e31f9e9ad83e7b9b8ff42af20a5bf0b5743d9cae4772d38e390186b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 172632, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "5ee3eed36a65c02aac54991bd2b48b9b63405e01f6437fc1a9efa673416497bb"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "3885a8d69958f95ce455cc56cfb41389694c2b80a9b5b2c7d7900a98496d744c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "ebd2205d3f38d8da4a5325a60f80c4d5343262ad6c4ccc4df7b86057a7262036"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 
128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "2403040ec31255e3dcd789958de9dc3dd3c7e0eee0daaa8b8d8b35715d948356"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "a9602102b43879ee15cd062e5de86d693933ba711254391d31e5d335cd779e74"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "8e16f9e2e10902f37349bcfb31c7ec458eccda91931b1621499af6dd0ec49033"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "aef807d33e01e1ddff0c465ebd4725fa0e7cfd930134553b8d55623a03ac01d9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 179288, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "48213be4fe207a033dc7fe28db2cde1676b3e4bc5fb77078e5ca97a6ff26c5cd"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 172632, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "d958ad255b68584d7ec0d5e552cb827fddc0231759ae30d7807cb7cead8716e9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, 
false, false, "bf10d9bcfffcb15b08df87d860033443dcaf2a18fa3c48c1ceca768dad02eb5b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "801dee46cf02790049259cc3cf5c0b08d58e034376d519938ffc3876d8348330"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "233d5d1d7c62548355e1ad48d16eee3fcec541028c97ba930121ed1fd30accb9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "b678dccb2f50757e2eefb729dd24d0607a7b756a0f3f3d75f6d8f93e9118259a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "547a0571566205510818e0caa67b246c12ccdae27de34f6417665132b283842d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "5d3316c5dfcecffa5ca1d0c4ccbef009d9a17ec1951257de31e6abdb3e18b429"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182872, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "7080a5d47ba36015c56990dd75547c2ea63de97d2248659a9a542aee598db8be"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174168, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "9e56117b14beb3d4925f521d282b31866454f71d135ba24791b4a444ebde08d2"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "0715b79bc13cda0bb3ab56dcad0c17b6620aa1d585e1abd38880d69ab693edc5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "63ce55de465452c73dffb106f2935f861ba25d5c7d4bf82770d4e27f23aa18f2"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "627a398d48cb6e5eabcfc767e6edae23d74d5c3fc5f73563d111d41c8213991a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "e776bf2dcb68692601c7f96e47c04ce56d1c9514d2f4f8ec6fd461247096d83c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "e1bce82a045e64001ffad4d52eea3cabbce9e03555d545c094c492d65d98ce96"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "7d75b443ac2450c2af356b98cd12cd26a0f4230bd739d1a875cf213c42a4e6f1"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182872, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, 
"17aecb995152547dca8181024b171d9299011c56ff53971f74f4fb33dd2e5c33"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174168, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "371a6267ba5a2379b051a27a386ee52310bb5df608668fb055dbe40977025a54"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "7cc4d344013266a7c2dea9efc3c28861c9b4195e69b1e433110bd1500b19cc12"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "b5ae4f134a8abf613487d77560dbb96be6a32f208c1f6d6c974c1b4c4abd98c5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "e899f22f5ceeaf484370678296940b688037aa7c90dcaa722986ae689479db9f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "a0c950cd27ff0c316a32350cb64cbcbc77c66fa63bf8df0759d8cd4141fc6b72"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "6ead5dba099b413481f544008e7eb0150fe6daacc2d905fe1a45da9f1b45f29e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "56d82c35a46a1e54a73f337f7aea7d18e95fd59d5402230aae351a0175ba5f3a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, 
kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182872, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "0c179d08733edeab85082f30ecc704f5252ed96df92dfc47cc8fc4e5c0dd8cab"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174168, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "4e00dfe63336a78288a5f0528a77753f430a2527f590d45eef806be47f23fedc"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "c3ba861d9f07b3c5563d99a60e6b89860f7cca3c284280e5d5ee4f656f6a9ca4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "f6a3bc57ec6e81e73a7cb5636e4ce60840dd00a3d993c595d06e4381397028a5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "b1e399540d235200407775510fc52ae03acaabc7431d40ba14ccee7f0dd02205"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "41a06f2b0af3abe4487eaf1cd4ce03298e449ba03ae84c42fc6a5220a1d905c4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 
512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "83892fdda43f8086ee2b05ebb0385c664d27fe433394c286da11d84d8c8faab0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "46dae84a8785e057dd2913ffc1b7ac7b28fb3951a94e71449f0d32a7ab677610"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182872, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "b925fd5916fc3384d6be2d9963249d73e9c24422972f874f1acd3becdf01ce58"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174168, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "05204f9692f489b5bfad9bc9e6bcdb152715bf1962b480f794908926915caece"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 
256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "e4b5f24ae71b91ec0a40329a84f4e7a5db9ff6cef530841ea39f3360c3b556da"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "d1a19d3908b778b2201cd64269bcb7ed50487b97c2f922cd8d5941f192128dfd"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "b7de86a96d6f9f27eb37c01de7b73b2135b8f8aa6a229d1439f4864ff7c320ca"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "cc3112343816e0f29e66c46a53b5d4c6e09f968e106d98d96107f4bd834ac7d1"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "7990a1545bc8c12a020eecdc2f2f07a84f39f393bd51153598546fed49d96cb3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "cc122955e5165fb70db55ca15d3016246aa146717083560ef86fdfbd0c5f1d90"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 199864, 512, 2, 32, 3, 3, 128, 0, 3, true, false, 
false, false, false, "f3519dafa7e9be755409746dc3016c80ffdc71f1cdd51c44db8293c093b26c32"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "1cff7ef8634a32965a40add793acfda29033867d17295e8c5ef340533adc1def"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "6ce70c75939a46d3b536b7ef930d71d86e64a2a82fcccd34ed8c4343edcf7fb7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "77a03f1848e076eb8ea6ac00ed4b8aff47fd50c94fdcc0cb348588e02607ac27"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 199864, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "15d004b96f379bb39173488a37f526ec9fb2bbe3bd27ea8f9fa4a4eb57ad5852"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "5406430c0a4226e94a087e3611b8d390857fce7d81ee2350c33b3ecd0106cdcb"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "e6ca3fcf94815f56aaa28e48e9f17da7564566cc9fc7deb96eb259a7a503cbd6"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "fad5a130b27870bf9610e89b9b35042cc916a005541856a51b6d2395b27f08e4"}, +{ 
DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194728, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "313e2c3a43063f021d25c590f6ebe5e95d2242cbc2fdf5a77cb45215e98901e2"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189096, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "d7db59dcc98b4610c93e80dcbda8bc0fca13b6bc458eab4b175fde5fd100b70e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "34ba328a46a7da389e3407d880c8d557a35d93cf601581a51f94744ad87af1c5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "9a967817f9180ad7be0321b0c3a530e7d555645bdb021dd25208fb2f7a8e548c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "57d90db271df8d398a784988ef8a8b2fdfd13795a0b19c3e7700a59619eb4a2b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "fd28de7bf823d342cdea7b338e8754a29c0392be895234f74037f54073f4e588"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "f45ef03a55fcc7ad0176b3f0cc10adc672f1ad8796592f9ae3b773ee3f891ff3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "6cde8385cb79b45b18ee2a0a2f948baba3b028878c3f8e2bfba9f4f1c68100c5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194728, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "e83ec2f63bcd5b5c31e1792279a5126d20a2e72c257491f7a8e9731999a424b9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189096, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "2fd99c94dfe81985974a9580da99b41632e04f173ae700d84824de3b4ed9e960"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "11237bf3272e52dd695efe698119c05189a6fd68798d27c52bcd108e3f46f0cc"}, +{ 
DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "9711eef71a6952d92f3284558348a98b0ae24d8ae45a8e99492019f5efa26c75"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "a0c712e0187742929a3788b449c7621519b203bcc2fae689bbb0453d79d30357"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "8c22c3c314f61491947a701e6f2b99423957099f5e2b5088ec711dc285abaca4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 64, 0, 
2, 8, 1, 0, true, false, false, false, false, "ac252ad14db654047610f59b89e348937c65877baa1c8288d062c6f01cf13eb8"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "2d6b90da60a4c29c78b69d2e4ce47c9bbda4d7553ed65bc00db865d788845cbc"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194728, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "75671a8b5d71999548e1fa15ae0e3e94e19ac3abf8ef1d6283a8b4c2463f305b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189096, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "078183fb5ad20313e576ef6781287c40898586099e847e6d4e1dbb6a31ec8841"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "2c5e08e51798eea75644ad6f295a1ecafc9dc0786cc913ed412d838fc5f17b93"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "34d7304fd7d421461d730d53d64d0325a7b199bba4f6c0af591759360eee4d7d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "f4dec2db876b45ee0d35a5bef0421ed64130565687670db07dc97a1cf8134d5b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "5de0daa22350e6f55147bbdd3e78fb71d86d5ba04d990651e20bec14c3f58c09"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "af2db67adb8be3a22a331aac3cb0e37ea6c47cea78630e697ccdb6f098af0e8f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "3abee79392b05a2eec9b9c82f59c813908b098f40eb0a3c999790b84ed75d4f8"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194728, 512, 2, 64, 2, 
2, 16, 0, 3, true, false, false, false, false, "24933c9d149ed7c4013bd3fee8e4e1d8285a7e35da28143f78121144bbfb8d4c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189096, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "71397831a9918bd3e7d07aa452e9f8e8a938e85e8ac9b7d2dce6231a4f3fbcc1"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "f4c66f5e1f4b553afe1c780cdb1ccaf0f405944c4d7886c5e68270061eaa3bee"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "a5d9a72f80c517f579d5d02a265ba10f962c8a9d5f377b5c52f1932ac739e990"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, 
kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "9f9771b2dc9d5854c1e196cc81b13c9608d4fc19b941650fd9ef40c335f0aec0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "225acb8cb3c4c87643fe2c66122a888b11cf72d1f556288a03f5d9a8907ad2b2"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "2e71b98346657f01853fe434c38dfbd78eb9ad178f4d452c776a4777f3390c8d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvBfloat16OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "87f2fcfb8ada77b337d19f66b1365317933146c1c3201572c9c40b2a37ad7f73"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 193624, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "35a44f9ad956b0414dbc232a1ec650e3024534af26300581fad91cc103ff4f39"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 205992, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "9862c7d4116642d2e9ba56c5ca836799873c38bdad2339a19561a29ab4898ae3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 205920, 384, 2, 32, 0, 3, 64, 0, 3, 
true, false, false, false, false, "87e4e755588fe0b249805bb9cad8de90835634ad71e6d6e1d212f572b2baa977"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 179800, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "d04c1b4629a259afc3aa47e4f76feba873bdb2e3667bf6c2b3b8b2db68f2f0db"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 194216, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "2d3b1ccc14220e67007762da8236140f7d3fbbe04199e471ed242705d9804eb3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 205904, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, false, false, "0a5c135985e4b1c63d1ead364fdc8869718b89c102b7e8b66e3414ab1595ed47"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, 
DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159824, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "ffd677b1f4c2524b336e28d7cd04529315a6d80388b5a9a3ecccee7be2ad9dbc"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 172192, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "9022f140c7b8c97f2a3e446692b331875385268e3934d779d298d3cf2ad6001b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 146000, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "85c0756b878ce5fb5692ae6eae59022d6f7f75bae2589bf6d50f86fa84099a50"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 160416, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "60dd7f7fe2bd1320e7c9a31f9e5c93e6893c219b922a11b6aeed5e8f9469e1ea"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "944448ac6b274a3020f8fd39498371dc9e298bf3409821809f3221e8cbeb3bbe"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 159824, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "8e049ea998d8cad0bb52363c08f420b29e564dcb094c744d18ff7870350435fd"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 194832, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "34a39135f85f3d22322edab9d3ff57c7dcf48baf584239ff064366ea583d80c4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 172192, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "4d029f68aff51dfbb2f21fad4c044667f190f2129279347f9e6e80177012824e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 205984, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, false, false, "0a880117e4d1b3b4ebe118efad99a650b8f22655c8a7cb0d7232f8c2c616bcb4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 205888, 384, 2, 32, 
0, 3, 64, 0, 0, true, false, false, false, false, "b8d26eec0f6964f6b8a33428c6cc8c371836369544069091bbaf8b09803d2402"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157376, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "043a07166df8feaea498b13d03c3061a209ad25594436c5d386bb1861eca0896"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 146000, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "2349a52b35731c75628f8135975d8823132f7fd238a2c58366224c100fc0168d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 171792, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "829fb4fe670a071d72f784e8ca54f1cdeb30a336081d2806a78bceedca4b83fa"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 160416, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "d3e005a4871d767b32b2c374b398c4c16395b570daa665fc62f7ef4c12191eb4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 193624, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "9d4030487603917b8e4f02a95984323a407342926fbab81205af1fd15b0ab67c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 205992, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "b81b0ed0a77fa9a67ed7a95240d11ef8e5e8d05aead0386f6a0dab07c4666b3c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 205920, 384, 2, 64, 0, 3, 64, 0, 3, true, false, false, false, false, "034d95580388bc51bbd6198684fe9178b7f74d9e6f1e63d64b382a75e7944c19"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 179800, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "48c913ba7cd7c2503bfc3a385ae4326edbecc793c1a31603c8804d6252dcf4fa"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 194216, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "cee4dc5c3981602c3f66d39db77540efe6663c03e48db48e2812cf668ddf2988"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 205904, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, false, false, "b68bb2da7618cb08358a9916e78b2fdccb55a354b13979e792ec441b330d1a37"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159824, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "33a9ca460a233bd2a7d123245269e9cd1cd2ce811af48e08c57f94cf1a147e49"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 172192, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "69d8bf2d5ecac8f3a4d1ba5cccc9cd73ab34176ff7e24c40b35bab3ea4eadfc3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 146000, 512, 2, 64, 0, 2, 8, 0, 1, true, 
false, false, false, false, "dbe350e80a7af918627ab816c004b7debd6b105c6be0b092d8b1effa586cddc5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 160416, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "4a8e7688e9aca7e0f463e479fca923954d3c87667679721a8fadd949c3269b60"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "a2c13e06157bc6c364d79d81ef8d76deeb702a816cc1bb09ec526fe43969560e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 159824, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "8834e231eba684f71cc8346f72ba7fa0a990acc62cbbb400dbad8b93f0630697"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 194832, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "4f8a4c443e3265b0546597e43cb8af561d54fe916514475cb735174d4dd2d185"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 172192, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "5c01cfab3e23910d8f1c1911eebbfc0c4502098be9e2306a7ce91b020ba7ae1c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 205984, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, false, false, "5e29d7230b1d5c9d9836f6608b7db4975d6890140b8c0ac8b37363d12789dbb6"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 205888, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, false, false, "861b31cf81579523e6b967a306d72097e58c28310612703df5f5d3f1586bedf4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157376, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "b651e0f427b1a501f74d88703b915ced4aa932c4a540303fc1263adb82f536c0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 146000, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "ebc29ab20a3b145c23a2834dcc0c396c75e0985b2a6688408ac3dbee1d0a2eb6"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 171792, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "b3f6a56fdf9b32d72baadea6f4730e59f53560c10210ca84a7868ade5888e89e"}, +{ 
DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 160416, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "ca1eb7b8eaf01523ca36d8729f97848729c5a0f9e79b854919566152dc0f45b0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 198728, 512, 2, 1, 0, 2, 16, 0, 3, true, false, false, true, false, "8505382372d79fc4728e71473dfc5dd6fc06398d8fa6049bb311304845fc9b5b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 185032, 512, 2, 1, 0, 2, 8, 0, 3, true, false, false, true, false, "e104b0d685e937d079cd184decf3474caadba97278bad90d947506afe1bc5623"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 164928, 512, 2, 1, 0, 2, 16, 0, 1, true, false, false, true, false, "66e14ab96c059793433086ae8243843dc848d3d75f5162fb1b9715f9585695e8"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 151232, 512, 2, 1, 0, 2, 8, 0, 1, true, false, false, true, false, "399f488f4b0b5091fee55ef5dbe9ca54f49cd59f446b391a3177c4f80b201229"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen", 169120, 512, 2, 1, 0, 2, 16, 1, 0, true, false, false, true, false, "5cb7d06f6cdddd053b0832f408efbeff1d4f9cdfa0f2fc3d2125653e4b98d18d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen", 164928, 512, 2, 1, 0, 2, 16, 0, 0, true, false, false, true, false, "c318c13b7415186a270480255da460894c32f924a105944bd8588510977ce640"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen", 153376, 512, 2, 1, 0, 2, 8, 1, 0, true, false, false, true, false, "2fd0a28632240b0bd7b5807bb30ac61805d2c17297640ce0f82cef26574b3b84"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen", 151232, 512, 2, 1, 0, 2, 8, 0, 0, true, false, false, true, false, "c517691bdc0e1a1bbf735d0818f6d369ed6fa5877ba73513a8441884b440f39a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 
193112, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "df9c05c7eb9c767e421cfd766982fa76ab84c1b1dbeeb0437c37002b9ac04c75"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 205480, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "2c7a49bb39d71917e86d0be8e58215893b0fd28f1975e494746949c11da878ce"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 223896, 384, 2, 32, 0, 3, 64, 0, 3, true, false, true, false, false, "e0c0a21c680823550b87e4ff771654e6410b9cd4262408d3cdd584f7a12b6f66"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 205920, 384, 2, 32, 0, 3, 64, 0, 3, true, false, false, false, false, 
"2e5f6ff8b717dfe4c4fdedd589b34dc5c7a22f961c0800cfade756fbb89ac686"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 179288, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "00b574a6ffe353b0c57df44ce2d1903b2f89c00a4add06b70146aa3965627236"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 193704, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "b9216889b095664df20f776a4aeb192127a4a384283824d16905274e9b1673d1"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen", 223880, 384, 2, 32, 0, 3, 64, 0, 2, true, false, true, false, false, "9ed363776bfda3c342a0a912175050f90d2515b84cf9be29446997770cdb2847"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 
128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 205904, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, false, false, "5ac988cba4ad943bd1d169202ed5fad323698c2bdbc37e84bac45788049b5871"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159824, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "43ddf7989b79c8cfefcb13a0be27ddbd7233e79ab4f955cac36e38762663332e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 172192, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "1a4dd92f15832be9cefa406dd6a9c75b313ce67082f789365c16a8cc5686207b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 146000, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "5176729110dbacbeb3c9e61d4c3f59404513aaf3209b0df71004e26ec12782f5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 160416, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "efc294be181edafd9deabf67903ebbd53e787fa4261b8d8e04e1e925cdefe78d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "01bc1cf35b1b11e6796821cfe3173847a95e814515820e8d3985c1eb8efab5e6"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 159824, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "71f4ad208faf4afd9a89fa25ca8f9ecb3c4685e80ff5815bf61fd24e489091ed"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 194832, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "a489f978c4569fc9f09f60ae056cceae80c804050ea158df1d7f4e33fd0ec948"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 172192, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "d6d14e9d7e7078556861eac7c92706c539f51c24ebfc67e037d7f8e8bc5d5356"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 223960, 
384, 2, 32, 0, 3, 64, 1, 0, true, false, true, false, false, "8b09a8d8e0654d62c2468c2950aaaa167dcdc0e5dd39aa3955ca56b126baa707"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 205984, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, false, false, "f5bd36747095e28e75051970fd2f853622c0074dd79851d563013e382ad3fcee"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen", 223864, 384, 2, 32, 0, 3, 64, 0, 0, true, false, true, false, false, "d23bf77763ca8e29bcb38d41329263c04b85e24e5e7e2b94ca6cd8436675ec40"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 205888, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, false, false, "933a48064ed2bb83e7e33dbaf1bf0d9ae365a7a50a84d29999bcd1468e41c9a4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157376, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "5657be922a490437fafd9988858af6444dc76d7ef0b8c453ea474c5ec6723510"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 146000, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "8c285ec588e64bb12af409859a9c6af03171c756ccde7be912fe1cd3d23bb395"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 171792, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "01c312469d57fc44abf4495631c37f4f0ce4cbe61f03c0f9b33438edd61c7693"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 160416, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "392208b619d553c11cba81f7fc1e19fd05426ae64477ff8d13ec63f22d942bbd"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 193112, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "ba09bc221b241fa545a18d652ccd3ec99bedbf88c0370e3ce38f886995e3d03e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 205480, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "7d0e008a625d0008454be0989f8b2c6f95d317f4ee42a2b5e8c9c5b0def9945d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 223896, 384, 2, 64, 0, 
3, 64, 0, 3, true, false, true, false, false, "a4d6f69e11801604904ef504631e22a39cec4cffbd8d2b32f4e0479687e6e968"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 205920, 384, 2, 64, 0, 3, 64, 0, 3, true, false, false, false, false, "f4413fbe18f5a22ef3950e9f3abf67ce15e79e00fa1cf1357583476420fe0462"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 179288, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "833f45ca7bb8cc7fd10a00d48f291978da22f19df0fec0f6b101b9e0104e0dd2"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 193704, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "6fbf704795af474db5c21ced277db388f6ae703e0e3edadde0dcb34ddae6f087"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, 
DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen", 223880, 384, 2, 64, 0, 3, 64, 0, 2, true, false, true, false, false, "f06e7bd22bf585c7407c505b6612515780a6ce666573e6c7ef32c9627eba6551"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 205904, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, false, false, "6ce64e6a5712ac4c90314dbabd64dba85c0ded0042d3042b37b4167b95ee027a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159824, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "d20e24b5aaa2df189267bc75f39ee2fd828196053c5996da970b113f16c4870a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 172192, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "71ae79cc36befb462a5b345b5f966c4d03272bd55e735f16f2e384b3f6a0f605"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 146000, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "5edb9e3035b96e3983399971370a1d74ed3caf65f81da9cf3e9de3fca7665a1f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 160416, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "a63fa530dcdcb633838584c242f9596923e3e7affe7cb066f4296c58540decf2"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "9d5ba104b64888d28e6d7f0a43d532404cf8d8aab5e5131be583c697f1bfb33b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 159824, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "7785ae1e34fff64103da967bee77d313a6b6641910e8f2eea85e334f0bab1aa0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 194832, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "9c74ff8d25ea429a04d5fb71586039e84c7ce803335556c6309837bd8f35ec44"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 172192, 512, 2, 64, 0, 
2, 16, 0, 0, true, false, false, false, false, "77c26ee20e5127230917dfcffb14ff3d59b28c5d02b0698b5dd40d989bd84ff4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 223960, 384, 2, 64, 0, 3, 64, 1, 0, true, false, true, false, false, "ef8229ed9af8a795af1ac4c9cd1402973040e44d385dcba1878055c4907a9079"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 205984, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, false, false, "c656cff6e93caf33143b7a9ce8bd9d5d3784c8e15aa859f2bb34beb315c2019a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen", 223864, 384, 2, 64, 0, 3, 64, 0, 0, true, false, true, false, false, "bd9b6947e89d389e3da5e949aba74b01f43c6bbfee41aa70f3258ebeca62497a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 205888, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, false, false, "2c037b6b3d433d6ef473371b6d18a5e8b460530305f648f3878d8e9c36dfe135"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157376, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "8aa44ba72a3a4906dde6ee1a96d81c2cc2ac5c62222b771b676b67815b8833fb"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 146000, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "3767376910afbb0b4d88107cd9b8d34e28cf6725e26ff5f579d9df8c421f7948"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 171792, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "42c9447c38daa85a94207527ccd477cca928265ed3dba822f4d8c5b121b63dfd"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 160416, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "ab9936153f2c12d14b620c982468718e1ebeee624f17145702ea78bdfda7ce2c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 198216, 512, 2, 1, 0, 2, 16, 0, 3, true, false, false, true, false, "db79e2fb7c41e8aa04954efe441bc2401c288f44931676d3cb0bba70a4efd9b5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 229272, 512, 2, 1, 0, 3, 64, 0, 3, true, false, true, true, false, 
"d5918cca59928b44b496f7d747f35122bef1c8d1a9c5664d66cc5b6f77bb9d52"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 184520, 512, 2, 1, 0, 2, 8, 0, 3, true, false, false, true, false, "258320bb363b5ee08d390e28b42cc1cf4c475509e2530f7ce7243de81b240ae4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen", 229256, 512, 2, 1, 0, 3, 64, 0, 2, true, false, true, true, false, "fc1b66512cfe1d378c0271f29cab0b7e2f891f48b4d7aa139ac173913f1621fd"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 164928, 512, 2, 1, 0, 2, 16, 0, 1, true, false, false, true, false, "e7e54e3353ead00a0fc6290597e6a206d291c54168abf7c854179a5f42736ff5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 
256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 151232, 512, 2, 1, 0, 2, 8, 0, 1, true, false, false, true, false, "ebf1e631c5934087924d49f506de18e70768a2bc70ed9630289f75bbaeba489c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen", 169120, 512, 2, 1, 0, 2, 16, 1, 0, true, false, false, true, false, "b95b1a2e71661d55bdfd98cbf39ffdfba0925a0a93bb37691ad558d990d87e60"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen", 164928, 512, 2, 1, 0, 2, 16, 0, 0, true, false, false, true, false, "e0995e123ffa5f9ee7f542d26d6bcc2220b0cd48b0035ae967e3d147b42ba5cc"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 229336, 512, 2, 1, 0, 3, 64, 1, 0, true, false, true, true, false, "191d5db7809954ceb113909514f801ff50ffb243df59861c640325f42902c973"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen", 229240, 512, 2, 1, 0, 3, 64, 0, 0, true, false, true, true, false, "dc5a59e702b393a2cb5e44041625f0d1abcf7820714d74f395e913d5f3cd21c4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen", 153376, 512, 2, 1, 0, 2, 8, 1, 0, true, false, false, true, false, "c0638c5f61a27f7a3032d4cfbbd683c5f46bad2a3487574157748a3288bb0ab0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen", 151232, 512, 
2, 1, 0, 2, 8, 0, 0, true, false, false, true, false, "e5e79cd1dd2a346dde04874554cbdcac40f8810f036e47312023d8da0c1e8d9d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192856, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "c6b0384e883327b71445f277a4a3e123777416f3ffb79ae18e5d823e043bfe9b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 205224, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "8d71920f1d628ad57e6d4099644631140b25cbf51dce3f465480905524707b8d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 205920, 384, 2, 32, 0, 3, 64, 0, 3, true, false, false, false, false, "403faaee10295865a769c4a40a603ea0dfacb79f39a72a3d1407fd1518e7cb2e"}, +{ DATA_TYPE_BF16, 
DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 179032, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "87fe84b8cc59bc448393579ebcf90fe5c16cddf550dd89f21f416e35d6568bfd"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 193448, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "5d87bd9e6f497afd0f4646db8b758e1101db058e024cbbebc4f2f3d6a8556d87"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 205904, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, false, false, "bf5d2ed915d9971ec4c28f85a98c91893b7e7aadf04bb3edbacf8f5770b9dee7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159824, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "408f46c49907a85fee42e1e12b5b6717a04c48dd929bdce41ff011abd509198a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 172192, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "efe37a15bb20e4b4827ec9cd2015b7b9a308344d5f8c53093da82976ad8ca046"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 146000, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "0a2fce655494c141fa9cfa7bb45046d3843328f61d900cba28a1e715dfebf3fe"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 160416, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "4937acd12994946ea5e8a5825c0e43313873f9c6c54e951e181c8436b710e9b2"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "169af815b2bb89f2a29e0d1d3fc2c81479540b7857fa3008ac91d8ea967ff6e9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 159824, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "44bc30d96ef032583ad750ee4565eaac194f1c6453fc7456004ff9093fddef86"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 194832, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "4a49f9506cce0af5d80445ebcbc8f53838f49fa8b918b1df001ffbc68d4ef339"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 172192, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "31b6fc5fe5cd806e0d58187a2dc7e5d41327e6517431b0c59479dd3920bfd0c4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 205984, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, false, false, "0517df9ad58c2250350de6f2a1b5702d5079703f139fc1535fdba723577daf8b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 205888, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, false, false, "bede31c2b2806e44bf1c73daeb65063e44324ef0afb7c873cf62e9251490df4c"}, +{ 
DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157376, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "575bcecee34fa8ebc045c956ac467cef802f6e039dbb93d3bc4a3176f473568e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 146000, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "6ce31d3d0aa494d3ab35509405a2eed2dd864e6f95ff2f55991b8f45486b766d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 171792, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "4bf461939a2991117473340808c3f5dcff17efb176e47691da349d38f8bb4a24"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 160416, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "1686a6e418a3c3977240318342112cf595e928c5c58e1415230fa1e8554a8123"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 192856, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "c2911ee6370f38bad484a264d55b1ae66d5d1cc956671ed387ed5c08c54184d0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 205224, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "b98b9074e8bc4c8bc230481b44342837cbc96b7dc450ccd4e0579a52a95a5749"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 205920, 384, 2, 64, 0, 3, 64, 0, 3, true, false, false, false, false, "6faf48e28d638252396a8708d2fa47649485d209d8e99f50fdac83cef74520e5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 179032, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "f3c1406c0fac252267937261f4b98d3edd1d149fd98e7295104b2087beace39b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 193448, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "6dd343a7479da2cbfe2dcb90f7389cbac7911c29669441a26d007646519dcd47"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 205904, 384, 2, 
64, 0, 3, 64, 0, 2, true, false, false, false, false, "85a4b004333203fc9430c2c32c5a5ddc465bb6c7de279ee6ac813a1ab4a30577"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159824, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "6b34f60a064af3cd55b889f41a02befb68af898fa9d9661128b80fcd1c662b55"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 172192, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "af71ae13b5fbbdd429aee3538513642bbea109f60435e83ab454397cec665fa1"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 146000, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "41dce0a036d61b3446c1e4a9ad912dbb75b55f10fc8199212a038a303f6b730e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 
64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 160416, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "aafa8fc4dee6021386fdc40095eb940f5332912f711b58a7631b233cd6833b73"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 182464, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "6c2d520a8e50ff54b82fc3815a9db06df617a8e751f727fcac78ea8e5c415412"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 159824, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "86f060208a8b084c94cd24569991dd1a367855c01eab101f212b25dd69b337e4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 194832, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "c21e0eb18385d4db0e88ecd367a3054ff15dd2e84ba97bdb33384d4f7c952a21"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 172192, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "0ff2d4d51d8656f6c986e0a195f1f329924ff59c5d1ab75c8b2f450849450198"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 205984, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, false, false, "76f5404352b850805749effe0cb75fd447f4897bd0f42eca10403df047769fe5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 205888, 384, 2, 64, 
0, 3, 64, 0, 0, true, false, false, false, false, "a80d74b6d5d94094ca10db5f4a44f2a8c88b1f61b12ea682f049403270dc7c32"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157376, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "449db820cc72468dd68cb66a0d4cb1ed1449e1758f1d2e612b41cc622e683975"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 146000, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "ff406676d168825788b7547e3c0e03d3ce1e64f5d5a17fb1d503b1418985ca0b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 171792, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "7c123447b08ff357a72f617b2e7a274b4552f0877b12d5b9f679dbe691d89924"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 160416, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "4d931c0171648312d9a40f071009305ae20b3c86be34c28b0cb922ee5f320777"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197960, 512, 2, 1, 0, 2, 16, 0, 3, true, false, false, true, false, "ddfa799b0862989d99ab1f003531c583cf76337566052708a2801ea5f700c2e4"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 184264, 512, 2, 1, 0, 2, 8, 0, 3, true, false, false, true, false, "4e35bd906dccf9768693f799b64db43d72c5bab92be5457f2cc3ad3a321e89c6"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 164928, 512, 2, 1, 0, 2, 16, 0, 1, true, false, false, true, false, "64d028a7c36b7aeba32b5920944d98b36d2e65495ddc3cc0a238171e0b940180"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 151232, 512, 2, 1, 0, 2, 8, 0, 1, true, false, false, true, false, "92c0a63f353ef7de7e7af89af1f52aff86eb35f73822cea2c5a3ae5a7a04af34"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen", 169120, 512, 2, 1, 0, 2, 16, 1, 0, true, false, false, true, false, "f821c1b2d7c87d84619fbbfc2ca4c9ed1dfe0f58a19f53d426d9fb9c32b72744"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen", 164928, 512, 2, 1, 0, 2, 16, 0, 0, true, false, false, true, false, "df00b3a5234f88963031a75d433151d7ef3ca8efffb46dece3e23bf531cfad27"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen", 153376, 512, 2, 1, 0, 2, 8, 1, 0, true, false, false, true, false, "830783bf4c9ecfe901d7779358683efd8aad701c959308a318e094eae03e9f1c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvBfloat16OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen", 151232, 512, 2, 1, 0, 2, 8, 0, 0, true, false, false, true, false, "3045c5129ae5a6e4d57148e26d6ec80b04b203b7fc8623347f5179010e88eed8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228408, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "bdf40d0d3c3848882d8528f39ebaa84875ba5c41d78d0993ac9d72e417f30c90"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, 
DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "8c2ac42614fde3ac968d5467d819680c5b6b4989b5b72189b83052e4597205b0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "cf9296db96c877aba714272861412ffd2c2930746fd669819f089595da576bcc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "b9a44139ebbfc8b2de2956fe2beebbeeeadbb11188495b86cb3c4bc97a22f594"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 
228408, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "3a00e2f6540f4c508d2fff0ce05cf25da6ef0e66124ffd5c6232a185406c45d2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "098bf66d1ea12eb17772fe7f9fc802657231c9e36505dc6f13646f7fc462d652"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "addff1c835b688c0f7ede2f31af5a883d69973c0bb369bff42191dfdfcdab3a4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "338cf1ead3a25458a5ae9e06135953a1464a51f320f0477b97714b3846434ba6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "b83773f1522a204719eab0076557a438c7ab1530555a2a7a0ebacd3db7fcef55"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "042b65fb3bf5618397ce160fdd45f89cce796f4747932c732198f8e5e756c130"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "45f868fb17eec8984404c2c8b500cb4081d30dcfd22fc3c81d9516e55570cdce"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "bc490ac9d4c7f74ec214a78140669e427923b28d272ea9ea1b65755577b98218"}, +{ 
DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "f12addf254182900ea104b912eb13a4da0abd1b398b6d6a46e1657b3e12dbf61"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "43a6188c3c1f014e7668c3f299712b7040c7ececff23cd523e873295b323f23f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "3c7347a74bb972dfd1edfac6d3664885344bc0d34e114362c511c81a2e3b6c37"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, 
"a7411d66fd78b3d93c287b02733397a1640b4d635626082b7ef40f0d73d076df"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "149cb4919807555a77a0296eb17d9656a4b8c88acd1bbaaacd76c1753b37f301"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "4b77810b0db44086606e530b73e2ec4378442f1f8d15d7db0f69297fc5b20e36"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "55ad12406613f14b21e6db77c6b10a2acf39cdc69e6d79b8564d8c5d3bfd6dff"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "d2c6c95172d8fa2fce4d2c6729fe1d127df34d4019bd93c7763fee62e0a94ef0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "5a5ae82a46195d28d1163f053aa5795e5f71fba126ad6bf624b1156f78583ba6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "6acea23a42d0d317d8086c351ee4352d807dd5a0d7e932c38667aaba9fa858f8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "2e3c0cfb846abc28b0254d28d04687d436472269c060105e4d48fe3baf145c7e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "722f7eb41b0839daac687b1124d95a77a0fbbcde299b7e2585605cd27f5e7a26"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "68889b4fb56545a0faf4392b87dcfcc8aabd2bc601f2c547028355a4c2445b15"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "e3ed1a734695cf24526236dd61aaf5b55bb0a427bdffc1118942d1e22afceff4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "2e5fffb4d384b685f1789686213569ed79729881564425b3ae96c1a22f3123a9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "bcd06ac54e72561cd5981d9ee749990003f70cd6998ce110e032e70c7491be0d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "dafc542d42d15b6a9d478ee465d04542c59893a01e7aefb26b6019ec193571ff"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "7607302f0d34f1369073f9969bec37d9d31697bcf730ab00b405d32c265f98a9"}, +{ 
DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "16e6fdad817653344b5b3316cad139f7b7530fa5f161007b7b89b6d5335820e1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "84551dc422cd0d5d0c0c178c12f2390304b11808c5f582333198c1bc7c28a8d6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "a252453afb458f4b1423caf9baaf835917d807897a79f35b05f5403496836811"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "41911f1ca8b59ce49c51862d70517e104453d973d53488c93be6c3f25191b6cc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "d430acbbefeb886ea7a6535bd346e1b3afaca5e248aa647bb2c8f755534e870c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "a5a981c01913fad24fa012dac6cf34d342fc9e35b8b3deea955f2652637c175b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "1170e6a336e59129f23fd41ac8b1461268637f39374643cdf6b7327f14e8719b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "46bdcf9263888eaa8ba2b2ca7e7157688495acb993abeb6aab29c8acec11dd1e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "acbf0b1a749b8d7852b894d1aadb5bbf92ff7f582b59b010c8e4d849db55088d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "8421724b73416f78cafa9c03a80c4e00d6e0113041fb64a62843b7033d3d78b6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 
16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "0ad84d801302c47d188a45c5ab6b799c4253db9567ed10b8ea56dc94c52dea44"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "7681865590886f262f6ea0bbb332a4873f7b826c203ca90bb03a5af278470d35"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "51ea4f5f1e20f1f23403cbe654a87c480603bf01f94bd41121bdb840141ff071"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 
512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "4d1205f8b6d6490e93858881813b1e892a9e1d075fc027e15d029962757c33eb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "acb569691c40cafc71e64411b658be829a3ca003b57ed950659daadc2f8d5a8e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "a3e89dabf213d8d05411655fb3d5d8d5ca911995bad3f9ca7256065f278f614c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "fc41bab25302c064c09d6576d1d8b2452376c8d4cad2c0d4c43d6e658cb02355"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "3c27cbad945294ae7c61abde73df75b7959adfad199679c1715cdfe5bb87f5a2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "f2b1be254979d852cd393410e3995ab28eef7db342866d9d52998fcc24613aa7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "2bfc7d96187823e8cebbab8cf138dab334e6877dc06749d729386c020d5a4e3e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "c651335fedbb8c0268e3dafba3af8d7576bbc27d20111b95aee478c846288c44"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "b6695823c0a499d93eedba4d2911c916e5d8f9adf547c4932bc8d1382ea2837a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "33aec117529b6ab479194bee008bf34a67cf7b83251d0442a93ce0436563b6d7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "9dc931339f3d1852acc76f337629a995fb601552ffbfa6f9bdc5f54f08b4afe7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, 
"d678f175e0e909b6c52a51ff7f4b5241085503ad20df9e6c6068d84c3e492b32"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "caa0a07ef446e23f154df3b713cf1e6ef9f5db3fdc52a8c3d932627fcde03a1e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "6c42b1f1857b9a20da1fd0dd0da2d5ea961d4511d99208b565bc016622ccae99"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "f5bb56216c658e9bfbb37852789e0baf431fdf4c5f961f1de7727c034f9df8f4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "6640886b672347bf8dd4f9310726c0e0afd7d5575e752fd205ab93743f6b61eb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "74b95a4f6d0a38ea0f77b6fac0b069c59da58b7d2b5c7b8927d7a9d6b9b804e3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "0082a668f4b370f4bf38fac6ffb2c6732dad7b24a2b3d39ceb4181f3e0ad12ad"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 2, 2, 16, 
0, 0, true, false, false, false, false, "bbd2108615a37c4ce3758199d34155132fe207388857b7bb26e8ecf94250bd92"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "20326e8499e448fac0f780d68fc51409de2964acbc57a7495a9cf92e183becbd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "50fefa87ff004dd09fd5015ba8c95a2941b000839aaa701663eba3a50f9962d0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "a2cafda22c9e63fa859d4e7f6f16e76889b9a3d8cc50a9029f5b7fa8b0b5a5ba"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "a6715ac930c0537fd86b4f06564dd84e6c34f75ec9cfc9660ef74f69e788f957"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "3a934e9e0bc070bff1bbc52e3f6f5cd31333d4d904aaa0762c1a3777d7160d06"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "1267c49e462ec0687fe37ad0fe20f07b9bd00594ac260091441d1afeb43987c3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "52df788ed0fb93e9330edfb91edcf580d8bdc4c90768e0ca3ad138ff198bfff4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "b9fc4927bc06e5c48d9ee9edc563c7c7c9f5fd420a835205e61d50e27b74cd3c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "26b2dfe09d0bef2ec3678295811165a09c967435917c0a1fcd9703dc350b00cb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, 
"d70e54d0ac92738fcaf7b4fa7760fee6320530143185b145e090e53be5540e13"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 191816, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "1d2b6d98a8f6ad70845c570aeb081152f115f66f9dfe8280d83afbf6735416d4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "30fc77575147dce034925e6771113fed9732c12b6c91a8a37f3ddf0e3ddc86f1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 165280, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "21937fa805089cce2d49ad0def6f945df5081d180cb71b9a240af5402881bb83"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 156976, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "6f84012d1e2ef5184a42a83cd325d08991a2b357590079fb3ec116e9ca1bf68c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 191816, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "4438bee9460bdb2a1db855cf2c276f51c543748cdc6c1ad3c288a72c7436e344"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "115ee5d04eee67a483e2e3d7d7febaa594b03e8abf444f7eb1f24f0992d16568"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 165280, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "46f16364f5e58269bccddc7459389f40aa82f97c468f82ae088e0a24ec114b41"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 156976, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "71ae3e7b91a63a712fdeb9107abd562d74bc6121ecd4f2845ac655a64964e634"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "16e83a9a1a9a9c9181072b359671b2e64159d7a916de2b38269ae813b8bad90d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "d9c7f46e893d7a5eab210e841f8c4308eb8e9c95888ec9abeb6dfc5189f65907"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, 
"73efaa2012dd01f70e18875d7275c8ef2cffc8938501b73e2d777cefb3841365"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "7bc27358402cfbc7bf29b54a2c72b549ae30c52cdb0385812b5520bf29a6a594"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "b451b3da38cd1215d9d6398b22f442a3961bde3bc1f4bf18fd86721e3c38cb38"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "2450f4055536edb970974e05d9a8afc7712e2be56db1039f491c45d05933279b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 
153504, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "70dc4d651d1bfc89f62ab441f6068dc18ecc39b43fb58e686926e9f79c53de14"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "5560358c8bf9a0c1679fc48ca8f404974625b7ccf649b2a8e3d9207fcae259d1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "b6e55edf56bcc1c63e9206df26b07875c99330fe06fac9a978bd8a88d2fb8f44"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "b2737e836a90c28a146d1d6408a99c062e2059399d28d55a4e423cb304d1994e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "625df18485fb9e95c9a89d58995ab87d120eb3ef6589824f402ff263755fb182"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "75589e6013ddd9d3884c39bead244f348ef77c5352c575c494e0b247d6374409"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "137b663b51d80cc2ee03ea283bf7219cb7b1473902be4f20ee8432f638216f14"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "6441bb4f5a993144ce394e845048be838eeb7f505177d0f3814bc1d83293e97f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "c17ab1edc6fe3039ae393e1aee98110c1c127f29d27d7885a6c860438ed0d661"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "4849807101e17096f1cce51a5ad07f80111f66116f0bdc6c6f296ddf88b34fc4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "06437011d2caf3882dca3f8b651b07786e39c3d329fdffadc2270ab2456a1b25"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 32, 2, 
2, 8, 0, 3, true, false, false, false, false, "c7162c68ba0dfee1dc2f6b307bed5a844aed6b64326010bf11356f3c70a62a2f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "051734d3332bd63e34fac554aa6dfb9b5a4dc54ad807ea0dbdb3570342a0cad3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "74f477ebe0a2eb4ac7bc91de6aa3b47d57342b6e0a7903b3496f015e55dc4a68"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "ce6b03703a8a472eecd06b1e8ff3c28683c9ba67546495efae5551cdc5dcc041"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "69e02128a1fea766a2f895499e83aca90e157cb53a786820a66a1a9df34314b7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "b63c0a55a29e474bca84012c21dceccd24252e6cbfe248e41a927226985e1205"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "60485170d6d91087e1ecebc75a5c6a6095221d40db4051fd80814cc38a13652a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "9c3a86e1a12a8e0e930e15154e1ddfc6be7935d64172702847f1389ff1eae7a4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "901e8cb43a3d105c273a3037f04be1cb1aa7f8faad10a41c805812f9a23a53b5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "c71d0dd8fbd1603ed55c90bb19e458758e4a8e44b3e4ba6bd6fb65755ba67d32"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, 
"fbde65f9f8241dfeba18eca659cd6b172f54d504240b69d7473580e1ff48c444"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "77bbba88e24e1481951941c1288e8056f3389c004ced10c197311051baa3f4bc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "7f7521923a0f8e534c045f73122da6f1eddc1da8d2033a4e197fe19c53e46707"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "59bad4b9c0aecb43794e5d11984d85ccda758edd4c1253be8b64395142825244"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "b83523d2d71bf2afdee7f21488681995e447a9e6d2b0b148fe73ffa9d68f6762"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197800, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "de98159cb6c13d9327bc2c6fbb293ddcfa3bb8a81563eac5549789f414619640"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 197944, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "3a77bc4502bbcb2e9b27c263e6fa9b9f537a827130e67fc231729522c807a05d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 214192, 384, 2, 32, 0, 3, 64, 0, 3, true, false, false, false, false, "a50d32a642b4bd4cdd0416ed709017de8374a7d3874925c2961d77656ea05849"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 190120, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "ac4c8b24b4b218237a25b5fac3b0127907d64281540ec75d928cfc79f074a6bf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 190264, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "95218df3910491fe763d9f4288dd59f7de50679f1e3da143383b55772ead01d1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 214176, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, 
false, false, "89a4a412a40c4af6d88a298c07090086b7626237233698abc95c3daf8fb00793"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 164000, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "ca123b4d93c1c349e6e6d5c8cf30abaa3a3871650874eb5847711a859b3d6346"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 164144, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "45f3b8189a10010917f207014024bc5906f17d2324f6bb56fd9d6df92d149593"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156320, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "7fdac76b17eeab721a87ff785d707367a0cc4ccc281a3cd1eabbae133422a62c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 156464, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "70e8bee779d7367348c8af5b11b34597fff4da1937261d089d1973baab32dad2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 178448, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "4c461e9499628a32064752e622940f6c16094ceb0c2ef9db6b3498b8c427d5dd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 164000, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "3784fb9615d74152f4a875574e144ca517a698509c3ce343f20bb0ca92536ff6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 176544, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "c4818bf350e064bb38725a46ffac514065f6dce9a043f1ff2f072984e2989934"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 164144, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "9007309585d1a3e389e80e5748774c69f6cdde3de715b9bf816a2390704deb99"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 214256, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, false, false, "11feac1aa376cc17dfb39a3d544b22dcf8f555be8a233d2a29bef1c56eea777c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 214160, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, false, false, "0d2ea12fa1a31727b558ec7b1c7550795d33f83576a09e866f398da055f09045"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, 
DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163600, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "9f028d4d8358b6ffe79c46752d401ecb664a854a4957df6c50caff4d54ec0be9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 156320, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "090c268f363990e859e1a3a3ef95d5e7816eb3f5c9555fae4d89fadfc93a7610"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 162720, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "a5530d19e59b24ab3f42260487eab5fa4169a996ee55238daf89cf7cd18772ef"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 156464, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "9f8b7255b09e99599ab8472fdb231e5dc48c42a1034f6fa674504f93bc889b95"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197800, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "dbbbfcc721ee0a2311fe68b2475b402da45184abd120dd92a710240d7a164100"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 197944, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "6772a10d8f87227d9c2da5df40795f0f35b7303940ec06d89ae32185a3fc0277"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 214192, 384, 2, 64, 0, 3, 64, 0, 3, true, false, false, false, false, 
"924bb8e99f614607ea5d15bd2036aedc717c6729200895abe49971227efe5193"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 190120, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "fef3003c50a62426d4c16a0a4d5473ec257ed9a2ec277eaa6ee06003f43a12d1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 190264, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "2fbca83e0ca96b4450c234ba84160b78fd36c6d914c75391db4fead7d83ab107"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 214176, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, false, false, "aba20b21e2732ee6149b8ef6fba86f5628699acd0360996dfc4f16c598af009d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 164000, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "6c24b7539d87cc6bf51767946f9509f30686abc57dba09abf4a4d08de2f98a8a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 164144, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "ce868cbd9b5bdf9e4c4c39a72cd77f0f41996597faef5523107b5563ea5f35d7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156320, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "07a0a97658d517a55aafed1679124595b151d729245180e24853ada31255146f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 156464, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "324b7cc6959d8f59c1d368ae4f6b9bb64c4dcda2a088d2f64d3990cd0e6f4141"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 178448, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "e0787e3a2a2e3e43b8f7c3aa2002806701e0a8a8e6d8d308cc3f2a50c08ee244"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 164000, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "5e0614e7b590a40037f044cdcbd001ea6dd2a3a33c90e60f72797a4f5e276605"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 176544, 512, 2, 64, 0, 2, 16, 1, 0, true, 
false, false, false, false, "c1d6258c40da093374206f605c4e7d9dbaa3ba47b8d8a173e1b4c1a606a679e3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 164144, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "e81b062c5e9c3de1ad9090f3eb69fd74ac38ca3a917c6b74715a4a53896b665e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 214256, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, false, false, "d68567b6882d8d49d56077dcf7d4c54c34ae121bebb35177997b0d031e510de3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 214160, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, false, false, "ffdfd30db644c4d90f46ecaf7c5efd5468960cddaa2f7b658d286e8a0c65c6f3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163600, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "bac7f9b0b0a3aa6f001edc5f0306a72b4ce52eb61ebabba7a9b709e1fbcdea06"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 156320, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "776a2c6b283d1bc96312cbcfc59dd53792548f58c0aae0bd84c6331a54d5bfba"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 162720, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "7dc632d747d8270164e372652191e0cd68ad27f9343df89c591e99398aa4f5f3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 156464, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "559b9135aa41998947bd517e7f871c1e437a2cab7c385aba56f67020a081f8b4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213160, 512, 2, 1, 0, 2, 16, 0, 3, true, false, false, true, false, "c0137e0af367a785c075fc7908d95271b5b54b17fa38c6b0b2c035af2fd67fd2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 200488, 512, 2, 1, 0, 2, 8, 0, 3, true, false, false, true, false, "0a8abc8dd96a2e84ba55bd575e719ea5e5d2f7b73b0a12cc5607fe6619621661"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 179360, 512, 2, 1, 0, 2, 16, 0, 1, true, false, false, true, false, 
"5d9e3601487d5f98b19010ee2d94e6ede058868c56fbbf2555a8528d964fedf5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 166688, 512, 2, 1, 0, 2, 8, 0, 1, true, false, false, true, false, "69fd5eaf0069dc716d110c624bf674ce89e001b4cb27b13369a85c70ecbc6cff"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen", 183552, 512, 2, 1, 0, 2, 16, 1, 0, true, false, false, true, false, "1deab8fbb43777f86f1845b49ed365057d07fb150572aadb6fafe2ec081b24de"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen", 179360, 512, 2, 1, 0, 2, 16, 0, 0, true, false, false, true, false, "d7fe9ded196b724d749aad0caf79effa563e62cca0d3ff04c850c960bbefa361"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen", 168832, 512, 2, 1, 0, 2, 8, 1, 0, true, false, false, true, false, "c69beb39a463c0b09e7a6b00055c168089f1c41a2715eee3335bb3ba65800efe"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 128, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta128PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen", 166688, 512, 2, 1, 0, 2, 8, 0, 0, true, false, false, true, false, "cf02b0155ae56365c9ed8412e7a53daafcd73a2db84546c94dddec67a194fc0a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197288, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "781e0038c562b2dae5b6a473f83899d3e10a019b958616da973a24ba0802bfbb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 197432, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "e54ef4def90c64df1ea146dd0e3bc216ac1be23238d14761e1e8037543247be8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 207464, 384, 2, 32, 0, 3, 64, 0, 3, true, false, true, false, false, "d845c4dafc46aa2a3f04617fb73afebecaa21ac1889a1bd7844eeba91cbb5362"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 214192, 384, 2, 32, 0, 3, 64, 0, 3, true, false, false, false, false, "d5cdbf19a074b2b408bf163ac545789945b1baf07ba52ae65461df4d85dbd374"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189608, 512, 2, 32, 0, 2, 8, 0, 3, true, false, 
false, false, false, "b32ff9cc34faad5bda9a13f61690a64a3dc1511b3dba0255ee63e81428ca2758"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 189752, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "4a4389d5e774b5830144b820a5c6f6e1596064a3077299240101e37e00deba8d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen", 207448, 384, 2, 32, 0, 3, 64, 0, 2, true, false, true, false, false, "0f8fae4ed53e2845c8078d4b1c1a80c28bf0aa5e13501299c6806781387539ab"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 214176, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, false, false, "2f8ce1877807ca962f2ebb17e87ea07b2b153bba992b23f21199e1d59ab94034"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 
16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 164000, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "455a9661f317abc3a8483f90eb3ae622bd2da1cd87b17ff7916448c3882bc6a3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 164144, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "1423f794d5e940a57d1b6ce9ac135575cd991864d519ba025620abef08902bc8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156320, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "536e5bd996873a3cc58057640cc222c3c87202daa6e22f88157acc9a12c4c4f0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 156464, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "763fb93e882203c85a554a1cbf2a7063c5309a7302e70335b27540f6271f37e9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 178448, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "5461dcbd24a81e4eecb841ce6fe9a9ce8c38800f1ad7b0f8ecc093f2896c98d6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 164000, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "168a110f2239fd11d6b386132c107531ef27d1bb76b59c62d3d195361e41b4d5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 176544, 512, 2, 32, 0, 2, 16, 1, 0, true, 
false, false, false, false, "781bbe196a7246f6cb177ed26cf9dde5ed3d74b35ca83e11ba00b9e43a158232"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 164144, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "78713bd359d17dbb6551125c7bf12ac2783854f0907f3c3e8699f8cfb7db9ce9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 207528, 384, 2, 32, 0, 3, 64, 1, 0, true, false, true, false, false, "ede8fd47c91170b66f75f4590f4c86ec601d7ec99b5fae199b6b76dd414b5e2b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 214256, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, false, false, "433354de9c7982dc129081fa53f1c828df40f2199e7dabdd7f07fef913293f7b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128Static2CtaKeepsAbForGen", 207432, 384, 2, 32, 0, 3, 64, 0, 0, true, false, true, false, false, "a0722fc7d3d706f890f8d305ea0499161c042e94879de41e083d79555150b750"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 214160, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, false, false, "c9a213c60e004b36486c0d265788e77d9593174246124bd08eda9445767bec5f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163600, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "487f653fb029e6861687667604825df816e7e965a3e166bc84441ef19889c04d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 156320, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "745cfcc6ef6f02b22098ea6c2cf5d19fe1e617bdeeaf7372d20d6ee88d33eb27"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 162720, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "30472bc04877473439cb2b956731d92e252393f22a17c3db43798cdca7fa69a4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 156464, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "7b289b24fe76114e443056be374081953652fb82788326a6d469c6a0db5efe65"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197288, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "8277692a321f1e66f00b17ef05e85d656490cc27b7650ec4afa329d67886cf83"}, +{ DATA_TYPE_E4M3, 
DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 197432, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "86063446eccc0a3bab5c29a00000f259be81859d5d599c9797f7799ed3a762fa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 207464, 384, 2, 64, 0, 3, 64, 0, 3, true, false, true, false, false, "ffdc96f14548beec32dbc8f954f501bcd516b581509aa7b36df8f3c04df36349"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 214192, 384, 2, 64, 0, 3, 64, 0, 3, true, false, false, false, false, "fe54e562fe6a769cad9e22c344b8b64475d233ecbb4894ed7d76e35328c17fe5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189608, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "00e416e9ab313491a1359e6e507351ab7d61f149d3ae15a6577bc270643afed8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 189752, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "6d86ef06063f4da9a61eccb77e0e59418e726213f75b9b92a54b737acac5d3d4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen", 207448, 384, 2, 64, 0, 3, 64, 0, 2, true, false, true, false, false, "5d394eb7b0e1df0e4b703616f463f8a32263eba831431092737e30b8b197eb47"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 214176, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, false, false, "abaf73e9c19ded947366966ec6746638d1848513061101218e16cc20bd0c41e7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 164000, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "603745e9538b0208154e736c4d064b3bd8469ef401f38b159cbec9e4d9037924"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 164144, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "ba5f789f61168e79dfd25f42c5d65907aff4c31cb26f8e412956b7c0d86078b1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156320, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "d24814515c91e8ecf394dd63fbfbc1c38bc8fec83636b6b8d3db71723c5db645"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 156464, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "f173d4dd53b53d8ccc504bb2cca629e0a2208d4c10ddec86d24ed587b05db275"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 178448, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "7e5568bc8ab67a2cc0e93c0760eba60eea0db1d9942e48a38f54d73e280e2d61"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 164000, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "ef4c969e0cf82e5ce187e4ab33d1991a46e69d57066a2930103ed805cbf90def"}, +{ 
DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 176544, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "6724cd34aa3c4c5a73fca5a1f9360aea65d08716ca4a7e5bb79ed29a190ea1ed"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 164144, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "25ee7887c5e65fdbf4df12558bbad29d4f26b0be3ec65cb450120474e6023133"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 207528, 384, 2, 64, 0, 3, 64, 1, 0, true, false, true, false, false, "5e49a854351343e37e1a8ab376d9c90c8af06540f27bdd1047f48b060c7076eb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 214256, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, false, false, "2a05ee0c552f482a43d9db3282f9343db42d813d03f06594296d882f14cb7b44"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128Static2CtaKeepsAbForGen", 207432, 384, 2, 64, 0, 3, 64, 0, 0, true, false, true, false, false, "a3cc2058b242731d2f9813b2450c2fe2a27a7de846d9335c0d0aa10384dc10a3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 214160, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, false, false, "5ef8186a17048c8d57ce27a94012e6cec759c63a0239c9be3ee943d1fcf4db92"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163600, 512, 2, 64, 0, 2, 8, 1, 0, true, false, 
false, false, false, "f363c0b612c80bb3dce9c3bba0dce3c905a5c005c56b4f6da3d40f5c1fa68f0b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 156320, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "f5449f388a56d945ade0c08d0e14acbedfc9319b7130b69d6fe663a2071a3d6e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 162720, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "203b16de8eb0332aa15f0b260d77e12a11e0c7e6b5ed3388efa47738a332b31d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 156464, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "72ca14194f99a89cb957aeab35c9860732dc6d154c086c2d59678f18f3d2a824"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 212648, 512, 2, 1, 0, 2, 16, 0, 3, true, false, false, true, false, "391dba8b19cc995a1788483a359ea9fd08cd2dcd97057eca61725dd794994060"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ64Kv128Static2CtaKeepsAbForGen", 212840, 512, 2, 1, 0, 3, 64, 0, 3, true, false, true, true, false, "0d3f54046afbb9cade76ef171393147323977018f62fe7ff77c14a1939809b7b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 199976, 512, 2, 1, 0, 2, 8, 0, 3, true, false, false, true, false, "ba16a272805f1a56fcd165ea8944ca0fe4bd83136dc5ee6cd15dbd925fddbf89"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvGmemSepVarSeqQ64Kv128Static2CtaKeepsAbForGen", 212824, 512, 2, 1, 0, 3, 64, 0, 2, true, false, true, true, false, "35024f3c6d6d8f569bba5b7596a77f8e3f238b3e59954090ad95502faac249a0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 179360, 512, 2, 1, 0, 2, 16, 0, 1, true, false, false, true, false, "0243905119fd12b9c74979a30a65876bc3aa385e1f8a1f99d68bbec272ba609e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 166688, 512, 2, 1, 0, 2, 8, 0, 1, true, false, false, true, false, "f07cb71c29ef80b6c907efc5b25b2fee8499b6a814652b2ec9c1a5a4ca1d1dfd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen", 183552, 512, 2, 1, 0, 2, 16, 1, 0, true, false, false, true, false, 
"97cd444f587017e5f4e553146ba3ea7b9695315aabf274de603fa0a498063f3c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen", 179360, 512, 2, 1, 0, 2, 16, 0, 0, true, false, false, true, false, "370a54e0354b5eb8330bb14471bee610d46300b45a725f4ee7a65f04a7d127fe"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Persistent2CtaKeepsAbForGen", 212904, 512, 2, 1, 0, 3, 64, 1, 0, true, false, true, true, false, "e58b5bd4ee0e45c0ce7afbe189b6fde60a8bebdb8f4acd9f193cb5f8b286587f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ64Kv128Static2CtaKeepsAbForGen", 212808, 512, 2, 1, 0, 3, 64, 0, 0, true, false, true, true, false, "b699d374351ba6ef0758c0b32e460e253366628c4e365de543ecffdefeb89b98"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen", 168832, 512, 2, 1, 0, 2, 8, 1, 0, true, false, false, true, false, "fbe11f7e0b3cc06daa44e8302a5129ac8268ef0a8051c2edf05223c161011604"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 256, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta256PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen", 166688, 512, 2, 1, 0, 2, 8, 0, 0, true, false, false, true, false, "a364e2f203aed6da53ffcf428c62b76cb5c77729312631643905dc88942e8b7a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197032, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "b21035c4c504ca3f90ca1ac5196b1987b3568e21f25be69ab5669f83c8352f79"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 197176, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "404c8e5e00747238ef70ba408bae9685317381a4da78852744aae906019308ef"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 214192, 384, 2, 32, 0, 3, 64, 0, 3, true, false, false, false, false, "b013bb6c7975b64fdc6037d96fe6d6cb3c99e6ad4c0603e0391c2a7ae3d16fea"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189352, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "117b63020f48679a8c1129f1cbe0b101f9de84d59154cc8ab6cd4524f8aae690"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 189496, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, 
"c29afe3f0a7f748c0d0b2e2e08a6d085c7643f0a50286c489dd8631b60fb0af1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 214176, 384, 2, 32, 0, 3, 64, 0, 2, true, false, false, false, false, "f5c657a3d53e5ea5e2785751a7b1cbf5b129d1b05d410c38ad426417c2f74e36"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 164000, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "55995ec812190b6e697d7c7e947e1b3eed379f210f82cea9eba8ade8c71af34d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 164144, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "4281b70194674f9e66ebe57c8b134f62faf033b1753ec25453c5d8e5c3eef3c5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156320, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "0931692c149fdbd77b86a56ac9b0518c1b2b3cf74b55c1c9074be5887e38fa71"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 156464, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "6079b650acdb1b06b337084ea1f76f17d94949f80b774850f783e7d2cfde36ba"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 178448, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "d40d7450cbbaddaa34571e8e33df8b285eede26546d27d6ea865e506e38e0fb8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 164000, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "3340c8c44c94e41d48d21fa01f42604f2eb5ef98a2efc4e2dff62e795721184c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64PersistentSwapsAbForGen", 176544, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "9b2be9f5711ab3afd4ce0858703bb8e8c0929e1e3f2f9c1abb354355decc8d45"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ16Kv64StaticSwapsAbForGen", 164144, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "fd11f5f769e1146d441a3dc46b78f4398dd6599f029f03d6817b080487355b1f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128PersistentKeepsAbForGen", 214256, 384, 2, 32, 0, 3, 64, 1, 0, true, false, false, false, false, "6a6c7461298afa9e5d4fcee441ed673ff72980bff615f65b8770bd4a2c096ec2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, 
DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ64Kv128StaticKeepsAbForGen", 214160, 384, 2, 32, 0, 3, 64, 0, 0, true, false, false, false, false, "4be275656fdbb88c85934d4bd91fc742311bc2c4872878817653dd057542beb5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163600, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "85dcb66743d45bb850458804e60a99451f24fcfbce8cea99506505a57d9a9cd1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 156320, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "e5054f6a57ddb8cfc6a4f95ad30cab1f6ce067965240c2cfb645a89c73619493"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64PersistentSwapsAbForGen", 162720, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "813774e07d5421a11b0e393f504632a57fc1c08251d377ac6ef9789231b61507"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP32VarSeqQ8Kv64StaticSwapsAbForGen", 156464, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "0e2a47e527022b0f76e40f5b4abdceaf31de1e69cd045212ef743713de68ff14"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 197032, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "25b86fc05a4e55c3ac4e6b5c7e3f9a5fa2cd8d3cc407fcae273f3e33208aae57"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv64StaticSwapsAbForGen", 197176, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, 
"64a3e72e7b130c0204302611549c852d387b0b6492d6c1338e605aae771da76b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ64Kv128StaticKeepsAbForGen", 214192, 384, 2, 64, 0, 3, 64, 0, 3, true, false, false, false, false, "98dc679e81bc4c9f30619b8539ae0032b621a39d9f7be56034734eb09873daa3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189352, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "2138be093a1f28852f298ed2e90c4abe5a0fae781368d24a2f977f4599b111ef"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv64StaticSwapsAbForGen", 189496, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "19f82fba9820043f74d2a52c06babc51173b8eaf44013eea30e304d04f165cbc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvGmemSepVarSeqQ64Kv128StaticKeepsAbForGen", 214176, 384, 2, 64, 0, 3, 64, 0, 2, true, false, false, false, false, "4d405f503bbda062cdcc503c9dcc7c5960f9cafb4ebd313d24299e8cb3b79c32"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 164000, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "5f94e473430d6e0c488c21b75cbe0ae3064c13739983ed98fbf3cab4ecf02a2c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ16Kv64StaticSwapsAbForGen", 164144, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "d10e51d0fc7125790dfed34d66a7b2dd19b0f2e4ad82d75bcad571c6d53b40de"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 156320, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "a03d803fbffb00d2e8f23422b425ba8ff770821c93b152cf8041ce0188c6232d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64MultiCtasKvVarSeqQ8Kv64StaticSwapsAbForGen", 156464, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "fe3403ea56fc13aeeaee5d92ef6ffb3c3d2fd01c18bcef6d96974117fbe84cb8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 178448, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "28d4e8ccfd90bbe3f66afe974f210ff332b2e3d1bd206fb0563d45fa8863e54d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 164000, 512, 2, 64, 0, 2, 
16, 0, 0, true, false, false, false, false, "47bf04d4c5f6092dd08d23ef325295ff4f2829cc3c3f1c12cfcb0cfb8186103f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64PersistentSwapsAbForGen", 176544, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "fe49d82238b2ddefc816465821c405824163b3db2cd66a466c9e70f3d0c228fa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 64, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ16Kv64StaticSwapsAbForGen", 164144, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "2a9ded41a2377757025996b9047e24616080ee783fdb72fc0a7cc6b093e5455e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128PersistentKeepsAbForGen", 214256, 384, 2, 64, 0, 3, 64, 1, 0, true, false, false, false, false, "157848e8491588b40a4f1c5f73e4a4718597e9dbc57445fc14be43c958549921"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 64, 128, 64, 128, 512, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ64Kv128StaticKeepsAbForGen", 214160, 384, 2, 64, 0, 3, 64, 0, 0, true, false, false, false, false, "2b5d32ddd8f1a36f044589906156751df2e80f2284dcdd21cf96c41c3d3c79a1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163600, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "e908da282db9339476e8af5107e37a12816065715e4decf430e386f8a4ac1bbc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 256, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 156320, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "95bea757076e4f57dd5e9cce89cd172e1c17653e5c7cd828da389a9ff9adc6b3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64PersistentSwapsAbForGen", 162720, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "0776341eb0fb4776c5f0e658bad8f028782cf7593cf5170f90942e11b501651c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 64, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvDenseP64VarSeqQ8Kv64StaticSwapsAbForGen", 156464, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "80b56e414a7a5457a985d430d4aebf71ea58694aec6ec5d9f236286140955ea4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 212392, 512, 2, 1, 0, 2, 16, 0, 3, true, false, false, true, false, "86dc9d6e5731d3561f90c0aaa8599e95877c8e648ae61f6077e13269e78e8f54"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 199720, 512, 2, 1, 0, 2, 8, 0, 3, true, false, false, true, false, 
"03e644496b29e961e0648d19ac06ee41d8786c4012a733d268a27b90a2e4d4de"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 179360, 512, 2, 1, 0, 2, 16, 0, 1, true, false, false, true, false, "a6f0a1b71fa2b1310245f50a34dc8db0513eb643fc3859c8504f01e3b6dcd77e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 166688, 512, 2, 1, 0, 2, 8, 0, 1, true, false, false, true, false, "f5083301df38527fab93837be4d9b50a38e7241ed6314acc5317fca2bb864207"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128PersistentSwapsAbForGen", 183552, 512, 2, 1, 0, 2, 16, 1, 0, true, false, false, true, false, "e5b6d2a4bf5438ff70a50d679d524dbcb5ca980ecf84ee15e75df5464aa2b5b4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 16, 128, 16, 128, 512, 576, 512, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ16Kv128StaticSwapsAbForGen", 179360, 512, 2, 1, 0, 2, 16, 0, 0, true, false, false, true, false, "0f771a8b6c329ec615960ad9becfd0097692dcd9fb2269960ce78155ca9af16c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128PersistentSwapsAbForGen", 168832, 512, 2, 1, 0, 2, 8, 1, 0, true, false, false, true, false, "00ee265bff1ebfb70608ef4794a2dfb89ec92edf0dc5f440b6747b8f8d1131ee"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 8, 128, 8, 128, 512, 576, 512, kSM_100f, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OBfloat16HQk576HV512HVPerCta512PagedKvSparseP1VarSeqQ8Kv128StaticSwapsAbForGen", 166688, 512, 2, 1, 0, 2, 8, 0, 0, true, false, false, true, false, "390812f2629811942fb507d9a9542d8e64f08a800a3e84433b04778be70ee8eb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "e1ce70fdcb28a12d838bdbb20790072d6bd027a2ca9c5932e680f0d9b9fa0ff9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "8bf09dee03fade9b7c863845281548230c8e86eed9fbae09e6fc831f64a3a91d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "3c22374a6e3eb6dfb594fc87e10ffa5c3052b012d7eb399eaa0726d0993660a0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "686a8b55f7390c09dfa8e8d967cbf3e124ae744cf9f9e6e0538792026f28b67b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "61c41cde5719890f4f67f6f42eb636aec734a44df7fe9ee015419255e767b2c6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "c34598d3a5cfa163d3c18f82112ef1c3bd30ebf235d271e764c3f5433317c397"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "589d6f7127fa275883268bb2444d0e88e255ea4c93ed075daf01a04f006161fb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "58f21685c554fcd32445269fca41b483f9b9a2d8d6df18ddce9a0eda58611b81"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 
128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "b56f6b51106345984a3b27baa3dc3d73cf7e7a8781469c7b4fd5bb79ee334918"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "732e19de2ac7ec17de854a1e90576eefd6a08df555860ed8ffd2862e537eec0f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "61addc6c4a68b748dce5bef63913bbd5770e87752c0fc50839456365e1e532c7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, 
"c070b9faf40fdad577925075e58c1d12053df5da43cdc7da1b5b1f552fb702da"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "cd31d716941b073cb92a8161821b0c4b1a675541d71164174a980c554c05ee42"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "dfa15e024d58ba5ef8c459fd81197c651c5c97bbeb38ff3bda3dc0130743d486"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "95e63b35ee0a99d3ea6c5bbb5426ad803c3b39dee9e897282b8666a3effe6ef1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, 
false, false, "27a9907040e3efabca29f30bce2ff1eb01449dc7a139fc10dc5c3c7650f46ac5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "8ac68547cd431e6aa660b0d325e89b2c032bb10b1f2c211c2817c88d8c80b5fb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "aea4a455b676d6ff4c817b72e7687fe80bfe1ce5824e190dd972a631313559df"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "2f1054ac317730b8581188ec1b43ddc8b8c0cec51b824672badd020a90714c3a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "8d04ce4db1726dcfaf5742d2dbc8e92427570787226de381752821bdf47d2a4d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "a2f9335d9e9fbc21acfb9e56cbeb90df53817183caf86d90beda0aa88a4cc53d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "223914b64634cf11bcccdae5774f6eeec0df7fa9523934cbf3f745ab865c3fc5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "677342cdd1ac36be0f835dc2347f44b46b84032e41ce04bb6b77a3f89b56391e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "55b593678e1a9fba16bc18a80e604e1975b3d6ede73b551f7d47610dc4a09291"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "f36a6dfb9332c0227902be5d25a7e0b51d8004d60e67086471820d0c51b32440"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "426e90af62bb463e4a5eb682d9658bc27c3a9953332d490bf6fb81a39889720e"}, +{ DATA_TYPE_E4M3, 
DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "b6172219a3d2547b4b720ff4bdcc22bf77cda112e00f2a84426d101bd5d0462b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "dd8194827eb1aabb656e8c87f2efdb25f5b17ac9b3eb8ece2d8c2b9df40e061f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "f5f80683f1b99d1f8c515ad47852d8cd8fc057f70ddb4a2802c62b837a9237a4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "6c053a43c1e1d8a0341f2fc98bfa5b44398aa98747beade81a8a3ea95705bf13"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "14629ce5d5b85dca09bb5dc6d51ea3393d267e6bdbe56042cfe1ee1cc8a47fa2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "57ce4da3d70fd24fda05339e2ee90e199dc92d13f7cc15f0f9a72622583accbd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "00bc9734b721ed7a6d107e6fe37c5162268cdafdda25917686e38d382bb72d83"}, +{ 
DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "6e0fe81d2441905b7db9f95322ad5edbfb2df1ef39d2c90fe3fe0f9ca42893f5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "5b0ec446c381c36abc89b0ab28ebbf3dca52199cedbf98b8efab81816fcd8e3b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "bba8fd3163e78e7a87745f154a71e4cb8773fcda1c67096f88e579422144c675"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 32, 0, 2, 16, 1, 
0, true, false, false, false, false, "7e2517ffbff6ed3909e931eb2780fbb6d49ef9a454e696a203404fd35307e64a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "060ca7eaec333978d1fce3a0ea06c780a0fdf25e9ec78cb910610fc383f9c53c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "c9f50d82af455eea69f887ccf380746bab34a90c9e965b6e43b95407571f10ac"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "e657046fe870211689cd12686370e5491dca7c7f31fbf136394a2e3c028aa1c6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "7c7ebb6b3dfe955b98559b8c7df4171e9964344471a7583fd7e5d721aab83320"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "37c0c88f963312f9522aefdf00f05d83555b0f6d65a02d3b2e175ba3031864a9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "11618a23c5a8737ba523ecb3b67a5e654c62804afd6e226aefc5704486714217"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "15dc3bc9125e17e73bac76cdb2e1c93f2b348e80d22e6e850e40907555d944d4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "f83c0e2fbaaa7fb6651f4fcac5a444afc768ddfe9ca361b875d310faba601a10"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "11d50c9a51a9dce13ba8deee6b998061a60d08ce73e141b390f42e7d20204424"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "5ef656a6cd0c2f0e1c05b218aea547cf60f1b38a48c10947238ab0c34d127bfb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "32b1ae28a9692a1dcf79a8e370da805efdb9052117ea6732568d1ffe8875c17e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 
256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "be9c54bb3f33550dc2baa4ee9e5c9356de15c8efe799162f87ea3abbd1506b81"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "4f7ea0d8fecb94ab44f406439baa75977a208d3682c3abb7e47cf92ded041fe2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "5324fb8f42d2b99d9660f5be2b7d7467bc2efee93a8b4c92faf0e34238402cba"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "ee95e70ba6d8745ad6eaab5d9b1db01cf0f33b4d2d73ed8a45e2d824e43a20e9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "3ac979f42279f857cb33b8112bee7b8ad0af931e58c7f27de92f3257094c2627"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "10796cc0812be3282dff61229365a6e3ae3adf41cff5bfbc1d59ee288268d21b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, 
"25d2dc1bca192b09b44a1abbf4d923bae0288771a4ca2b99f794d0b9d15381d1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "65a9b38e5764f459c97342a3960629a3984f7a3ec749b1f921fa024330310c7a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "5711d791eff8941d431f867e3e5aa491ecd2ea8a57bc5465610c0891afee84d1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "5c0831c2bab6bfad152c230e1816c05e4d35099834536999e04569cc2d81a07e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "63c0d628828363ba2eef23e78b8cd17b791182e0d555947c1f18c55d797ba217"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "953c7c5a6cba66ab08cba945ccac57854e7520305f967a2b50d50ef262f5af29"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "6456009bab502b0334b540c63aff4738f8fef3c508409e213beaf9318fcf35eb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "5507c9db34fb6154808e3c633f7e3a087275b1ecfe308e8e6e72c23a21707c33"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "e3dc5dd84eb5323ad05f5e123baa6fa8f6012e3a0f232dc52ee9596e61259115"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "d78e36bd477175a1edc6b0e1147b5cec907e8dc4decc027b92b3f2fc51757130"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "7921fd7635f79c7b684526eedbfaa4e09cc2867ab8ff7e15f15998333a9d588d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "3b4c87b7db341978fc72e2959815263348d6190f24135b2c6bc2dab10d5fcb0c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "507e3b76bb590e8982b0a5e0c8d4a47ca14e290a529d9888200bb73a77cce8c3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "154e65f8375342fb48f4968262d87219a00f648978aa502f7f92c89d6692b95f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "107bd3a5beb8104da49003e776c0e91b2a52855bba0ce7d8de524abed68f66ec"}, +{ 
DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "9b9c0db0f63a406c80bfd1a2e09460932f123fae959c877b47a3350e7a400fed"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "fa69c748b4294f91384eebacf853d1646bfe4b7bd6960ca3b919c8d70ef758b8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "48c5a57d1100a1d308d52fa9d37e834eb3c85504df1c2c17b9f77ea5c3caa3ef"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, 
"ecc6212dd351b2cd495f99eaeb9c47c7c617d71c52fd841fc5236e9bea6da309"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "05b4ca394cc84b449a3a544df618ec3e8496be5db624e42f2c1b32326f2995f9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "a4918aabb7304c77a59f4c0a7fc26af4a18ecff15a1b51c2dcfd346d201e78a6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "a8b97555b1c856b98aa135c23af53f4d3367b6eb4f8a4e648242ee6f56409656"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "5df38df748acbcfe1887e30e4c0bbc0814f601fc9f04299d2955b63821ac09bb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "cf2d0296bd4fe95f61e01c75fa13a2ea625e7ebd0c64c48fa61a23ec1b64379e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "8d1978d3aa44f566fb76d06cfa37fab06d10623ca96fccf9273fb701dd103d04"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "69df07bc5faf9d121afde7cef010eb73e448d6a7f9587560170a3f3c8ef1428e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "e93cab9ca8a6da461eeda983a19615e4004a9091f5df99622bfc5041b32a6c24"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "a4132bc9323f247623c8822e1bcaee1f0865290eed4213db0b928b4c34b67542"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "94b4996c64f6db63f37e4fa81b524691d6a732312d2ed1b1d138080f60ba099f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 2, 2, 8, 
0, 1, true, false, false, false, false, "5842bd44a5a89e0fb622c538c278fbcce45c933df80f529a818a0568bf3dfbc4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "d35d7e63c866cccb2e3a6068bb84fc6ea3e93fa38e7ac94dd9b35528a5fdb820"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "b6302461331b3ec55b0a83b3e529f551593b9c7d319e6608c67f4a28557abc32"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "a14ce737ecd9e0819b417241ff09dae8222946ef2892684bef797cbdcb678e68"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "48df1df671d5bf6c725b242bd0ac362762cc20611a5e5ed7161de614b2e02068"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "0a6ee81cba063abc04f907364ee34f12445f388317fce482e3367387d701ce32"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "dfcdd9148c8fdafdc6844eae4c6bd4e523888deb52418c2b2d61454a32573dcf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 2, 2, 16, 0, 1, true, 
false, false, false, false, "1fe0466c87814507a1457d1d95eab8f83a4de281c2bc9cee29223935cacde72b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "f43fb51a23e62cade440065930921e1d146bd449e0cfaf4298632c7f741ec9f1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "d6bec74a2309d7d828f18379a3537a702a9004f09dfa068969a8ada6e5820815"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "fd0626b045a75ed586deb31b286f3d4112a7d03cb7d28dd9b7a1bf7f2b9a5ba3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "903fd5b4a9f3dfbf8b7a97a93781470e304b36fef07b7d351e01ca9ed158ef93"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE2m1H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "e4a79073c38de69a5193701fb0729c1cb6ef5c056e427d126b64816f7175a3e9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228408, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "8d5e141d200681fca7ece1b40427c18fd8b3b4c82dcd6a6b7dafaa2c607efb9f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "c639b92234e16ba8fe248f7c1e98b042480967f6052df87fd54033c736cde68c"}, +{ 
DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "99520eb954c8534a8f0ff24ec983c15cecd806a6a97ded2f4241e1129bec21b2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "49375101127abb62487ad866e77501a2d699fcb2ddc01fa69fd480a9818f693b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228408, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "064777a4eae5f8c6aca38df43145668772311c3dec1f07164ab7003b18e99dfd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 
3, 3, 128, 0, 1, true, false, false, false, false, "44ec993c594495624c40d62e7f6fc63ea6e7411312a412c9c8701b2bc03b4656"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "b4c51dc13ad76628e45cc28113c76285b5814b92bec92c72b44ba712ce44c7ab"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "11fbe42ad29c0016bd6e73b9928d68e79790cc5cb8a30bdb534f1f1ae2179e22"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "5ac736a44544b34247dc4002e551ce5c58b29f59c1828b053266d308ac01000c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "ef42aa3c21eaef81293416c8c05dc47f6a84946352b04e9d7220098da77727ea"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "6ce3fa0d9884ab4d099e0f544548aaf4d3bf6e45093138d3c85fcf4635315d93"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "48d160357d41855dad0d74cfc732df15d56e1de73c165472a7c042eb3dd16af5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 160016, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "0f7fb4d1ea66aa120fc510151cdf115574cd2e803ed35664069fa060dd621311"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "a0f4597dfa5618619219906153add8142b0f6270c6b847c5df08e6ba850032b9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 154384, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "40f29a506404c23e74710a09c47bccaa6b7675e47918acbc2d4c9ce341f71489"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "d50c92b846c745a4dc1cae31d0885e38a77d81af34ebfc028574e5d764bdfd18"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "a4a24764e4bafc6dc7f5c906351c6227c9065547f631be38fa660cf75f860c1d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "1f67b9a6d793374688d9103ddc37b8189fc3b3d829dc8ddbadd43fbc1d251adf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "a770b8dc08fce95f0ec7998514fe09176ebc6e503e3da19f3b9e84bf01a65104"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "d64a8bd7f516ad9352ff5d77eab6478b6a14c152ddbded6f4a9ed1c956c5fa50"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 160016, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, 
"a90127048cd17a1237ac37bc9a8a154fa9a856d263bb47e4bb33efc02629a566"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "c4d0013e0df30b68d95569b11fc7e9e2e5dca1270d7e330747543acbbb668945"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 154384, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "735297d189b43be3faef331a152775c4da674d2d96200cc4ceb56322062a3882"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "23b3ba5e7b513a723676a6af1e89077e786ca0177eeb9e8c8449dc2c3fa297b6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "1701d35e5b404cc0adea41931698ec67b32349a618587d983e3a6745172267f8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "607c92589f41060837b871ef1409b4a3fe5771b137d9970b0123f7d6557fc612"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "09da13be0ce9c098489126ded13670f72b25ba9949b49bbc781b57189499b4fc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, 
"7432ab462a552af70a2100df94cded41b0e888ba9cfceed74d1faa42828a6985"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 160016, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "7b1e645f2e699f3270e8a44a0fcfca04404edb5c7680149bd7c1e5c3122d34e5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "bbba2f80d80170fc6f84a67cf3eaa1593fdde1d5e85367d71731dc56c5faaec7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 154384, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "f15778a8eba5f54e46a6a7978038b0963f99a4fe93387d5815393d39184a5e84"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "65471bbabfff790f62087bd5d454c4d0d9f312ab57ba9289e4093cabcef2a447"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "81c8e27d3fbce15b07a152e7d9bea3ecc1da4d2ffac58d454dc81335495fe235"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "d3783a8665ff10ff8d495b9aa77a4ed1f2f42982c426ba418aa83560d34b6cd8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 2, 2, 
16, 0, 1, true, false, false, false, false, "ae706202be99e5270da6403af25a0fad41f30dd005dbb01207790aeed7523cac"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "777dfbcd7ea57298def73910da8569815aac274bdd0dcff5ae6d86223418d21b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 160016, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "a3dafa7e53ad621bec22f9a16b277c22ded789a6c80408be1e705d7f4f1cb7ab"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "d8b575bc81fe1f0eb7ed9e226de9f4a8dc9c6720fcb1af1896e6758874394f1b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 154384, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "1d9ad6477f3746afb65cd43fc758c07e49ccfa86450d9fb5d81f8cc5125407b1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "d4c818c4fd97f5fc3d1ba2ff83d2b30c0639444466678755611d1941f0b272f2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "f0ff50282473946f46fac36d5b4c45d0a062e911c19a6e14b0ef2c5380875f48"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 32, 0, 2, 8, 0, 3, true, false, 
false, false, false, "d7bcae58d478ca7fcb63cfd21a111ad991375c3e9a1a1cfa53763d662d3487db"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "14e89d34ed6dc5f3e4afd7463641754d05c71fdb626172113a44b8106078b986"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "8c1b73b72a498f0280d2b1613bec1df6adf85b257ad8c2aa9d0b6c80acde124e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "a862c9198bbfef97313059ec7148023f993d3b68927498c2c8a56f173d56caed"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "5d572d9fe62a9746fd5a9f52dd18f1dc133d4bbb2460b6456cc377c7de988616"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "2271126118d29d60408876fe662ed035ae9539892ddf090ba2e31d7b7782d003"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "1848f6a35891a2e540d60704f77b3271d89633454dfc02aac9bc55b9faab4d73"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "3e1a7cc6e5fc54136a7249cdd812e69247cb63de3942e21a6d18bd61363f685b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "ea7f0e09223e23965787cdecb860f80ec1fe33b2b2b1959efc66b7113b83a7f0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "875b04c35e1c67a29dcade12103ee226e7682c551d63f2132263a86b0c530d43"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "06c67262b32cdd852c537aa77e5add4cd8bc4aa92d20fa98c0ee297cd3f1e938"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "03a104df492fa2fb791e5a3eab619ca4a260fd0c72a3e95248fe80b05fcc6840"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, 
kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "70a740e3faf2af0d1c615f13a125a5dfba363f77c57cb0ca67bf7976398e1bb3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "a820f8461c8ec125f960f529e0afb99198a4ba53e04d5b31518e1fe6fb11d3a2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "d98e1287ede4d4e6a08af84cec2defcfb2b1d36a67a1d64b2cbb76478e34df53"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, 
"0049deff8ba8453340a0c7cdfdbfff3c11f182e60c8db7e84ce4847ec071ca4f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "4837b51e3ba12b807041addc3aa74c1e841915942a4e9c969de57ad57c27585f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "c11cf3d840670105e00839c87f3e643c0bf2ab1f771b76d7192772e9f59ddc2b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "928e36d5341ef6c12efeade5bfa89e8c5e57988149081a9118d94cd6bfdd44ce"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "10d1101784dcd94254ddedd6d0beb29e7defa21f16a48734f36b840114a33c91"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "f21398693cdab5b75407b7c0814325a43f04242153b03dad0b95973fe59ae5c3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "161ce9a86a012abb9e6002aceff238144c5aad2ce686616c1111d8074a2f86ec"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 
32, 2, 2, 8, 0, 0, true, false, false, false, false, "0098fc44abf9d3cf8c1c0cc9b3bc61fcfde6c7a2b532c1286bb99fabdcdf3dd4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "78aae5cad5d97f224da922e31da21e2b5c8c162a7eb7257be0045779a7d949ae"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "8f04a6f491f0dff2d53d027d82c078c41043d2a7e4b86e37b410175d313a7679"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "c1ee3ff5ddbe4b97c180ad4ea06d81f92c07c96292a7bf19a5822dcbde7c7acc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "a89a76602e87d3f0cd2c5cd97459509f5b9cb38e4a0cea4b13b457aa441ebf52"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "07d9d4e77115ef5809ffa19db2e92f6eff3336b66cf36297fb1a8e196f7c019b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "e76132adca4a31d5ff9d871bee443593f9689af939046355b59997e62c378bf6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "f3361a3ce6c7046a3d7edac64015c9caef86a64f6f508743acdd7bdb870d1226"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "54a512759b626a74d7c67d357afd45a933be190317477d8f5fa80235a288440a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 191816, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "80b458c6a591277a6ec40746813e2e28c88d47a7f2bb7b56c5d5465b3465009d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "c09cba7f4c5386ee3a00d204b1b216c9a5b9d1fded7281122471675601fa6f5e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 165280, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "97b034e8918fbcd3ba9b1c46a0d19303ea72de26b3ece5239db728e64a2d3a31"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 156976, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "03e2c4667f42a19c627cee284bffce01bdb48ee9f804dc8a38ebd807929bf9ce"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 191816, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "97a02ea3a63c13476268d0133aeee859eaa90b9eeef3fe5ada5aad7a7785d5ff"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, 
"dd8909536bef1a2471b2a9c07c28c03f63fc79d6316af1e1c68a0eafd84300b7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 165280, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "1341e6f8e29b614e66c2db3ed375c992229598d61355d64c299d9d95dff9bf1f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 156976, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "a368e69c73dba379ebeb83e699c315bd60107fed62b17475fb6222667f485cef"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "ae2995c800857e044500c8f0d009be0e44e92382f91213513de3eacd69a8ef92"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "a61dadea2da2810104209ad9f9e725b241dd0b94d783a4f6abfcb816e4aec78a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "87a39842490618f1802968e771b1a6e9e5d18a7e8674a695954195d8becb8472"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "2c6419880d7e31627ec141370a762c2a2b40e1a0ad37c44f8a9eddebcb95f051"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 157088, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "d16aaf8db811ada461d309fa1b135d26fb322df509ad0e1600f50b1c86a390d8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "32b065276ce67846573f38a6ad7ebfbdd7d56ed13fa898362f6d6f84cdb8a61f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 152992, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "c3ddfb816f212f044f500840ebfb540799314cd3fe3f1d687fea1bad35d892e5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "160ef26943c53724b8d2cfa0b9c14e0b1071b8b8ec6949382a27d292a29fde34"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "7e02fe26f61bf6b481e4c3afc8aab2c5f3e0deeb550e0e955c9ab16479d41b3c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "d7fc8aed1e91225ad1bdb521c2c723b2c0a6cc96ef060ea1062d998f9abd59d4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "14acc03cb3b846fd86b9d7b71b07293b588aa0a86f73d82030d669caf8ee87ee"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "1bec2d8e7946133d20c2451f715b6d5a4882072483cc570e78c808014839e59c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 157088, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "71f1cd6b59ac287c7d9aa52d1ffc111025943f87ae870e494fec00dcc21fa268"}, +{ 
DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "516be2bfbcccb8502426bb1dca95345b79b7bd7fb138bb6a9d0f5cfbc20b8beb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 152992, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "cbf74815b3828886304cd012d3cc5f89def2e7cf02eb3d14d690f5a4bb4ddb3a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "af68b00a0c6d060c4fb5c8a99bd278ee08c42da75859b565d2eaf5c687984b37"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, 
"713be9e94376a7141fc21576e9db483f1dffa3831ada55f9950cb23a293f5e2f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "c6c5c9ba19137bb03fba6c6994a319ffcb508f1c4d399bae1c53a98ac58cf75f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "408e4b5465749a346919f87480b86236c21b3acf12ceaa974f82f592ef7fccd6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "6f9a01e71952fc7eb2a2b7d522b306caebc4b1e7cfe6160c5134a547c1e59b37"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 157088, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "1f35381328bb28f9e5959a74fcd1f893a0dc2c05e301cb6fee14eb85ced099f6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "fd5b0b7ef04e1214593c566eaa96c87fe31142ce72a138c734e68a252bbf9a33"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 152992, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "335a685cbcf16816e48ee401937bec4d7e32792cea49332eeaf8898c237025f8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "be8a360a5e2576c6f14231f7d7d0cad8834b6a31406aafdf75cd6eccc5d97e5e"}, +{ 
DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "16982339c47cbc7c5552380aa41c62563c6a3939e56ce43c7f463509ecadf3d5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "bd358ea844b4dce4d8423a6929dbcba3128c9fad4f73f37f994b8d4fd7aec258"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "a751a5ad505d223f3e39b820725ac6629bc57c9c592506a35ab2be3391b2b050"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "6d33c2de24ada7d3d3d71a61cd14cae6f9a117a220aec31ab615be6c91ee570b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 157088, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "eacd21ae743ad361f75d743f88538766157b899c5cd97f3ff2af0d96189829b7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "2334636ec140349ddd5529e1731486dcb7b8ea8117372073d6d13aa6612d70f1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 152992, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, 
"93bdb9e48c330c3f91538267412526f4a3bbb6a8fddd6882a62a1637cb5db4dc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OE4m3H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "f319c7d9cd4ecac5581cdbb748a2a8f014868ef88ae6bdf0a4df11daca450a6d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228408, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "37ec61af6176a32c5c911fb14139286350aa4eaad891e5d104ba8f398fb9f66f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "fd9eb14f320afcbf3f6f9acab87d0afe5c55847f8bef905ae7f62f4a1c04cca8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "2a7e4cd4126a880e3f148f65a86130c2da3303c2d493e2c8b5390ae61cc20f22"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "a17f3d30b8e9b40caea1661f8d25d243aa90bfcbb7db6f2029861bf904e4e87b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228408, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "e69198989fff78f375c9c85740e4a1cef0c0ac9020e1585b1bbd59f26935c933"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "cd758160bc173f449b674d7d6e2929c15f02b44573fe301959b39f83b1f2affa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "782ef3ac348301c04888eebc304e76b715f48b56bd841912ce8a58588975478a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "c2d6b9a1bd88c903f9d831e747400170bb47ab5ad9d5e747bdc809c48c5f01fb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "3b1241a5017dccd6a94d053af67bf9254c745af54c98822e3999fc0fdfb84e98"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, 
"7bddc5f7a8bf6b17c4360a329c2995ed1bbd5e073390c2814fa6e2a3cff7f30e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "8458288397bbf9ea81405dbfdc5cd76261b5bf129a093f486b5529905ce82814"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "6b1095363a7ab392d4834876c03b6938e06e7f03f0d1e6346ae2e8d28e76ddaa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "d2db2fd41cfa1782fbd8eb1d29d2bbd31dab406f84c5554eda19991ad7d24143"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "35101c7748300c30eab1ab9abe9af99033f3be7aa33d6b06f6d0ed4c2721a3c6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "e022fc8c57f0cc20c2c2dc12b1731cde0c7f702609ecd4147b2cc405113911e8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "569d33ccbd1bd45d91719340a4f7b3c7e158bb018ea8fe4375dfcc3d7c49594e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "48625692415653ec1f475d72222a745a999fa7982821531d0f627348fc21a76a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "ffa1b95b70c8441e8fa08727710fb104cfbe4b9fc8ae60d007898ee50cd41de8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "cff550fd647925f0f086c7239e1bf020e647c4954351cb242f30b9a9d9d662cc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "deca6e3dce36bfa4dd965d833fea3f998e0805ff91a1cc6554931c983a3c93e3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "5aeaf12eb2bf600b81e8d2abe915c9a92def14b1ee72e32c286f139a9f494899"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, 
kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "51c106c1648a9129ca24749e9911cbd6a19a3421506ebc10830d2d25642007a3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "f43a5144232529fd455f62d5836fad8f643e9e0f005c7e277974c2f723dfbf78"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "3d933446539f23839236720137be747198f30b0ff3bebe309c81de51e28df8bf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, 
"69a74749432d4929463ce34518d7ef980def4bd5c7c63ac950c5455a44f836fc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "c32e73e7d6602795bf92a5761790aa71b72961c8f2c222ce4a5ae1fb491d218f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "feab3e76a9656900c115cdc574d44755450ee5132eba27d95459080a35e1bf45"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "1d2d221e3516d3795233a9890871df306d2eabfe7fd08727bcdd623efe69548b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "04119a3862bc615e28af0d72e5c183bc63f972ebd588f851642066b0ad2f3b92"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "cf11366071eac5631cacaf99083c02fa7bb465160f60bd8d59060701792522c3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "20f6215111fe12db861d7491ffd7291736d93bf94ea7093f2107b8d03d392aab"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 
32, 2, 2, 8, 0, 0, true, false, false, false, false, "29d241104d761789a558905ba75b836dc585f947875a56505946d81c0bcb1b78"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189608, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "86c84a4ace7cc2580cb66b9645a7c70835a9ae4f670b070b96b2680b4d77afd2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186024, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "7ca329af237fe06f2db74a810ec49a53d24b789a761221e07d5dce27d36d5152"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "96e8ed8d31d6c724ef1774ac514fcec9645e37db8ae0c5563907178747f00466"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "6da628ab5cc2bd1a9e7417932bb4d0f4788ddbd5597ade5439af68209fede671"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 162064, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "a6b7e72d1185724072b8718877fd625c038ffe9db958950940bd7ee760add052"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 155808, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "19363b2d30523842f8ea1045373cc2241542a26c1d0faf08513bb1f915d9b892"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 155408, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "b44292275a722e916f0530f276ef607b0c8ad99cb413c812c46854742ebea6e6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 152224, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "e42f14a5e8c41605200b29caa7a966d4e180ab417a64b83ce4c322cfcae768ba"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "dd61e18e9e3e582600cac93ab06ed86e678c109c2e6f885ebdef9db72fffb5a0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "c64d490f4be1f5b7c89895d895ee52f2611a6f35715ccd674876cab9cd5522dc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "fbcd9c9be8e191944d07947395ab1467e098b4ca67072c641e643223def59d4e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "646b3453481b8b73802e3c11e8cf66f359d6138c6a1747ae134b2f9c27b87ad0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "d55db08d45146055850bf1d5fe24bb97b2103ec4ffba6c85279a3d6226f48d3a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "b80161ddddff41bfb961de9fab340312e1640e310bb4d27ea07bf4bd2a0e3d71"}, +{ DATA_TYPE_E4M3, 
DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "894a1a5741d35cb00701b818c614c9785f96caa06c38ebc6dada5f67e63a616f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "f5bcb1525b0efc49cfa8f266a98a92a0a71a93165cdeadfc9ff50672bc83a958"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "b9374108ce9faf64193e64dbaef1133de781c8b3fb1bd0679fbc991576765eb3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, 
"8aaa65ae3302a3557298776470d7c53ed664d6f3d83c2fc696fa1fe6f0fac44a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "b95cfc93fe62bcd07a7428a8f224809ab65a5a73fcb340f88c0faa2ff9aeec2d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "cad28cef119116ec64c54c697dfc009fa9916372e6d3aa5fb928bf91c5ccacbc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "42bdf854976fb736599a5ce9f54445243760451372513ff082b888066e58e117"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "06fe3d6b713f27db28d3b892e51548bc0429ae15d0bc6287452491c0ef69efa6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "19a1f1817f582753fb3946c880790ee84e69d1784686df72612490e1195990ed"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "1fd2636c1be0d84681936c4b96d3c4101d9fead468c85b43b394bf19455453d7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "284e77f84f4d03c225f92226d84c236c018d434c1573c93caa5d536e92d552d4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "fdb16b2ad2ae240048e8f6ff403e5e8cd2b2a0e3bcbaec1c9b7f2e48da26f87d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "ae149f0c8aa67c87843104a3796129e4bb3791d58d82feef32705bc6fe8f2c22"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "a2b94b9fa0793ac21830b02e591e437acd63b1ab940189b81aaff1727e9e9608"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "c246f3f1703a37f2d1980cdfaf7eac7d340a254709a80823ab88601d3830486d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "1e97a3633c83829be193fc2759858291301863305d0089dc47888837354eef17"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "77c745c3d9ead8d4bb2c99e6b87be4f834d48ce1a50dc14071d084608e3ea1ce"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "a0ab900fc859a81123b99d0bd774ce389649f5221cb4065f9775f3e23112de3a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 191144, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "2f0900bd370ace63a54c73b5958bb6d172900a2daef97676d1d49976acf1b2de"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 186536, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "c7eb7f0407daed5a5cf98c5934a3e33161e66f520f491cca9e78c5ab8f2ab29a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "6ef83e790fe72578521f5b81f384db13863abe8049261c2e37ea9f3d71f42191"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "605c65745eb64a8a7faf4854e92fbf6e9d4129d20980a3bb42df288b8417dbb4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 166160, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "e0387a5401b16117d44f19733cd5fd498248af5b4b1aaa9678de39aa377c234e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 157856, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "86be6c5ffe0912c2a06584b2ce438a4d3a1c13eb0586461ec9a9128a8e853e19"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 157456, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "89ca82779e0daf7331224de81d1c78b4a2edd631fff07cb052c53288d13b22e1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 
256, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 153248, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "0dfe7b0a29f424fec44f120df430ac523b19dbce881a6f7d731188ca7adcc155"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 191816, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "c58db7d36ad77ba3a1c78039f9483b8640dc84204d92a314e76d36b87fb1e6c9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "288e9bd47734ba111b5e00daaa99663fd7f60f8e470b8e08cf796435220d2875"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 165280, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, 
"ce94930c44dcd9b1870691f20b8f6c558f1752cc6374d6e6560ff49c0c591e28"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 156976, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "5733f05d1958bfc66e8a647825f08f0591e492dbc63a8353453b8a093463d7df"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 191816, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "f27db3f29ee74d9b77fc2a47672bedbd69ad746891fbaec7259d3ef57cac1910"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 156992, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "cdc941c9c1235bf2daeb268e8e011df46371e49d72742c5ae199a409070711dd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 165280, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "55e03bc08d3729b49a7b349b323b5c79afbef32ef91c4c8295a20ff5677530ae"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 156976, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "f0a0669013c86ca7d9d4f977ebd58e31d01246f6a3a4e561d39930e9cd11cd00"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "26953280e4457b6ca0de4a155a8a3e110cdef997db2edf234f360ed6a1445478"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "0a470f0d3d37b697d7f03f99a3f0939cf3654677fec29ce22de0e1faefcfb3b8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "03d374ee2c5f7c0bec6e8f5931a1059c06563128168ec5ea2264e8aab98f5107"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "ff62d27c5f89fcbe592ae07a280b408fba80e286039c8b45d31aea16d434ce78"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "ebb08f4462f9da831374506c19bb1e0bf3ffe2becc0ace5d1e96a53099eca73c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "85249820e61d9d25db8b892afa7f6778d5f71b1001c17b0cfae0bd6f57ad1bdd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, 
FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "c0319e58718a437aab3db52aec8947f18ff925ae7915ab68c94388ceadfe1afb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "300112392319a92ed7899777401292042e52cc2878d05d687ec4a286345b5178"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "2ccdfa739250d7dbb7661b28f3cc74df9a883e985a47ce4f72511c82868bdddf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "312dab4e691a49d3810a6a987cd871ec3794219b061bfa39c06d95b7e8bb2c67"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, 
DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "b5e83d963abeb7aa42a1469116c1be8c00e1c4a0fa8b31f1de4c7c7acd38b502"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "9d4e85b801352e993da5e5df1f829c13c6965e9f5bc0173714ab2950bd258ff3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "83fe066659782374fb0913511a145a423aa97a4d3546ccdd78ab7e6beff349d5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, 
"f010bbc5b321f3826670ea72351bbba96b7f1cab977e6338bde79a502af6e336"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "0eaf8fa68e867c1f6e636f0fb35ac5cc319a99f27dbc84af4aba8d88f6df7336"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "4998c57197ca6fa93859de9d8bbb02d62ba28abecd1eda308965cf12264c41de"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "7c37fca5396207960dd572a958b4f3d160a1de609f2805bd90fdbccbafd0fc57"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "f3139d9fc683e9ba78e05ab1f99b1ed08071e14718499e718f87629feccb581d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "11621b9ba316364208d31de8d5ecdcf65678476214b7c3ae37d1c3ab5efdc720"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "daa33dc9921ecfc9c91361e2c8ec477595fb9d5b2836e7f1884cffa42484a5a0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "6ff12f91bbf7512d2b14ea483348b5a457f0b5594672d0ef02a9fb281b2f0615"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, 
DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "8402a3c89f06dba58e3fe8acd3b8ecf01103c056f1b0d50082a7289ba6f021b2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "a436caafe668a24d93082c7c1286d1a530ee28ca8a0fdd83e51f87f4ed7224db"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "bfad1a4a0cc86a138a0ab8755c3f37b8fa23dd2f187d27856561477ad5f31e61"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 189752, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "bd9f3e9a42c11c0ddca7751986ecdfb0cadcd43c02d9d27b333c81f17de7e345"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 187192, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "7216e5b856113e8235fed25ae19ddf298584c3c9cc68012b2212e92aadd289e5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "a9b511ad8b1680184488e65c524b2481706eb1372758e7768bcf1e2f589eae11"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "2cfe371c4a8f406f9987d1f6d341c503ba79bfaeb9f53fcea0a390686bf56307"}, +{ 
DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 158112, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "b99bcfd4be16b1d88895ba028bb6a47d37732598977c96e014bf4bad96234bc1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 154928, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "32d1f91630a9cef5a50f241010163223e4559613404e54a751adf2d626ae7704"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 153504, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "efe1606a106d3672320a7dc41ddb4f3a01ba769ee19ecd90c2abbaa9891f75e4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvE4m3OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 152368, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "209d832b77b7085ecf0976dae7e2d4b57e4994a4db5b8634f75612d9c73874d2"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228328, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "376c85460f1639832575c0457333e297141ac62712462552879def5eeaeb3e81"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 164960, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "0a5a2a18757a69a5de03712ec84057fe4f7a3cdd54f6564fbaeb544aac1f07c3"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 197824, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "b975e4f2bc46f3be8f50a7a5e4c0e2e27f95c83fb72ca2a1ee9d5079ebe1aa67"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, 
FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 164944, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "256a6a6052e313158cbd4a21cbd0199b70e0691176b3795679321037bbd24a40"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 228328, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "25f6addad72a136ca71ec8d40febf7cc815b37cf3504758cb7be7f7bc7e9aa31"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 164960, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "5f1c85d738846fd02d121fcfe5421b1195f23b47886b488b6aad5b1818a75f14"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 197824, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, 
"25bcdde15cbcf2aa7fbc875386f90fc07d6f635d96fba36b77a3f39fac04005d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 164944, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "7629e9c9f1811e5c4dbfe4e5c18bf685edbaf08b5f00fbe99717940b6afe9f41"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 179288, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "f9052c56d52c8f2b7e6c76929d690755cfefed484b9e96c0dbf6616307f2d2c6"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 172632, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "37140fb85efad28e5ff660cd25c27e44c4d5a5cd561fcf2177a00e25c269be66"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "9118fbd1285ebf58c0f3eb915d625d76152765eac43441492aa9c174b66ed8ee"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "e11f964b46d89ec952f62fd4f955117c6edaa223bf26e4d6777e4406602e4a52"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "8accc630f3bb42266601d44682c853a3adbe659edaab92b639fa0df1bddbdbda"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "41c588d2ff0fe41c92d06b94e254a48c374e2396c4522b2dab04f92a145ff826"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "86fdb61f45e1b66eec2f878ed0a41f064ac566ddd8e2416958407c90c23d9c0f"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "22ea3340cf370dfdcb173b3663291806189be85c4ef85c5d9fc7d910667a45e0"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 179288, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "4dbd514dfb085b67dc89cf2d0a1982063e7b2b1c1636aab320599d182fbd907f"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 172632, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "26524c98f9f8056cdfb9c975b29ef70161f41504b8215e2b48b8b44adb04d129"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, 
FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "8b6df909201e2f06fac1a0c6604e63a8e4ec4343c0201282ccf45b036f744111"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "1eee5d4df4ea69d0581f62e2e23415d33049b5c82b0fe904c69ec9b83c568a83"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "f017c88f6b1a3130b6801b2c01ffca9b5d7fbd5c89b0004e1bd9c06a2e6c266f"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "92a2b4080b2847939df5ef55ae811ae9575d91616affac9a618ef99c066f90d0"}, +{ DATA_TYPE_FP16, 
DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "7ac1637dd9625dc30dad84f959216cb6728df5fa9598149273f787ccab46c6b5"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "6f198b0bbc0d1014614546ad9c65532fa4f0baa22b7f1dd089266a961e969550"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 179288, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "6714a08e9ec30de5155109defb4ad3f4ec90a449f8142eb661186b5b7b1c12f8"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 172632, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "e57045003bf87bb4a8cd8bc906bc7d63b3c84f54fee95375fb3b2268a97e2b72"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "aa72394b2911b985bd225ebd73735977b1ac8ac5d80dfc2a56aec8b34d0aabc7"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "1ab5d8448cea81e5407e60004c056bdd10c0818a1e8604274fdc58b7fc5b5c29"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "fc4decfaac88bb47b9c8d0ebe1814dac77c754c8aefb931fd1f1ac5d3774f86d"}, +{ DATA_TYPE_FP16, 
DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "f8976b822ebd5594502a6e925488e67f759699094d9c1a56825c197aed802c71"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "2a0a06941a263f6b5ba6cb7315f4e09c1e899265ff57483aab952f0ad4a4234f"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "b64ae92772466196f1e1201b177c6c384891849019ececdab801e04ae6616e9b"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 179288, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "3ea4562eb10a33bd5df7c63403326aca78fd49d9bfb0e8d3d54422ac9d6a2a10"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 172632, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "8eb807d24b240c145cc3afc53924bcb4f9f9bebe3c917665b13deb0dbda3b073"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "2befd3db6d384e8edfcb81a18c46749b5aedaf31ca0f41f89f74f7fd5e339c0e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, 
"35607503635de156a8cca7ab043cbb38156211f5f632293a904ac07d9affc36e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 153792, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "085384b9445d6abce14e5107cb43a2171d0c960567c16e3310dfaac32e803931"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 145488, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "c1bbaed6c931a89d6368bb1897e9439ca427cdf36bd83d2ba7e2c48c9f18b7af"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 143040, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "31c01825cdf8ea497b842f2e426f849bae2274f3164ee1732ae3312cd1d9dff6"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 128, 128, 128, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 138832, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "f35a1536032de5b1912bff9be6b6ff8e3d0baf9996d9c9e5650f8a5894a72fe2"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182872, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "08a8562a64917bf67a6d38065c1ae5ab58dba5ee67f6990c25f004df0ac1b26e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174168, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "965442b5452e7adcb4fdae5d2ec1ee2cb57254323d75af71349cd7ee70db4373"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "e77d59c698edf68b153c8ef216579ec57b9ff9d83db3543894103924858cd666"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, 
DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "dd2d7d6f9331162d036dc08dc6eee879d5e1e1e58c916c04a49bae424772ae5e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "d02975bd856022b92f73b4167c6b6a679ac0795e8d196e71cc5a5e3858cf1197"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "2c13e7d66b8b816a94fc98ba7b784c8b73afecbdbe85a91c83a8f0b976bdb9ff"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, 
"aee4cc98f1849f6ea4022061e4fc1eabac1ec2b8ba0b11409db5b5ce90bb2cd6"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "d54e74615feefc867778c8f66b3ed79fe7daa956005e5d10ad51ab2c759b334e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182872, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "9b31a600ebcdbb46fc28ed3dc24e333f07f95f5a3f924b19b21df98d2889ac84"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174168, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "9c2d9ebe7478b08053d99f6813e5d2e36f0c386013198567638a5f4e0d614507"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "b5dacb35c6adfa5fd6873cfe58afb0aa61a38dcc6c91f33b5bb0a15a3a26a7cb"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "57e9b63c7b58d84d4cbb71b0192846810e7447b5ce4cd1ec97e02cb9be095451"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "25b81f8de30978a4f7ac326a1a00051de18f5d51d410cb1bcdd5faa00c499035"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "5504933816e3f65a3367e4c3df30af93409d91a51a07fea72952c0c2b912ddb6"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "fe5b2bb47aea5d1a459a27feaec5d6841aedc0e1454be853379e77c3294b9f92"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "74bba79b3bb0a5cb0dcd960add60f7a182e2f69143fa1ead5145596ab41b9d1a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182872, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "7d912db5785402fc999ec6302c16fbe50bf96a041e3c8d4174de1bbbf6e29f12"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174168, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "daeb37494d0a8c9aaa4e50fbdb5880c303ae21a44a38e1e2f5ba1009c13ff4c0"}, +{ 
DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "27078d7b81fcd4c862cfc3ecf184e8cc4fe6d7467f235148535d3db6e3ab4321"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "7a03c4bc4137e07694595cd034da0365e68097a86250a601e0c2d22454c16ca4"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "75d019c99eb097d4778939e7de55ba503079b0c585c084ae0088c2a5f61d5b94"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "64209fbb4464feb6d3e4d400ee9311da78062f950b0b2abb91a65df0b9dfbfd1"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "8c18ac93e2ca88576872b154449f0ad8f6f523e1e623fda0ae8c380d3e52fc22"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "1d65e97194128eb72823b9a689d9ea43ae293aa37f55e6fd9918bfbd0146e344"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 182872, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, 
"99d43cff4d36039329e525774647b7bdbddaac69ef311c3dea1953b201a212f4"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 174168, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "9e98a7d2820007ef2e0732f39cd484b2df6de90ca9d8713ffb1247c5ed8fedc8"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "9f110d53b8e9b4a9df50d12ec69067f05f42d33514c93bf7dc9b430b74dcb8b3"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "bd7e086cd0b8451b05020c229651323fcf72760711da382a07fa4f4256bf3155"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, 
FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 161984, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "3b62586d60518483c2625df27047682b048ba5da938b9ab78d060c4790354db6"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 149584, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "7a58416a72bbfe8c96114c2eb70d5c1e8251b4a6e371777bfedf654b12d90da5"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 147136, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "d07dd1125d8b238b07ef9033c7203ddcc0be0b6fe086c61b4a363d1fdff31478"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 256, 256, 256, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 140880, 512, 2, 
64, 2, 2, 8, 0, 0, true, false, false, false, false, "bd6ef25e1bdb4b9779f9887e88dbe05ac14d3eb0bae5c54aceff0e40a37b16a8"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 199864, 512, 2, 32, 3, 3, 128, 0, 3, true, false, false, false, false, "faae3f46e871bf5605f8264bf013aa86fe844cbf0a28e426084f850df64c2059"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 32, 3, 3, 128, 0, 1, true, false, false, false, false, "23b5a7f28855bdd9d09c480bdf3c35ebd851309645a8695af4f01d11563c4a90"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 32, 3, 3, 128, 1, 0, true, false, false, false, false, "1ba44fa0511c50431f29110e72bdff1a5234f4acaaa5d845651307573198612e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP32VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 32, 3, 3, 128, 0, 0, true, false, false, false, false, "d35cb8f85bf0ec1f2ffcdef96d488b2d8e49cf3cf504d664460b3d715568c354"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvCgaVarSeqQ128Kv128StaticKeepsAbForGen", 199864, 512, 2, 64, 3, 3, 128, 0, 3, true, false, false, false, false, "71fd2b4392e3ddc8799436f1aade61a61779a8e3c4eaeefbd1688f5f7f78694b"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64MultiCtasKvVarSeqQ128Kv128StaticKeepsAbForGen", 165040, 512, 2, 64, 3, 3, 128, 0, 1, true, false, false, false, false, "5592a477099d15d8cbe5f5714465b8f681a07d45d05bfad809fe08899af5f2b1"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128PersistentKeepsAbForGen", 181520, 512, 2, 64, 3, 3, 128, 1, 0, true, false, false, false, false, "8d05b475461409b425ea592f92761b43bb6de94f2dd07bb997466e9e53738c2e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 256, 64, 64, 64, kSM_100f, 
FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvCustomP64VarSeqQ128Kv128StaticKeepsAbForGen", 165024, 512, 2, 64, 3, 3, 128, 0, 0, true, false, false, false, false, "f6b0ca79987c432a80784cb6c0b6a79e6f6e65abd3a362d5811ed188338ee232"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194728, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "ef96d8d81b6a8773b899bf49907ab6d012e6d9300ed55b3f15ed122bd1a99984"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189096, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "05684e78582d38451ebe1c642d8e111ec982c84994de249f6ae45049fa209349"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, 
"e03b72f119e36f9a093bef0da6cf84940fb250c914e06b14eaf10720626165d8"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "0e200d51092828f51ac5dbd08d48c89e2cfd48477b18bae73c86eaa2149ac9e6"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "2b89864d9bf56db027c691363f45135066a9d4be0012eb00a1a88352afd4ba9d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "18f2f2a39d2c79f8c0e745cdbe50883e6906a3e70efc57f007934142979992df"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 32, 0, 2, 8, 1, 0, true, false, 
false, false, false, "5c8aa63e6b36b41a08a1c22c98578a2e0f67548ed794e42957375f199b6be67b"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "de16824a845cb880bd6ec6717f55ec2019694a970266c0f0eba1ee9a5ab1dab7"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194728, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "76b5be703dc1898c8720574bda6941fbc6fa12d975d587a1b422f5d6b60051f3"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189096, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "a0f927372399b05a0cace12ab2d8f20bc2ba6cdb871939025c66bb17a48f6ffc"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "421cb8a599bd669ade7704057eb36b76697b8e9c195b5b0bc333da5c8132a69e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "26d6b0e52ace63e41ce04f26faab394d3a4958a52a3552eab815c4a87ec38caf"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "476c7ac0fa3e408c186df69145b77e22e826a45e11d420cec62ad95816dc97b1"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "679d2f06db880e8d872b70d9fcfed5a6f846e0eb9ea11354160bac6c4621f87f"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, 
FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "f51f5de778e2d3be9a67f7893f85056236622b626e1fcf3f70659e79ac5eb652"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "616c9f85efcfc030907d9dfe94885a667543db0dafe0b6c1f47e70ee972665b4"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194728, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "ac74c9c252a8f202ad8b9d1293560ac7e2aaa55d8093b605b898fd7047f26698"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189096, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "0e6c5721458df4be8703192c3c5e3ea4b6f5d9ada2795537d14e7055aa2ce413"}, +{ DATA_TYPE_FP16, 
DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "aa2e33ea8fd070963f458cf19edd49993f12bdd0e8e33e2968bcfe2a56f91053"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "ae73bd7503bdbacbec935560b5b1ce9dd7a320c21cfc89a2380ff4a8627decbb"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "c4c726b67f23cca7597601553e16215642a4a53afdef0f0df5ada8d7ccc53a5e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "2f651bd6506d0d847c3b92038091e0d9bee3879436c0048199ff006b32303634"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "9e30a289e95a99d00998f28eebad503717b236632c3ae6cc85ccb1f6a8000928"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "4d2bb2bc80b76d9957488f7cb7b8dd472bafc79d09790b10aa8ffc32660995cc"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 194728, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "61d27c1e0bd331eb50bad951b70f7ee1858947d1ba20bf44f1546aa545e5b846"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, 
kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 189096, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "784447414701221300b034a3c8922f7e7ffb1b4f668e3c0b29b1829852b26353"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "a70b88358a034c9bb9728eba5566f0074c01ee65d9cad919050498424d96a2ac"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "847f565a93771adfc68059f4b65c77c5c754ecc4d48fc3c93451e45a442bbd8c"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, 
"fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 164112, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "6ddba6cc046628cc6eb6f21f958068228bc7f35991253e69d6596aa7d8a698b2"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 16, 128, 16, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 159904, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "9d3697ef5785403d7e3e8c2fb66f3252e9ffa713876db73efc5eb240732b3224"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 156432, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "8445a6bf67dd7dc7cc5314e0370c89cb5ba1bb8767dbbdea19eec6b9d7b27a24"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 8, 128, 8, 256, 64, 64, 64, kSM_100f, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm100fKernel_QkvFp16OFp16H64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 154272, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "28b0941f58c1795ffd72af047959c013bb45f98819045f78767ec8b470a5f116"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "dc657891163f426f5d272c2b88300837a6e36b49dc530cc34ef592a6d86aa482"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 127120, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "929180507d8c21a439ab30baf0d991d55846f4abc793efbcc4557134b027733d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "fcc61dea00bd05e1a6c01e6b6b3a2739e590662c7f84a1c7399a81adca0584d6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 127120, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, 
"7fe5ad2c1ae8e4579adf5cc8614c1ec27f9d5efb793c3dde12ecd61f092f9701"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200824, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "59ab40b6f4f72c87479ff2eef8ccd5a4df8851be321aa28eff86762fdbacb577"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 196344, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "a458e09c2fe1a2976ef14c46582759428c9af39938d4d3bf2f60baf9b09b8857"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167024, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "dd73fcb9b85cd985d5ee07af4324e482dc3e26f3adb5f3a9657571711b4a67c4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "767c7ab673225e7c40602e03693c2867dbb70c22dd899d6e7155138396c3d7bc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "34b65256cf4aa2e1c0abea62f5aad723dadec2f71fa999eacf44fd0ac7bd44a1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 127120, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "969f552593aadc0d272f23272c6ebf0883867686717d153345798bc773ec4130"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169168, 512, 2, 32, 0, 2, 
16, 1, 0, true, false, false, false, false, "4e4a1966b0d42be5a7a39369a5f66cb6c821517ca1897b28cf1df1307f131387"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 167024, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "7739317d1de126fe34fcedc88e023ccaa7f9d0ad80f5d6828c987d4a948e3cb0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "911cf744df4b75669be6c29192cd73134a98ec3898fdd83ad87959eb41c777aa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "25eccbec59a4ef11e4f9cf44870f51c0f54e2ca0efa4aa5b60085b7788a63b58"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200824, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "f748330ed978d30e198ab7e039dbcbecafbf258b46ae39ff5b754bccbe8ef5f7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 196344, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "6caaa0daaa3a89a2972ef3588c4fbfe8705bf6b1147e6457976771513c079528"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167024, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "586ea54c82aecc15568a4478088669b85aa173fa753ff8f470c22847a2e98a99"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 64, 0, 2, 8, 0, 1, 
true, false, false, false, false, "d27a5e729305e22cae6b983902c9c477ad6a5e330538582106623f1142adf7ef"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "2d0d401a424dbeb848eaf7169f820572cfc2f51759e9312b5c476407e99ab28f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 127120, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "634812cc0bf77639858151c34db8e42ddc8af477866f80024c626c08a225042c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169168, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "716dd346bdc75cd255c996fed228929069979156c5b9668fed844b56a502cf91"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 167024, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "f26b594854de7b523ad23dbdb5a425bb7c1eb14133ded2aba375168fedba83b1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "667acff03dee556d1914f273b2c25e0a81e8a43e7d6556fae6b3f69fe12b6d23"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "2a57ed946eec20ea94e40bf547fe18cef3a03593da59bf2a6be57bcecfc11239"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200824, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, 
false, "dca73315dffb122eee7b971ebe7a5c24f42e4ec925a87bb7cab6d62865232fcb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 196344, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "b76abb2a5aa06b831d440d514f72d09f60e0ff422a26796de8d030adf2d95887"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167024, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "5b368deaba9341baadd05490961c6b3692b0f59459d9cdcfad66392320da3be2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "f54020eff0be71d46c7a42d6b34e33d096b65c43ff7352d3dc11367b782725c7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, 
DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "5c1fd3885cd392e1a2640cb4aad478bf3b7dc20d24b3848bcfc0a3d911fda1a2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 127120, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "faa24e2035d517a0df40090b7b8461e3781069bf7a07bed8e0b39615b5972509"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 169168, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "230962bcdb289dc3715559295673736a874bd074d674de37bda8a0e7bd1c1ef4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 167024, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "edf58b47681ada425aca54aeb304c5566873f8e292e4972490ed87fa34bb9e59"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "af8ed68903976fdbd11e0c348e55150abae89e1d682bf859b967a3a42c90e4c9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "e6c79476c37e63967607168630cefcee1ae89b73fa2986e14f223453c90c4e93"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 200824, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "1774a0d6fe5aafb9bd00abcec2ab2d892947541e7a2240a007c4899e18511f0a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 196344, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "000ef5e07ef5843ab68c8b6a2b620e7903977d6a5944de62ee19c8847eeb046b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 167024, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "90aeb180b7fb75e2126bafcddb2ac6b45101be1eb85dabf972d79b4e3ab9b804"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "598db7ca77b8e9976182c98d66769599416764a6693e49820e5785e64660e53f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 127216, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "42ef50891a1b8095efaf5484906c9ace9289857e25f1f13692598b5138ce554b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 127120, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "f1be3ed572c82f4aba067869db5b5d3e31119477ecd8264d40e1d6bed61757e3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 169168, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, 
"206ac3abfdc415b7efa183d919cdacc4d4d7670e4d382c5f608aa875da402bf3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 167024, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "c379df5533678d4dce822816421777c13e3199a80acf62e2ceb0d9819f561dbf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 163664, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "d6b31c51d57fe0244ab9ae715f1ae70ed4a1be7f93ce3b0752e3c26633cd4e89"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 162544, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "6eb367aa30ee4a3c2fd046882bf63cc9795c4453df8a2f1965f3dbce596aeff7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "cec9ccf65005ea794fc625307f35c7f30c6cd9500c600c8dc913664bfe465a9c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 224480, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "633f53bf3ec35a0faaf472f8f0f0628364933ce47c4edec263e59ae3029b03c9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "e8311fad1266f28254365e12c8669c5384485c5fd7355e6cca6e7d10dd1f76df"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 224480, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, 
"1d29038bc03d8dbe073c14d25e1ae7618a6441a52438b23dad2667873737c4b6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213624, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "018dbb96ad8b24f343403b8e88e73d1d36d3e8ba5e830a6dfc72145c641acf70"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 207096, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "1c59d688b959e97a8da273d994f9fe2eae1d6e253e88b9c9a574573308affa99"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 180336, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "ce0166438ef7f70cfef409b644e1bb5891399cafd4511ab36261a40bcb09d6ca"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "68f3c6e7798ee974a8e48e4909ca8c1e8e88aa271d8019f9024a233c0e9a52dc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "f67f8968baa21f0324dc911f47ec0bc552235931b852fc08b82b2067a261fab0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 224480, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "23b84dc67e7c265ea8e295f9e2192a314a463b6f5add916090cbcf60e0a40943"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 182480, 512, 2, 32, 0, 2, 
16, 1, 0, true, false, false, false, false, "610e861b828b5f21d0f604f6a495b88b7b8e47046c818f854990b8f0bfa6089b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 180336, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "828346cfebb3436860f8b9a7a0653bc566f02fe00ba91a59641af1d4e6e62d9d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 174928, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "1bb81fb65be6ddd18636ebc500da183948341c62b40375bf3941fe94f5168735"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "bfd2821edca0b3cd10015d44857e4d083aaccd8c52989c25890f4d3df49f2267"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213624, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "472c10b43b5f12230e141cf5fe3ead8241b1b3b09bf29aff250dc4d3e8c3d7fa"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 207096, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "a4be8aa7d10c7726e40a2ca6365e40d55b41c172adcb2d3d8b2cbecc09236048"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 180336, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "37cb844aac630ac66256e0f22a8233bdb8ac33bbece66da0886a1d9a63efd0ce"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 64, 0, 2, 8, 0, 1, 
true, false, false, false, false, "641c9cbdecb093855de02cd06c290821a71d9f104deacbb84286e1e2a139e58b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "d51da6e7742be64de3cbb4aa0031300b7c8c3f5b3b457567094f66226b77ba3c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 224480, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "cfa017421202afcdef34ce49df9891f9c90b8c4b9d9df7d56a4047e8e9ccc30c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 182480, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "55321c946e31e457685e09e703fb1617b167b972b91839b7063dc94a41bba0df"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 180336, 512, 2, 64, 0, 2, 16, 0, 0, true, false, false, false, false, "01edc608c39b8009374582d8e60b3f7e9babd06eaeba5a189ff83633fed8aace"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 174928, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "316f8380c3f569082db521bf894cae51e5e774ea9a5ee3a12261cf95cdd58626"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "ef3be933e1dcbe0a91bec0f4530125c157a31cebcdfe5be72afb9e8f81511386"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213624, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, 
false, "7d3eb76d0e240d0e02e9ada4db2c7895141bb9df9bab1da5d9963b38a038edb6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 207096, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "ba8e949d68ce117f587b6a8150ba45dab61e74aaf7624c2914bc57a0c2b67164"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 180336, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "64d4e91cd357ccc5266f5009c273bbb2e7247048bdffdc07ddc4c0b3756b0d74"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "e73b78b8bde77ce90a828912c10a8a139079181f67e4c878af765f31f055f8f0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, 
DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "6ff514c284b9e639ccc9c1bb666be4136c86d618aaba653f0ad94d042e632d97"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 224480, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "84aa0df77ac668b4d05af656d02f60704bedb7e391124d3e0657ba3effa93ec3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 182480, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "49699ccac6af75f458a7449e0a5dc6c4c6e14c9da9eb57efba18cc8fc9fedcad"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 180336, 512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "bbb835ef1969328da14d3f21bb923510f2aa685f8084eb56e9f961afa0a95eef"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 174928, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "b1340d583c37df0cfca4e4559813b377f3f54b6e0237c8fd8c74499fd55cc632"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "5bcd218785909f10bc755b6bfcf9fb17043682eb36b783cc953ecc78a7001ba8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 213624, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "0b816e03d9610b6af85442f5fd71dd903d8da6fa8f555447aea7a7f9abea5f0c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 207096, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "59a00a340e4c1ee937ce23e1c423429ac481052811b6072d7006600064fa9d62"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 180336, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "640185e57bc593659ad8ac63d4fcc3cd9a3a56158f05ceb6d79c22cc0f4799ba"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "b913719e84cddc5ace9cc8165dc62068329e2628b810511a74dd4ffced38c551"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 224576, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "6cbe9ac327585cdf3a3b63f2703c5f6bba692062bee46e9499b8123b648d88ba"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 224480, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "f2f2d225a7f3b17754d04559a554c97e4156150d592f5eddc93a640c658f451a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 182480, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, 
"786d9bc7264812a2296095d4429310d95b243db7a13ee81ed93e45d0eb353217"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 180336, 512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "cda79cd2db7fc931a80975a9999ad30ff926190bc711fcaa7ede70684e1c8829"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 174928, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "7becec11aecb9561b0df1834952ef7f97d6438ebd2484096c80b3819dd3d9bf1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 173808, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "24eedcefdc77dfb7b358c992a6693af492a32e267fb9104f3bbf575b61b2735d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "96249585872dd5a15c00c72a6f27bb658bcabb842f8dc416d445abecdd2dbe48"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 64656, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "ff7dc268072fc94c9382802e5d91e5f6267a91a3819246a8c819d1765ab76b7f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "e8bf8bc95b65748a194c718f191a85a69226f35e7edd4d92a006cfa88f1514fb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 64656, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, 
"6b26baa69f0db4c8f58f2adc7f5d6d27d3da699ab527547e59efece981142480"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158840, 512, 2, 32, 0, 2, 16, 0, 3, true, false, false, false, false, "d491f05c6e3842ca22fa7b34ef7df002c92649016905bb7d5bf70ea337bc489b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155384, 512, 2, 32, 0, 2, 8, 0, 3, true, false, false, false, false, "7a39236b7471ee083860aeb37a4d816295f6bb7076bff1e61c7093af61716b1b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 32, 0, 2, 16, 0, 1, true, false, false, false, false, "eb96756caf8166f3b9013c26ec86e810df2de587ad1bfd23a1251c69bc03e61d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 32, 0, 2, 8, 0, 1, true, false, false, false, false, "f0c0ef8ab00b29760588ba440c6f6a41d73c47a972c2c83194a3b93047a676c9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "e7b65106ea5db914779532c4e612dee83dddd7680eef49bbff4298542dea9ecb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 64656, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "42aa6678970323a6c5bd09d1cf0619f3cf06ecbd7362af3e23df753f08c4086b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128PersistentSwapsAbForGen", 125136, 512, 2, 32, 0, 2, 16, 1, 0, true, false, false, false, false, "56429a48c1770f605ba6ff220bb67488aa86e53d7e91a714b6958829ec085c58"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, 
DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 32, 0, 2, 16, 0, 0, true, false, false, false, false, "85545f19d534b3652c00e2dd32c8aaa286665269dab875c6890ea8e7e06d186b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128PersistentSwapsAbForGen", 121168, 512, 2, 32, 0, 2, 8, 1, 0, true, false, false, false, false, "4436364384a32c4aa9b574a8e72035f5752451c36940745364219466e66a5644"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 32, 0, 2, 8, 0, 0, true, false, false, false, false, "944670eaa18656ba58083b89abde13eff26f21cb1727ea30456d78ad90e655dd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, 
"fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158840, 512, 2, 64, 0, 2, 16, 0, 3, true, false, false, false, false, "4592dec90eed34c701a7f8115e01aeab0652cb74513af96d9f3de07e1a84d750"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155384, 512, 2, 64, 0, 2, 8, 0, 3, true, false, false, false, false, "75d2e367d682cca150982d60f95389350fa1de824e5a44b478341c6a03136be5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 64, 0, 2, 16, 0, 1, true, false, false, false, false, "eab7d8c7ed8805ce1bf1e4c570a5c889dacf602b48df4a9853713c278548147a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 64, 0, 2, 8, 0, 1, true, false, false, false, false, "018c232103f63037056ae2076b97476c01caf3c3ea7ed0aa427f533e1f5dfcca"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, 
DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "e2316122c4390eebdc63e2c3a80ddb683b3312925d0d1014c9e1ef34eb523e67"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 64656, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "debfadee9aba3cc95103a8ff009d6ae5cdb39a0dfe4964e2180824ff8708376d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128PersistentSwapsAbForGen", 125136, 512, 2, 64, 0, 2, 16, 1, 0, true, false, false, false, false, "d0e0f44f93086ec5a4d7d241aec81a0b40529db6c3a4442ddfaf4357d6883a04"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 64, 0, 2, 16, 0, 0, 
true, false, false, false, false, "3305309d2b24c8765aa824795253cba8d5dea0a9fe3d382fa8c906fc304f7926"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128PersistentSwapsAbForGen", 121168, 512, 2, 64, 0, 2, 8, 1, 0, true, false, false, false, false, "8c981a860c3a8be3e3df5814728ac938dfe74e548c9c34a9205696db02712cfb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 64, 0, 2, 8, 0, 0, true, false, false, false, false, "f12b59391f5d686c218b56d1945fd565a8a7a5d00c2fded5675cad324a641958"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158840, 512, 2, 32, 2, 2, 16, 0, 3, true, false, false, false, false, "1591a3b4cb1b3ecaa750022e7f92d874fe30f07c224176f0294fd0d8391550ca"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155384, 512, 2, 32, 2, 2, 8, 0, 3, true, false, false, false, false, "0c2660eb6c6945d3203babb454863f7112d44e0f62bd0928aa8c8972b1101451"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 32, 2, 2, 16, 0, 1, true, false, false, false, false, "7eadda9ec8f484dc71ffd579bee89a3b692e1316e02bfccba95427a22e412ce2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 32, 2, 2, 8, 0, 1, true, false, false, false, false, "0fdd5ca8bbe3b416775ed0602e17a192505dde1f68974404363ae48f9f866fb7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "f3760397ebaff484155112c2e29ebc21940e3a2c86da7b706b80dc2f51aa01bb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 64656, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "ed628c99e48a4d91927358f122a318a8ad8eb96c60e905fec4f6ad0624463b82"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128PersistentSwapsAbForGen", 125136, 512, 2, 32, 2, 2, 16, 1, 0, true, false, false, false, false, "bcc1cb67d3fbbe0836512a1361722c27696683d7202aa39db3208be9ca9c7e3b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ16Kv128StaticSwapsAbForGen", 124016, 
512, 2, 32, 2, 2, 16, 0, 0, true, false, false, false, false, "70a698a0e1fd55eb7ae29b4ee4cee367e0be88f5a9fa6d3dc99ba65073e214a5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128PersistentSwapsAbForGen", 121168, 512, 2, 32, 2, 2, 8, 1, 0, true, false, false, false, false, "8afd36e51794fead0587260b7ad64a2d5e53b9e93f7104ad15da031d67f9ba8c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 32, 2, 2, 8, 0, 0, true, false, false, false, false, "41f4ce66131acc7fd6b69b41cf08a877102bc6cd8a88c67dea6a217f656b1a98"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ16Kv128StaticSwapsAbForGen", 158840, 512, 2, 64, 2, 2, 16, 0, 3, true, false, false, false, false, "25a73e1d6d5863fba9491cc47c64db15ffaea82c9e9a478e7809b5e1af2e3af5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 
64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvCgaVarSeqQ8Kv128StaticSwapsAbForGen", 155384, 512, 2, 64, 2, 2, 8, 0, 3, true, false, false, false, false, "60f8fd1f8637f93f7dfaa1bfe290eeb9c2d83bf9e88fe9e14b8a3bbfb9b73a4b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ16Kv128StaticSwapsAbForGen", 124016, 512, 2, 64, 2, 2, 16, 0, 1, true, false, false, false, false, "f413b0c843b661657f2dde791104c02e01d089b64d1c2325055a01ebdf96a06e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64MultiCtasKvVarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 64, 2, 2, 8, 0, 1, true, false, false, false, false, "178b8c0775d9a4471750b8d4712dac3a54a577ffd8fffa6dc19a7ca41a432c73"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 64752, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "297373a590710f53ec2ac9e84ca7fced4abcd519f5daaf4f94170940b49d897f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 64656, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "76305f32b95132ce498c5ac1059a68892308dccaffd1f4f96d94092d81cdf9b6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128PersistentSwapsAbForGen", 125136, 512, 2, 64, 2, 2, 16, 1, 0, true, false, false, false, false, "cd41fc09bae5a999fb2205437093c135def419018acc1d4d3f5f62bd18f3a928"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 16, 128, 16, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ16Kv128StaticSwapsAbForGen", 124016, 
512, 2, 64, 2, 2, 16, 0, 0, true, false, false, false, false, "a6fa67752a06c1a05eb53fd1ad3400a6085290095fde02b1e5ea1e6695111421"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128PersistentSwapsAbForGen", 121168, 512, 2, 64, 2, 2, 8, 1, 0, true, false, false, false, false, "db0ba3cea69417ec06b24019f1ae36a775f66e0680576b9e0f01ae47da89d511"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E2M1, DATA_TYPE_E4M3, 8, 128, 8, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin, FmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen_cubin_len, "fmhaSm103aKernel_QE4m3KvE2m1OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ8Kv128StaticSwapsAbForGen", 120560, 512, 2, 64, 2, 2, 8, 0, 0, true, false, false, false, false, "a88886a4f8d324308bc946709be200b5b7d863bdad76ac6742eb443027fb6e79"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "ed0fd1a0e92d2fd05dd62cdf661550b5271d02ead35b91e05e65de051adc21e8"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, 
FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "3114d60f63ec476e29a47bcf27c498881387fad146d2f0c631a06ba8bbbdc73a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "03aa7dc830f6b820223356de34d12ec7aafaf9a0e184e0d99a5a8b923aa1f4a8"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "d7376e058a122d1c8d954ad02507fd55f131e8018d369358f2f69d25035ecda5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "9e793334c59e5592a8b88ba6527385c1aa98c64c109dd4ce725db9abb2272be3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "ed2c3ccf6cbfe1976c3b633008053c315c7d5c5bd0781127d9081e8fb0ac9106"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "5864d64b853f2d82bebefbef10b23c4f66e0e6c214e5e9b1edafccd3e00ca7e0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "cd4f6fc784db62948fe0bb0450ba5ba2dcb5ec9b9d144c471f4bb8913dcc014b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, 
DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "76b84ef7a9ff20648d5d45579982e36160f59d288711117ca557fbc4fd22a1d3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "c6ca7b460b0f8f6b1cbb1f37ca65f72234af7494ecc48b031b2add659ca3863e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "636a85cf0e3d4b3a670eae26e4dbf6011b90ae0e15359930932daf74d2017eca"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "eec694d468766afa3bf2d9515cb51daa009afee1f7c83a154ffa804544a0e7c3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "6ba712dc7f445f2108e0e9a3e29c43d9e654e86f89619314f1c141dd481258c5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "2ea339c2cb4e49d890edffcdd4103381dac29677645c7cd4d1eb9a33d91c3e3e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "a2b70ddcf959026c70417a88a1ff74e0934f803ceefe208c9dc5951e99120d5b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 
128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "950f3efcf2d1a4dfab705ec52188276b3d3ea047eadcaf7be6cea64fac51b7bd"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "066a92a0510b5806f5cf6f4a4155cd949a953e3a14822644143e48e4293409f7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "e7e26dae1d0cba5b972986967b96ac8e6eaa079aca1aacff86b9679e869a6541"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "0eda90bd79312d23ff7c8d51cf9e061cedf8dde7c658485af096e659db49ef94"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "fcbe9840f818a4a0f0d86bdce85271ed40492ddb7e344ff4c90a199eea0d6f99"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "2f72901b9fe4c17ff79f8dedf1b35eac14adf6dd8603a139103f7ca9c7c6d289"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "89ffbd820ec67f329be9442b02f015806b021e92796af0b8233b731e7cc46ff9"}, +{ 
DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "fdddf33be68342c12c05d5a47648a76fd7c0185cd41ccc4387c55c6ba861e2e6"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "854b7f7ef1e0ac926e1836f266ea326e07a148e3b18ace39eb8f9cbb1f9c7bae"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "a86252df5c0f6cb05779e0470ab81066e55e4ceb994e08561c9609dddc2f62a6"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, 
FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "cd5d40dbc4b6edcbb43e8fffa2841e34d93428ec87d25b90280e2982cef07305"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "44a97a5be71ff103f13b99770eb0275bcfefe547c76841164bf6db0768675cb3"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "8d29348312494ff327c43a5e7ed7e7dfeb6ac22a76d19b0ed7adc3747f660114"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, 
"b2aa71c8ef1ed3c4230b3984539a0dabedac5120a31fe6b04184eb1d3d5529eb"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "c0792f2abda3d8de896f15697b1ce82c6c0f949cf0e37616d0d5b494d87fb625"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "a9ca3b74c9b32daf6827275e57cbed85ed675f906709300c5d0637d3a5ea7a7b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "d59677ad28917c353f594f59be535379104e4b8ce3b6466e6a4285bddadbd97b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "5fa98c592e785c9d5dec3151e5cd7da46eeb4b12b08e6f0989c43acc380c766c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "ff77f8e2a7c91418dfc8f5b9e8ec711f8ccf670c1369b81db3879508811964ac"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "76c65c651f9336ef60ef26406d8ef1dde0a55a875b01b669873a778522c690be"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm103aKernel_QkvBfloat16OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "2bd50d7cceea41a4140d89880daedd66f02c9a3d32cff0002e269961b9c73db0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "3a06a61f79246265f820a0f61780f0981dc19f624b71140f3c74e386faafa439"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "ffd5e3b92cfe6a7c81c027545fbf4b028faf2c8851ed48f79655218f4d9a7940"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "536a4f7e6a3d1ccf8b12a453e35ea987fcf2fbffe7a9d0c9553c020c56053a68"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, 
FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "e4dbfcda778e32514ee0df7c533ef2e6a2ace9a1eb81052c447aa97839a28da7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "af72b9b70fce987f55c61954f8b1d802a76fb742716523f7276af7affce76ffa"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "9899441b1f84be718038cb7b25972cb11b720178db1021add1c79068c8d7069d"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "8e1c7ba3059b060400a9e02cb38a262888df668ffc406058f699df3cfa683e82"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "d1ad38c06ad6d4366ecc08375a3acaada3ba02eee7eabf3a9baa60d1c61e611a"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "ea54218f666076553951a60090dddd3de06748a0380d4518bbc58987d1221a0f"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "6fce2b3a691b011e3ec467d51fd55f51f6125b4e614141b012e1a987c7480f08"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, 
FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "fb15e1d7d950205371e0e235c9b9ca8ad593430990e0004e3e3d65e4f4a750f6"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "626520cfb16c565e349fed6d35e5b94fbaafe8b02b463fd0ecedcd4b9a3e50da"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "862efd32c906fd35bc3a8b6954d2299babd1517a89d69c839a6d9c0da7d1f7b9"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, 
"adad01d4ed17a2953917f005a54f5110181d7c12d0a7f58865c66ab115b0be2b"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "14a98e8f8ee2d54d3dd645c5695fa32e707b7d0e934a66e44535ff29d10c12c1"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "24bf7caf636cc88004985a2f2b7f1529bbc74566a69d9636248e5bab755ab0cf"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "d277e3ea2dbcf487a5b2d5b8f69f2ea6e09ddae8e0c3d068817d685ee2508a62"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, 
FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "24f5b7205732e0eca335fe58b81bb21a2fee018fc62c15f4f5589dfef38b93c0"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 196976, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "ee0656fcf86a8a7ee0b4d27bc657cc425bd771191fbc3521965476b5ecb63f80"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 196880, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "ae960d0c3470ff6e9cf74a8cd9531d2c31384d986964b6ca86bc4ecf204aaaa5"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 196976, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "f0696ff39cba9d6b7c85d889b11c2d6fa3913f8d8c17a53b80b3a470b2bc8d64"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 196880, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "ad20ce6c0472cc4e1a0b5b6fb2a0b16cb7a958b002a590f455f40b34f223545c"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "04c8d6197ae0eed1db9b456368161982e6fe9c3ed168a84f29810277c6d66639"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 197744, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "ba8f4ba981a0d020decd86e1f5b903c3adbe8a42dce3181a85eb4d55d857910e"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, 
DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "f13a5c13827c4d6881dd961a4dc1d59e4687311cca4829fd599bb9630832d1c1"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 197744, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "fc8f595f85aad17dcc156520c857dcb7cb4e926fa5d4a4819a2f278382b9adb2"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "69e36aaafcefe25b68f90f71b3c66745f953912d6083f62eb87bff8fc05ff297"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, 
FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 197744, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "fcfdfd8ee1b65df4cc99fff29576e33166201c83b296f6c3a1a061681086b7a7"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 197840, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "85eb314552c299e8dce1d1a8b170722f7c58fc37295a9cc85a5d71d8cbd78d64"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 197744, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "cca32972de053329de72567cd977a47cf6a7be7fd58dc35829c78b7fd689ca86"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext", 196976, 512, 0, 0, 1, 0, 1, 1, 0, false, false, false, 
false, false, "4d55f04aea92597f45392722e2d9af252fb06943645371803746b6cf2104eddc"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext", 196880, 512, 0, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "f2bfd37b73c64be475813e01757db0a312e6ddfa3036f8364c562d57224be082"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext", 196976, 512, 0, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "be1d2500df5b8d3759d2e0108118fd0f9145c90f400bbbced2c5da7756b62c25"}, +{ DATA_TYPE_BF16, DATA_TYPE_BF16, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvBfloat16OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext", 196880, 512, 0, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "d62ab7218102f427afe08e018b0d9572d9ea405663f55bfaf4b87b179ad1bd9a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "a90695355cff072c9f009926c69145f16795e51356cd44e9d4090285d4c3e936"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "f9f5b6e27b925bfb5eae6164b1f251df1dd7632e4f0e64ed17b2225b6ef3cf38"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "f0d770e2db835c045b9c61bf587d038f02562671e258479c6538ab23f0381484"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "c347f184fccc0ab203ef8d566189893277de1aa07b4716b11866329849751590"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 
256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "9d23fae7831f1f26bf97075285b4c7337b5e13d9146399a21b9719abdb9d64e3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "a43290d5f4ebefd822482a907fffcf3878f37bb5c8729051d0d4569f581070c3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "fe2eb45a452473f5cc35abbc9cfad6c06c03ca43310191c85afb7089126923fb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "7d9a1982d4291e80579132ac36bda6209f005bb650829a32aeb616d01cb92ddf"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "13a75c8ad09245eae45ec5cbdecf66514f18fded0bdea2cbada6e232fe0b16e9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "21f13470191e87709a55cacf9db81137ced8f0e020c4db6a3454e2303ca61e31"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "ac49388d17e9421e7edf614235d79b7652192e79b0feed16d80dabb46a656a04"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, 
FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "b7bfa5dccd651f00e0d9e1f894999867c4c08bee727277ba41cf804afde80d6a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "afca4fb0bd2eb26c0897816d13cb9da453d8472160df5795449a6dba54e48985"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "36816087bd314d42f851b15cf48e2849141f10bd77f631c44252a1b365630dab"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, 
false, false, false, false, false, "3269fbe16c212fdb1158fda1c8abbc67fe68144a92ac0552e63ff227f6899392"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "b319d268dd377f1e436ad885dd63d8523609feaf3e69e8b4fa39cdd230683723"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "7c8f1a3442a3c5e1c8c3e20aab9c208e818901f2fc5e7eaabb57a7eee97dbf1a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "89607d87ebdc8e006989e732c7f029db222a2dd6776db48170ff4ada21e110bd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, 
FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "6a84c65f2b14dd9ead65b4113a74f709f4ca764c808ca345d9d5946531e37247"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "8a9268670ba8adb9d27ac36d7c3f1c6b8c974469c564be0438b974b4ccf5cca4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "a02c03f0142fca4e2377358efe4c562278e7a19ed0d91bd41ad5f80d27a96e53"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, 
"86abad0abaa846585a6a3bd96176a8ecf37e6f017f13c4540cb2a3d4df00917e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "fb0aca7e8a380e827c69d9d405354b1f18ab8519abb2ae4ada024ad6b419e766"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "3a9ed9a4e0922ec792ebac56fb5c6e3b0f45d8db7bf3c674f9a39d4946f04663"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "ba2a5be8dd6915c18ca3000604be5023bac3e78487bf475d9befe81058f1a478"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, 
FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "d975944fc7c978737c636fb8338dc5eb8dc8f47d1077013abb0538d56a437ec4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "160240b0c02da2b1a82f63a8e8a740bbd7738ee3e3d317fea3f7de7d9ece4577"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "52d02cd28446bd5eebdf676146cba47bb5ae6fe6c250da10eaf8cd74945d3862"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "144460e08a9a51ea6a4899daa6a6bddc25c8387bfde068f722f03496fa70f815"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, 
DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "b41a9f3b7f0e06d06ee1c22f509075050eeeb74ff81dcd3178fdeb8217db3032"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "dca4d4b23f391f70d6f99fa7a7dcaa5ebc2ea3a9e8def8318893d1575157645f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "8d3ab2061c2fd804aa5baaee4cf19bc102e6d30b7c0bb09ee1e151f16a91edbc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "871d517d34f9db01f974f3731bdbc8879097c78b30abd66e958e59adbf6b9bca"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "bb4e33860a17c1ee421d6f36e46e24ead8e4f52d81dc7b845628c90f44e98e9f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "143b2b19174cabb96e121fd5a2ba3aac08a1ec4edb2ef9b6f17d167d83214d19"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, 
"e40f1ef4531980d6013f4f67f2647cb6fbe532ba5d16da071f19ca10ad3a3589"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "e86160c615e0163448d29ccec2e846c3791979678bff88a0041d06c8024ff943"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "02333632dbad5627db0ef3eddbb5cf3d1cdb560db6c487ee966af8b82a2d04ab"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "72c6d6feda7ea5080eef5e52ef422ee35dfc8b3ba93eda2efa569a97ae7c5dce"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "09ef9bca859603d8edbd1ff1fe4b462f41703b025f2eaa3255f368ffdfbeb8c2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "6f2ee1f392f0948aa86df153aba8ddcd44519f1b01d08624eddb21ed73163ca9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "ce97cad8069752f70c595ea01cc8fc1f5e294284b761972153afd3e683e2d499"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "9e12ad7452b72a62654d29489e3d829232a901d508a888bc2a5a3399976b74c6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, 
kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "1cd7bfeb753dd4893294acaa7f1adabd61ea0159a05aacf651c52326b5d92fff"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "5b47e283c8cdf155597950737b0e498e9dd49db25a015b08b76fcbeaf5e6f8a7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "341d859c9c43710cf44947ea4384b6949cdfec2a8c400e8cf9c6f59a2004c230"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, 
"d181f58be9e6b31388a3fe459e686798f36a63d773312d98e3a7b42872321f8b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "12ae2be2bb45bc476eba7aa3255fa1eff2054ee1782457df1f8ec4960e45823a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "a111fcd3da5c362556e5bb3e786a448e362f59a62abf9595d3e92a2251c0a060"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "7c7224543bd3cd2f4fcbf64c80eb7150bcdf4ce4a923b6e51fe70b29545f2ef8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "d46b7ca47ce2061c15a5623c5445d9fa3d4d92cc6f442bc15f1618752f7b1738"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "edef9379caf3b2a6e1a71beedf18e5694dc48e308c57401f0587de908e2676bc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "b54c2f8f7f3787fbccf9b595e6703a26e50e908740c41b504cc6079a79522a43"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "d48b58a58de8dce0461cb211400611645f2dbd3a0c539d9b9fef0140b6198849"}, +{ DATA_TYPE_E4M3, 
DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 115024, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "aee5dc87310f441f2512fb5351527e6b4bdbce4a3095fc269656441dc8e566f5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 114928, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "bf1a712126388a5632bcc950512ef67b1000ac9600efb7dd4d9b323bae569cd4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 115024, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "8da95818634f7423fd0690c29a26d64c2d915095b3b0390416066485eee6f210"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 114928, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "44e0a615a03d6ec4dd3794970096d3f6defb359fa9d7325e28df00e6c01fcaa9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "3050dd6787b2cab9094af89e7b32bd9ba4db7603a266398144fd2eebb8565a33"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 115792, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "2103a29cbb17610d104e4f46cb9928b411aa5a01a055a48fdfc1bcf8432bdffc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "13a5614bdc309244d3f6547ae622ae2e1ce43cc2538283b9cd8781db7312098a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 
128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 115792, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "c112c35c42abf9412a2499e2dd31afc5edd3a68d030c725b2a63c1c747e5cb40"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "3d6e5418f0a55f580a389c023832a8477be75588e50e4ab768826a4088e910b3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 115792, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "06a4537fffa8d351d2c7fd411812dbee864639940241c5697314af8d12a8e297"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 115888, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "bfa56c35c028234888ccc072b61599775b4c604d1f7d491f7c6cbb9e97d497b7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 115792, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "709d074e5d837bc1ac7419fa35d290c7e7c0161876f6653524322ba3ff5049de"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128PersistentContext", 115024, 512, 0, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "2ff4573255df60c40ba406e377a7bcbbbc77e2dc2f20b829385a6db4a67fc79b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvCausalVarSeqQ128Kv128StaticContext", 114928, 512, 0, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "482d08ef46af0b81d50913a643c757d2639f483e140030262f8cc0f620b4410a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 
192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128PersistentContext", 115024, 512, 0, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "6f52ab94009706ce6c5129cf7aa60eccc89dee10a7557ba7c94563bfe8a68c2d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_BF16, 128, 128, 256, 128, 128, 192, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OBfloat16HQk192HV128HVPerCta128SeparateQkvDenseVarSeqQ128Kv128StaticContext", 114928, 512, 0, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "474acc308fad37d158ca39c8663aca2f5c8276819ca88e366ca64dbc67fb4304"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "7827f861a067978a4b5561bf37e1b13999b5a20a72eb5ab0f57e68b7f195ef8c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, 
false, false, false, "6b10e1b9139da90c1b9b6bb5cf86ba0fe6882e6e03cf1816af2ee43fbb6727c8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "236fe1d995d3341d5a2ea9b4972d4bea47e643cd78a65c8c443a314f3e256379"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "d5c07647d8383eb409cf2d5c77dc095f6444458aa109d05086d233f0547af3ff"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "c813f3d27fb2d68eb36cff8b503575cd81a7998a249a4c24cda8af12be0317ee"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, 
FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "35e255a9def3f7b0301ac1689cd10f71a36f125b43c941040bd269a15e9d4a6c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "117721a2131f90ee8e38d395c7d9089a6e6654363e75f9791924e665251ef5d5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "731c2bba7ddec6ea48dd7cf34873b773e055f3e99f0d44e80397c77679520255"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "d57dd892ec549c3a6365da7272cc1e3da8b60c0bacd5940998666e080e5b9abc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 
128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "aca4065f41e75408a9ad72627f81f16411b6767e25c31d6ff9d5be033d4f5bc3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "d28f363976c380d52b0907c82f21f74d47cfb6b497ef4c13b7c884309967b5f2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "a2ecdcf9992535157c496cc04f5a7cb3bd0c2fcca6cc34ee2b52f24775bd56f7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, 
"b913cf4a5b190580fa510af31e8240e3568a857b3d80c29bd0ec0b16934775ba"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "1276472835b9dc1b612663b51a311cd64649b876a4e9dbed6c8590555714ef9f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "5b706f3d0af02fe301f23e64216c0ecb5157891ba5d9575029a597d85e331cc5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "14f3af5cc1f6b6636aa898faab78dde7a23c9196bfb5b38689a7da8581148414"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "745ec5b0ab8fbae957618a72d1150451adc12727abf48db57c053322a91b39c6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "4741db5519ad9c874b4d63fe5631424077bc3a9482d3da2f79e465f11248895e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "74707ec90ddf9eb55cff42110f8c1b4e02b2fba1c3a992b12ec16b511d4d2821"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "0289c96f894fb38747de0bbf736af8d3a7fe968cd8f6dc0586172cef065b10aa"}, +{ 
DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "8cd9712c35e9d4b57ea6f3fb42889d9e64e6fe8daaf56e13e9b8b623c2ce3483"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "792993a8c1430f4526ddb5b76ce4157b8a476595551fe939983bf7c560984d11"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "64c800f03f874c0be6053725181f0ff9039862aa58ce733b6cb2112068547a9b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "933bfd0a8077a85ac36790c1357ce48915c638e20170c5cd8e27a67040acec42"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "09d814127130ace88e0fdfbcab202979034c6015522fe6203d824e312b275dbb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "5630a65b0a558e7519c87c959fde1df26181c819c583897ce46de3c3a0223def"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "8fe17a54bdf442922e7c580dbe02cb5e3ecd8eb4a7a97e74ffb99f31ad99506c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, 
FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "1d00202b74001e748b51e51ba58f4f0d807a2140badea4973f92b0915a6301e1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "e54cdbdfb65168d8c2dd68f30b9fbddd8a9ab5ff07566721a24fecad681dca42"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "7f6f8fcb538fc69c2265cd08b2569b81728ab518001726eb659248ddba4d5e21"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, 
"40198a3ae95d17da3d251b3f064a80f96cf9a6861c205e1e09737fbf0390454c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "6eaae4978b1d108ebe9baae124288b2f302771196b00e5663169e524f35e9396"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "b88c9b382f037e52f0c6842322b6c07bdd9923dbd74aaeee551fb8f338818610"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "a46da4414ed97b5728e78f0fc62b5a6a3685804975143dee0dbe19aeaf9085b7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "4fc7092c502e1888ab3ebd6c43629b0a881f0ab6617fe52d1056fd8f75ba9998"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "c340ff289c81ef149fb126486e8db0d9ad27a100bea3ea1a6ed842de3fded316"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "e9c11b029e5d99755c6777c085751087da8ebd4ba16bccfe298598bff9c32c14"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "130435cd9915c686dbe252ce41c4f02b71b6baf20ec3271774ed69ee85ed9b29"}, +{ DATA_TYPE_E4M3, 
DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "e0e0a1f3099aa1997dead4181aa74d4884d5db6e0e856175760162879565da30"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "f29ba2be159d1201f065160f017c71d64bd26c39b02b144c80a96c5c4136b439"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "dcb1782961d4c02ead14644029f665f11c3f73e1e70f186a17c194f407e812f5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 
512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "751e253943c85ce9b10b4df98927147ae30ab944f756e91a4dd5a7f02e4e358e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "707392bfe802fe36f7a8bde4f46721e0ef733ffb5964dc78b7d336f5dbf5904c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "4f367ff048b03ca43055ed88421ca3d9fe193b601719b9147fa456e803f8b789"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "30d6aaeaccc029c5843144632cd61a0f48821b4397a895d1c951595540090967"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "627d90630f2ddf978e117a40dce0996a379235e832a345675cfc2f745b3698a8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "02a7300637e5f923c5b3b5846bfdf03a5b27f63ee1fdf83e33a3701acea646a5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "5b24730d8b6e106e68e11265019e1e231c4e9075dbb158c180daa931d338fc96"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "38a80556eae4e2fd25dad3696aadb4ee80c85642de3587e494bc3b2b5b43a51c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, 
FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "4b5c46edf013965731e5157c3811fa0d595c8109d4e5d49f47ff4e8e46a3a5c3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "f8710aacc0424cb748ea4c2f49e5373c8cb6ae0c6f652d5a05ff1533090be757"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "8c9d134245deb3189ff73b03232d6883dceb094db3cf6a4bcd7a3005d2224c23"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, 
"26686a89bccd749b00e6923341b29d73a9e0f9dc7ca484e3495c0a64453ae67b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E2M1, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE2m1H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "bb6ee5952722a6692e8a08bf8322c74ed4724a261185f797b2e9d5c61c00f403"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "477ac564604a8f836e12fa39c2a985abb7d8d9bd52ba384e9423771ac60c3963"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "ae1fe3d39fbb9ecb21041f147809200caab9b79be5ded21e775fd066a06b4a8b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "4b475d1817d9bc99065ca8fa8f6c7ef652153607401f0ef58edb0cb4cad44b68"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "bdb0c09404b2e5dfa9dacf060cc9607f522153c1e0beb7da060b3372d2530ed5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "047682e06c591e3757b46da355c87352e5a43994acbb8fe52110bc0f397b5df5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "d629a4991fbeae28f2b4d9db8a8fcf0ec08e0a4047e331ac27970db2fcb87347"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, 
FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "903fd411f56802b8ecfff5b2d6497162a97914decb1d09bf52c24162b3956342"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "618a21c61105d84b91b93cb62a9362db4050be3d1aca0b7cf16d23369018ed9e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "8838d1f0c14826f593046d0f4e078c668215acb2cb928bd3382b303884e3aefb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, 
"58e1e9036493ab85ef32dd16a45f0168ad1a7c4ebee64ef34a0ee583ef0f3c77"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "332b461a4f1b6d8807a5c0e3a01966519d37535e94928ba93fd3beeac8f18b4b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "c37bae247e326a0c5ea8b614cfd17eed2d5e4d9872ac05982b3997e174dd33d6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "146d90cf5d8964c19d0d67cb34466d7e4c340c9ad1a1c46f60a0d0ac9307f604"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "4808a5c8f660379fd7513c9fa7bf8354b8a646b32da580597cdd3c5cc7cae76c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "829f80b04a413c62e38078b58a787fcbcc39eeb5171f751f842152badbd4ecb9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "437006b8d5f716843cc3b3b68183a72f3925f66b2735eb71613d0bcc62c05b46"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "89ea15389d4a54a125b4fdf32ae280d636675924ac4f5973f1ba6482b43892a1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, 
DATA_TYPE_E4M3, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "f796a99d12a0d9beb643c0a30aedb98efdf0bbbe0bc58eb2426da830d9729ce9"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "8dec6f5ff7de93c70508889d3fcc28f01efea44dbebfe00615a4588fe9add845"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "7abba4dcadfdb75709e9dedaac3041ebcfccf38edd22aea10cec6148f4def447"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 
1, 0, false, false, false, false, false, "d6b434e85fed7cd4b98ba712de1a218972de1e4bbce49fe0993bb1cfbbbc08c0"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "ad0c49a5eb97056b20047487e7ff216866977395e57a69a2d431844d6280f06a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "501f95bfb02cef240abbe1eb5afcc6b21ac1b9c24e03f25080d898c7772add59"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "37500b54d656af20479a8ab139b01b6f0dd7305b3249364451017d2dc8880ad6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "f29f20cb056eff4fbf49829fc8374867e636809f59cfa2bf6bf3799d8abeb42d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "b6aa75fbd19c9740aeb3d6e6100862c5bd6578d5b719ce7c14d553a7176dc672"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "f671b22d3b2ad25cf5a7f7e2fa181a1aab75a1fe32f1b53d96c6824a5b152847"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "ae227e5efae6230626ca824a0e4388eafb1c9dd30d6299279b97d28a5cc9206b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 
256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "87ae0711ff69ece781d5594acae7dd316d5249a221f83ff1308cae3c417af4a3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "05a61b7a5fb4a9b022e81aaa5733f97d350c85743f82ec46a63a5a758d7b1a6b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "561b7bb1ad405b3cf6d711e27fc8b564da57c5edc69734de23389ca750199267"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, 
"963c85d0da8183df8fc97944391eedbd1879b9a3b165e654e848ea755523ca99"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "fc4bd1104a5f0f1c26d7cc515ac496161b15ea99834d88591608ab77cee2bb66"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "3d32a221858d29da44d2dcdb9b7e8ccdcef96a84e1c68cc1c5714007b9102f3d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "0360b11421fbed7030cba0d0b454a6253fc2472ce11e4b37afbc672f53f58be1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 128, 128, 256, 256, 256, kSM_103, 
FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "b259a6736f2d59a09e093d0b19144d2aa20e74f6f246cf36d60628fe52a5f448"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "37ae8a29c762ef66de387adc67acf244ba36d63e3e1d12d5b96744dc7bfd1182"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "5d944f68279e906865b100518e7f3f4e8dea987e97047ce22c42692abdc5d08d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, 
"53144259c0ce10ce75e2ece8169d2eedb676079fcfc4959c7384800093ed413a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "baaed66d5d6cda74b0827ff5bf5c4e71d0acd3cde798b33f3ddbe87ebd2b42a8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "cad47dac6a21a8160cd0fb0200cdad9abb10c9bdc5d5795a0f36a77908f416d6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "b808781995b4e4ea483f5d95d3b138a0be20ca9a9479871ce2f7b36443792b9d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "f53b2c4d37c6f2f482329a7fb9b0cddf43c358db8e66875e87cb1fb9512b16d2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "b013c3ff659892e5fb6ed3d5f807499dc26887595b4ed138aec09ff65e051a5c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "99bdc197a4899dc65a08a0ed1a53bd5f09b5541fbc07d1b9bd5fcd1377a412ae"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "7c2b7c49dfa8e0ad59085a7476b68e45105467611071902c8aa53da2bc317997"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "395ff2a1502f25c0b4bf29bf98c22ab99c11dda42dd2d4c46164e51d3d016845"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "f3e44c0ee625a6b3350836ef43c3d1746434ed0ee8d979d23cb0215e3b6de067"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "bd0b59149e9532a7c9bb525ae37d8728056a6753fb6bff25fca14cd35065ed42"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "efc1cd627f7b015132c5e4acc0463f4355888afaa0d53b5dbc76b1525825a972"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, 
FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "7a65d4c9edfb8ba1f0931b8dfe108f4fb8d298a491c9f76751d264d093fc60db"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "a4f7e6cacfeb9077148ef8f0b79bac02140d393b2d3d8f047225ad7bea1773cc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "35c71c9fd8e13038ed16523707317cc9b62f7cdeffa64101eff218c167690cc6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_E4M3, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OE4m3H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "6459ed8b8161bea595d386d55bd227a1c7d473fb00db1738e7c14e1c1f7c6435"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "a60b7a5f613772bda5b87e44432b8c31d7e9821233fb3bd3e392dfef8fa0a226"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "446a0c6e79ce765ec5a8bf1537521cb74e6a423a79d365a1bac8ee0589072a70"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "c12d6355c968b2e72355522b67ad6cb818937e7a5f2efe4f7b21389d04f9d7d7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, 
FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "7c18d5fe49f6b0edab8b1170394b74d30017e95e64db730327ea601b1f8cbc23"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "9b7a07c74b1b0e7a683622bb19c337894a216de5414befbb30f5bf7e48aaa381"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "10314bc01f562bfbd0123ea6d2f40b5bbb238c1ce257cf9ed14bb6ad03dad241"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, 
false, false, false, false, "535ffa8a58021b488ad4f8a3f2def76d971b3f61655ab5bcb493b29f25c62b0d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "9c630f96b1e94edd8ceabd26c58f49275e4fb5414eeb3c0d651be8abc6efb347"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "0d1b8cf57d88eeee23dd95a8e2afaf9ad67bc93af31ce9703a28622d69d158c4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "f25b4c977d6dc7d0220f57bc15ca8112811715a8638f93b8b5d2dad9674b82f7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "a6494c8131cc00f6acf87fe59c78266a3c1ab07323829fd821c3f936701a177e"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "beb888b377afeefdf49b650d882628d6c34c9a29c763e295e9c93167cf57ce61"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "f1ed7c827473793bc64d80cd1bc95d3f0c2103cd07d5e79703c5459d8362e9e8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "9314d50aa62cea7617cf877a97e095e28186523c64bc0cece4e86d594010ded2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, 
FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "56f22963b5d9dc695b1ed3fd603c7bdeae94130094a482e73df3b848dd27ced5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "8a08f4a98a67f08f9ea159a47c3b8e6471815169553484ee3e50b79c133caf25"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "588665dfa9d196e572076b5f7638760f43568d376ddd89814638c821ffad68d6"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "760cd0d20045a10d6f645de721977f9b806be3e07baf1f5957fad961feaf6971"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "7656668fb14b7dc662ab9a06820e64f6091385bffce95b55143747a7d85d8776"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "74fa8831aac468b9d7b9130891d270ddce8362f4683f69805548d4d05ccf3124"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "a9711f6175d143d3232010e9a34899d2092054479c7226fd8568e5fb1c4580f4"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, 
FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "f4dc956e2ddcceb01726f64117f03b5cbb78304b0ac609c7505f3d2fad6b0a53"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 213408, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "7e1b22d82f1d27484a9ca8ece80a81431d1287d75a16b6e83f5fd97b224740b7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 213312, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "60c0dbb0ca5b158c9039bb8b3a0f4e1c4e82a8f5f4e96cfa53519fd11d1f3514"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 1, 0, 1, 1, 0, 
false, false, false, false, false, "65a9429c6a396c4e0f13b5c97d282499cb249a06854096e4b38f4768973fe1a8"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "3331c6b9b44ce26baa7121bf04f12606ab3508c3bb5e2721d422b74dc8aa2a5f"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "6e500858dfbbc7f6561c62828d84a966c6aa7b8373936f1e3b786600e6386f2c"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "85ae99d5a5db0467ee6a10ed834208e5ee9c1a5f9f175b7c77c9a36007574f33"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "e57b1513b4ff073318e85c576fb4899fcdd74ab0a5eaa680c3834592cb0ed8e5"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "0a15f941907fb558d2998556b3f12831cc64ed4c2260ee9f12603c902fdbc8a1"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "add1cbeaf5fad843c9a09aa7f67708a9b3d3886565e6be98d5ed0c082d0381bb"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "7f3a764ee8bd8bafb2a8a133b22f1dfa5bffe2568f81a9afb0e8a04746efb4dc"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, 
FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "4d5d329fd7e30f666e44823c8d49ea18c7f4b59765399bcc780b1ad12ab93495"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 214176, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "212dbf17273d8702a7d60ee6bc5c60c2b3c3208ec43611fae77858f2c8f51e56"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 214272, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "d7be62d23c9e16ce13075e03e1a0e91a902a9e68c16d7dbea8847817e2b23fed"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 214176, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "0f3fb4bfe8f8f3090ce9fc998f225f1be2d3f22690159ca2a74a027340a16f9a"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "f5250f811a37bdc02ff4950ed74a0f7f9a6f8ce521996a1e02051c6a2bec1561"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "234988b52d20f741742b54b5e9ab1395934de0f7bc8f984ae30fc047ee4d47c3"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "0deb47c50fba4935bd52be88579953b5af08c087fb4169108dc95ea57d6affdd"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, 
FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "ef6a1bf285b06299cdb98b184fb97f68f2c78ac60d06a190803da8c3ceed738b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 41296, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "adb1aa74211ef7984bee82c977f940a2d86298471034769b58d74e6e547b0dff"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 41200, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "16d7fc347dea14ed48a5e25569a03eb9289581b26e35fc6fa585ff5c2c54e371"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "c80f5b8d26c270e7f806302fc467bac4aaa9272ea1a5147230beea146b259f09"}, +{ DATA_TYPE_E4M3, 
DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "e7feff3e377987a9196c7041605243264fd15eaca4af49ca6473df4760e6297d"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "57702478d2499ad8174d362f5c17f3bb020b7a6521fe4efadc0d1c5fc0f9edf7"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "ffaa3dba722ca057ef30576c0dd59ccaa630bdf92f69a8e629b8a15bc5ccd581"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, 
"b9008ebf5e3332c40aa3e903f7a8f2f45e0ff3f9f9f9a7f7656530fcb921e373"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "07950199fdc8fa37921e24bd4ffeea8a92e53972b141d9a73355ad71c99eb999"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "9e464d76d5f83b6555f49a941ce65ebf77439516b09826ed7c93e955dff7d3f2"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "e69e708d2fff19612a7c17c147957a8d13b813519b4f7647197e46d11d7bc04b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "e66c3c35b055c94a06bf4cfbd8a8c3dadaf6e280e1b2fcf37a3a9048fd6a2d8b"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 42064, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "4b2bbd53872baac38be28723fd37438f513eb5bf626e59219dc78b6be2c86b28"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 42160, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "a56056e645e06c48836615af43d0149c47a902ff229c7235b5056c38ce7fea72"}, +{ DATA_TYPE_E4M3, DATA_TYPE_E4M3, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvE4m3OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 42064, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "d36fc0c93a52d5db5c2628459c71ef023a60f5d6f0a8700f7d4bcf70064acc26"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 
128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "fe0b2d08a370e2c79ae55695c566ae6609e3f18f0f68f09392801de71688d4bf"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "135b54db5e85a28505c71a548dd70570bea0991e7a31b975e5e86a79104804a6"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "6ae0b8f86e9c35b3d3fe0f84d73181218fc6e65d2da202e48bf600394f8e0103"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvDenseVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, 
"eb0262cc12b96708213514bd43b2fd801ed91500053676f98a81eddb8b4ab94d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 164208, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "b8a254ec47551354c77dee9474e414b9a9ed0ecf519f40cbd874b5b150f727c6"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 164112, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "28ccfec4786746133934cb7267683ccd656cf1e0f103c31030aeee7c13d8eeea"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "7601c6d5df5da2142979777f1e0e7fb513af6ae4f3d8bdec91c011712b9e5989"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, 
FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "03c9387cbad4d895e03fefed7750310e198defa92f16b5dc0e81ba42693aa052"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "a0b869a7eae80e83cd690d6103c4ad16f77f4ee6ce9edef36e3c1694a423be83"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "b45d0b4b06f888e8b5284c216dd537d00de0ea80cffc17245712223805e7845e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "cdfe0986a5366a27bc6ac2a65443e309d95c6a09d10e42863550982d6463552a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 
128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "c742de75c56b8d6756e845705ade8766b49358acc744baf53d94ac8efa389f5b"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "6da26ddfa663aed7f5080fe05fdf26f2c59af161f38236915b6cc7c78088c396"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvDenseP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "843a85ca18f743ab95b370957a485de47bfc8e078c8434b1d2d8d2fe889e4de7"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, 
false, "5708eca6e4ee1f45d40ab8c260bef0a54daabc76ef357ceefda88eaa7e7fc7e4"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 164976, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "811f2a8be97005744ada42ed890e4cb82438719ec5c05d075bb8f70856d7ffb5"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 165072, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "21df970320d7ba23b0356ce015303a25201ee0ac5123b5030b5b6fee908ebe9a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 128, 128, 128, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H128HVPerCta128PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 164976, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "8c7dc685db8210f0a8d838c7da9017a9b1d91280eaf53db322ce815d323a8282"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "6ef97575483733820d007d8a2613a19fe2a4ea18aea50f9d85c7adda8ae49ed2"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "4214b3e015d072c6e8aa854f28c94b5a5b3a7c5679f1648a6f1a8bdb6e5dc7ac"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "ea2a63d890562caccaa1d856ab8442d313b54dc98accae56b4d916956a9c7d45"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvDenseVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "e4c5dd2951447d276422aa97611b28f2d3e989a8bc7b1490ad70ccadd3f468de"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, 
FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 196928, 384, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "8a6ad545b52c369a46c59bce8053a64af710dca9471f221aa37d239e1f1a11cd"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 196832, 384, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "2ba582b7171e9eb7f95d442493b383cba13cef7209a8c035f6cb88c85cef00eb"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "8c6e3e48d25029406c7514744b2187f7e009bde75d2ba2af27f672751e9594b5"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 1, 0, 1, 0, 
0, false, false, false, false, false, "974b243937187d37bdfa4d5cfe32200509ad3d098e63b591d30f46052d53bbbb"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "e2f7693fc8c6fbb1bca5ba732c1822b5c07336585a844b1a1e3818fa72435f85"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "f41631620790ecea72effb0256fffffb43a510ce74d628776537ea4ca40b31f8"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "a2eb8e55046f074d8a35b2f6f7b49e6da918b27d90e160295943f4997ea227ac"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, "c5f2ea2bfebb15e53504c59415dd5e107275804fc1cb5c30b39c585d1a954fee"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "6967a474f4d61c5b249193eabd62aa1b4adb6a78ef5458b8887ddc27a1715afa"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvDenseP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "cd8f275ee7edefeb11f1219389eae1f3232895c95bf3ad19a67cc353224242bf"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "6e11bb9536a87d694578a5e65c22a477bf96cda204abc54bdb18ae5789ac5d23"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, 
FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 197696, 384, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "9dee272b4735baafb7e2c8b5d8a5c6a75aa2a4a53544a1a727d9f32456ad3210"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 197792, 384, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "de6c6410a84b97129e5e3f59b7c927b53d3c5dbcda8549bf1855369d24ee0eea"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 128, 128, 256, 256, 256, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H256HVPerCta256PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 197696, 384, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "bd39c83b7274c81c7f40a285215c510ccf8d73e9c02268aaa4c8b39f7f9d2b4e"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext_cubin_len, 
"fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 1, 0, 1, 1, 0, false, false, false, false, false, "9eb99e36145254b8acfa2c19cf5d4ec2f469de5f3fc449131d0ac2f7baa4536a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 1, 0, 1, 0, 0, false, false, false, false, false, "feb83d5ade07ab2afa6a74bc7a7839c457bb247ce60304d5bf2d7481609c558c"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 0, 0, 1, 1, 0, false, false, false, false, false, "c7258acca38063e59ba46b3126119713ba72e71cd70ead1961507fb23cf601da"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvDenseVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 0, 0, 1, 0, 0, false, false, false, false, false, "2497b700426a9e723aaf3701860ab0c03f072446639bed76aa2f093a3ba351a7"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin, 
FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128PersistentContext", 82256, 512, 1, 0, 2, 0, 1, 1, 0, false, false, false, false, false, "83eeb3ed81f0461c5fb2e81192e32218d27a1b5f12724b5affbae4e9aefa7e23"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PackedQkvSlidingOrChunkedCausalVarSeqQ128Kv128StaticContext", 82160, 512, 1, 0, 2, 0, 1, 0, 0, false, false, false, false, false, "c1d94d2dd9e035b3b1c6d4e270d886f0a5f8cb90b0718e40a352f340b9731b2a"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 1, 0, 1, 1, 0, false, false, false, false, false, "5053b0005130a3015e425ed3a778f978a5b9611189c6075927210b9d782fb46d"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 1, 0, 1, 0, 0, false, false, false, false, false, "375a48847ae7f23cdf36fc89e101b0b6cd44fbe7390d92e6914f0e6fb6dcca45"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, 
DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 1, 0, 1, 1, 0, false, false, false, false, false, "5c57df3828e9906cda7d0cd76bc9dc18f0ec7a0e309228eef2c0b17b10df51bb"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 1, 0, 1, 0, 0, false, false, false, false, false, "ff2a6fc101decd39fcf23310f1c30720d5f9fdf550f07da87099d6b0600dc8c4"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 0, 0, 1, 1, 0, false, false, false, false, false, "f5779871fde0708e84ba5f32501f906709cb94cb4bd58d459100edcdc0984947"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 0, 0, 1, 0, 0, false, false, false, false, false, 
"700cf5a5c6c31fe83f3d1276f1a004339d15bb613d2bd0cb7a4ec5b62295e0a9"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 0, 0, 1, 1, 0, false, false, false, false, false, "84d5a81c6076e279c1f65a8b8f6f45cf724667205af0442b54ff57bdeb560a63"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvDenseP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 0, 0, 1, 0, 0, false, false, false, false, false, "75ad050513c7e5104ee5f636411e0181b99f2155a56ddeb4c9a356820b401304"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 32, 2, 0, 1, 1, 0, false, false, false, false, false, "2d6ed48a920145858073c102879cbee349fae250da2752b81108ee0ca7e3ebde"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext_cubin_len, 
"fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP32VarSeqQ128Kv128StaticContext", 83024, 512, 2, 32, 2, 0, 1, 0, 0, false, false, false, false, false, "2fa50d8d90ed1899fd7641c82121ee9d7b730100de2f8f8ca11f6d092dd28ec1"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128PersistentContext", 83120, 512, 2, 64, 2, 0, 1, 1, 0, false, false, false, false, false, "b149aef4ff023efb25dab92b03fe388a8c6c573069af790508420ad91d4ebc10"}, +{ DATA_TYPE_FP16, DATA_TYPE_FP16, DATA_TYPE_FP16, 128, 128, 256, 128, 64, 64, 64, kSM_103, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin, FmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext_cubin_len, "fmhaSm103aKernel_QkvFp16OFp16H64HVPerCta64PagedKvSlidingOrChunkedCausalP64VarSeqQ128Kv128StaticContext", 83024, 512, 2, 64, 2, 0, 1, 0, 0, false, false, false, false, false, "3b4e0524789776bb8206e75d3329fba313da5bdddda60a9e8e34f026b7b126f5"}, #endif // EXCLUDE_SM_100 }; // clang-format on diff --git a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/kernelParams.h b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/kernelParams.h index bea484adc51..ea2027e7092 100644 --- a/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/kernelParams.h +++ b/cpp/tensorrt_llm/kernels/trtllmGenKernels/fmha/kernelParams.h @@ -685,14 +685,15 @@ struct KernelParams // The number of elements in 128B for Q. int32_t numEltsIn128BKv = (128 * 8) / get_size_in_bits(kernelMeta.mDataTypeKv); // The number of head elts (per token) in each block of shared memory (see above explanation). 
- int32_t numEltsInClampedHeadDimKv = std::min(numEltsIn128BKv, maxHeadDimKv); + // HeadDim will be split into multiple headDimStages (128) if maxHeadDimKv > 128. + int32_t numEltsInClampedHeadDimKv = std::min({numEltsIn128BKv, maxHeadDimKv, 128}); // Do we have to transform K/V before MMA? bool const transformsKv{kernelMeta.mDataTypeKv != kernelMeta.mDataTypeQ}; // Whether store transformed K/V in TMEM. bool const isSwapsMmaAb = isSwapsMmaAbForGenerationKernel(static_cast(kernelMeta.mKernelType)); bool const storeTransformedKvInTmem{kernelMeta.mDataTypeKv == DATA_TYPE_E2M1 - && kernelMeta.mDataTypeQ == DATA_TYPE_E4M3 && maxHeadDimKv == 128 && isSwapsMmaAb}; + && kernelMeta.mDataTypeQ == DATA_TYPE_E4M3 && maxHeadDimKv >= 128 && isSwapsMmaAb}; // Shape/stride for gmem tensor Kv. auto [shapeK, strideK] @@ -700,7 +701,7 @@ struct KernelParams auto [shapeV, strideV] = makeTmaShapeStrideKv(options, params, kernelMeta.mDataTypeKv, /*isK*/ false, storeTransformedKvInTmem); // Whether swizzle is needed for K/V. - bool const swizzleKv{storeTransformedKvInTmem ? true : !transformsKv}; + bool const swizzleKv{storeTransformedKvInTmem || !transformsKv}; // Note that for FP4 KV input, elements are stored as uint8_t, each packs 2 FP4 elements. auto const numEltsDivisor = kernelMeta.mDataTypeKv == DATA_TYPE_E2M1 && !storeTransformedKvInTmem ? 2 : 1; // The tileShapes for K/V. 
diff --git a/cpp/tensorrt_llm/kernels/unfusedAttentionKernels/unfusedAttentionKernels_2_template.h b/cpp/tensorrt_llm/kernels/unfusedAttentionKernels/unfusedAttentionKernels_2_template.h index abe76a59021..32facc70c50 100644 --- a/cpp/tensorrt_llm/kernels/unfusedAttentionKernels/unfusedAttentionKernels_2_template.h +++ b/cpp/tensorrt_llm/kernels/unfusedAttentionKernels/unfusedAttentionKernels_2_template.h @@ -300,21 +300,28 @@ inline __device__ void apply_rotary_embedding_gptj(VecType& q, VecType& k, float } } -template +template inline __device__ void quantizeAndWriteFP4KVCache(uint8_t* kBlockScales, uint8_t* vBlockScales, uint32_t* kDst, uint32_t* vDst, float kSecondLevelSF, float vSecondLevelSF, int inBlockIdx, PackedVec& kPacked, PackedVec& vPacked) { uint8_t* kSfOut = nullptr; uint8_t* vSfOut = nullptr; + // WARNING: 8 elements per thread is assumed. // Two threads are involved in the reduction for block scales inside // cvt_warp_fp16_to_fp4, but only one thread needs to write out the // final answer. + constexpr int NUM_SFS_PER_HEAD = VECS_PER_HEAD / 2; if (inBlockIdx % 2 == 0) { auto blockScaleIdxDst = inBlockIdx / 2; kSfOut = kBlockScales + blockScaleIdxDst; - vSfOut = vBlockScales + blockScaleIdxDst; + // A interleaved layout (num_tokens / 4, num_sfs_per_head, 4) is used for nvfp4 kv cache in order to achieve + // better performance. This is only used by trtllm-gen kernels. 
+ auto tokenIdxV = blockScaleIdxDst / NUM_SFS_PER_HEAD; + auto headDimIdxV = blockScaleIdxDst % NUM_SFS_PER_HEAD; + auto blockScaleIdxDstV = (tokenIdxV / 4) * 4 * NUM_SFS_PER_HEAD + headDimIdxV * 4 + (tokenIdxV % 4); + vSfOut = vBlockScales + blockScaleIdxDstV; } // Despite the name of cvt_warp_fp16_to_fp4, it is used by @@ -376,6 +383,8 @@ __global__ void applyBiasRopeUpdateKVCache(QKVPreprocessingParams&>(k_to_cache); auto& vPacked = reinterpret_cast&>(v); - quantizeAndWriteFP4KVCache(kBlockScales, vBlockScales, reinterpret_cast(kDst), - reinterpret_cast(vDst), kSecondLevelSF, vSecondLevelSF, inBlockIdx, kPacked, - vPacked); + quantizeAndWriteFP4KVCache(kBlockScales, vBlockScales, + reinterpret_cast(kDst), reinterpret_cast(vDst), kSecondLevelSF, + vSecondLevelSF, inBlockIdx, kPacked, vPacked); } else { @@ -1022,9 +1031,9 @@ __global__ void applyBiasRopeUpdateKVCacheV2(QKVPreprocessingParams&>(k); auto& vPacked = reinterpret_cast&>(v); - quantizeAndWriteFP4KVCache(kBlockScales, vBlockScales, reinterpret_cast(kDst), - reinterpret_cast(vDst), kSecondLevelSF, vSecondLevelSF, inBlockIdx, kPacked, - vPacked); + quantizeAndWriteFP4KVCache(kBlockScales, vBlockScales, + reinterpret_cast(kDst), reinterpret_cast(vDst), kSecondLevelSF, + vSecondLevelSF, inBlockIdx, kPacked, vPacked); } else { @@ -1529,6 +1538,15 @@ void invokeApplyBiasRopeUpdateKVCacheDispatch(QKVPreprocessingParams) + { + TLLM_CHECK_WITH_INFO((params.size_per_head & (params.size_per_head - 1)) == 0, + "Head size needs to be power of 2 for nvfp4 kv cache."); + } +#endif + // TODO: this should be extended to support quantized FP4 outputs as well. // For now, we will assume that the attention kernel reads directly from the KV cache // and FP16 inputs. 
diff --git a/cpp/tensorrt_llm/kernels/xqaDispatcher.cpp b/cpp/tensorrt_llm/kernels/xqaDispatcher.cpp index 37aa1f1939e..9bc7513aea5 100644 --- a/cpp/tensorrt_llm/kernels/xqaDispatcher.cpp +++ b/cpp/tensorrt_llm/kernels/xqaDispatcher.cpp @@ -456,10 +456,11 @@ void XqaDispatcher::runImpl( tllmRunnerParams.attentionSinksPtr = params.attention_sinks; tllmRunnerParams.cumSeqLensQPtr = cu_seqlens; tllmRunnerParams.cumSeqLensKvPtr = reinterpret_cast(launchParams.cu_kv_seq_lens); + // Attention scales device pointers (only fp8 kernels need to load scales from the device memory). tllmRunnerParams.outputScalePtr = reinterpret_cast(launchParams.bmm2_scale_ptr); - // TRTLLM-GEN kernels always use the Log2 scale - tllmRunnerParams.scaleSoftmaxLog2Ptr - = reinterpret_cast(launchParams.bmm1_scale_ptr + kIdxScaleSoftmaxLog2Ptr); + tllmRunnerParams.scaleSoftmaxLog2Ptr = launchParams.bmm1_scale_ptr + ? reinterpret_cast(launchParams.bmm1_scale_ptr + kIdxScaleSoftmaxLog2Ptr) + : nullptr; tllmRunnerParams.oSfScalePtr = params.fp4_out_sf_scale; tllmRunnerParams.oPtr = params.output; diff --git a/tensorrt_llm/_torch/pyexecutor/resource_manager.py b/tensorrt_llm/_torch/pyexecutor/resource_manager.py index 6e8eee8efdd..edac17e2cc7 100644 --- a/tensorrt_llm/_torch/pyexecutor/resource_manager.py +++ b/tensorrt_llm/_torch/pyexecutor/resource_manager.py @@ -674,12 +674,6 @@ def calculate_scaling_factor_size_bytes( @staticmethod def get_cache_size_per_token(model_config: ModelConfigPython, mapping: Mapping, **kwargs): - # get kv cache dtype bytes - mem_per_token = 2 - quant_config = model_config.quant_config - if quant_config is not None and quant_config.quant_mode.has_fp8_kv_cache( - ): - mem_per_token = 1 # get num key value heads config = model_config.pretrained_config @@ -705,10 +699,24 @@ def get_cache_size_per_token(model_config: ModelConfigPython, # provide at least 1 layer to prevent division by zero cache size num_attention_layers = max( 
len(mapping.pp_layers(model_config.get_num_attention_layers())), 1) - mem_per_token *= num_attention_layers * head_dim - # K and V - mem_per_token *= kv_factor + mem_per_token = kv_factor * num_attention_layers * head_dim + # The data type bytes. + quant_config = model_config.quant_config + if quant_config is not None and quant_config.quant_mode.has_fp8_kv_cache( + ): + mem_per_token *= 1 + elif quant_config is not None and quant_config.quant_mode.has_fp4_kv_cache( + ): + # 1 byte for 2 elements, and one SF (fp8) per 16 elements. + mem_per_token = math.ceil(mem_per_token / 2) + math.ceil( + mem_per_token / 16) + else: + # All other cases (fp16/bf16 kv cache), we need 2 bytes per element for K and V. + assert quant_config is None or ( + not quant_config.quant_mode.has_kv_cache_quant() + ), "Quantized kv cache is not expected" + mem_per_token *= 2 return mem_per_token def get_cache_bytes_per_token(self):