Skip to content

Commit fe569f0

Browse files
authored
[None][feat] bias for FP4 TRT-LLM Gen MoE (#9220)
Signed-off-by: Nikita Korobov <[email protected]>
1 parent 04fb481 commit fe569f0

File tree

540 files changed

+1702
-1703
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

540 files changed

+1702
-1703
lines changed

cpp/tensorrt_llm/kernels/trtllmGenKernels/batchedGemm/trtllmGen_bmm_export/GemmOptions.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1623,8 +1623,8 @@ inline CUresult loadCubinData(CUmodule* module, Config const& config)
16231623
// Trtllm links the cubin into the executable while Flashinfer loads the cubin from storage.
16241624
#ifdef TLLM_GEN_EXPORT_FLASHINFER
16251625
#ifdef TLLM_GEN_GEMM_CUBIN_PATH
1626-
static const std::string tllm_gen_gemm_cubin_path = std::string(TLLM_GEN_GEMM_CUBIN_PATH);
1627-
const std::string sha256 = config.mHash ? config.mHash : "";
1626+
static std::string const tllm_gen_gemm_cubin_path = std::string(TLLM_GEN_GEMM_CUBIN_PATH);
1627+
std::string const sha256 = config.mHash ? config.mHash : "";
16281628
std::string fileName = config.mFunctionName;
16291629
if (!fileName.empty())
16301630
{

cpp/tensorrt_llm/kernels/trtllmGenKernels/batchedGemm/trtllmGen_bmm_export/KernelMetaInfo.h

Lines changed: 711 additions & 711 deletions
Large diffs are not rendered by default.

cpp/tensorrt_llm/kernels/trtllmGenKernels/batchedGemm/trtllmGen_bmm_export/KernelParams.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -224,7 +224,7 @@ static auto makeTmaShapeStrideAbc(GemmOptions const& options, int sizeM, int siz
224224

225225
// Create the TMA shape/stride for A/B block scaling factors.
226226
static auto makeTmaShapeStrideSfAb(int mM, int mN, int mK, MatrixType matrixType, int tileM, int tileN, int tileK,
227-
tg::SfLayout layout, int sfReshapeFactor, const int32_t numEltsPerSf)
227+
tg::SfLayout layout, int sfReshapeFactor, int32_t const numEltsPerSf)
228228
{
229229

230230
// The outer dimension.

cpp/tensorrt_llm/kernels/trtllmGenKernels/batchedGemm/trtllmGen_bmm_export/config.json

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,8 @@
3939
"dtypeA": "e2m1",
4040
"dtypeC": "e2m1",
4141
"mmaK": 64,
42-
"tileK": 512
42+
"tileK": 512,
43+
"biasType": "m"
4344
},
4445
"BatchedGemmFp4LowLatency": {
4546
"_template": "BatchedGemmFp4Base",
@@ -53,8 +54,7 @@
5354
"mmaM": 256,
5455
"clusterDimX": 2,
5556
"sfLayoutB": "128x4",
56-
"sfLayoutC": "128x4",
57-
"biasType": "m"
57+
"sfLayoutC": "128x4"
5858
},
5959

6060
"BatchedGemmPerTensorScalingFp8Base": {
@@ -156,7 +156,6 @@
156156
"dtypeB": "e4m3",
157157
"dtypeMmaA": "e4m3",
158158
"sfBlockSizeA": 32,
159-
"biasType": "m",
160159
"act": "swiglu"
161160
}
162161
},
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,3 @@
11
version https://git-lfs.github.com/spec/v1
2-
oid sha256:13b5175fc939f35c60bec6fff0f7cd3a8f972d250b1878f9e39b58f5b6ac7f31
3-
size 636234
2+
oid sha256:576f67d83e31ec7ffc09187a24c64f60159793502f4e14a1badbb1fa0ea03f23
3+
size 591981
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,3 @@
11
version https://git-lfs.github.com/spec/v1
2-
oid sha256:37a2467eeb7ae0ffcb75f96d777a6ffc880a7301544f781639043ecadc2b297d
3-
size 652666
2+
oid sha256:aff49d1f7b56c004d7c6b3779cb8343f4fbbd8280a8b5ee3df62430cea1323ab
3+
size 609201

cpp/tensorrt_llm/kernels/trtllmGenKernels/batchedGemm/trtllmGen_bmm_export/cubins/Bmm_Bfloat16_E2m1E2m1_Fp32_t128x16x256_s9_et128x16_m128x16x64_cga1x1x1_16dp256b_rM_TN_transOut_schedP2x1x2x3_bN_tmaOpt_clmp_dynBatch_sm100f_cubin.cpp

Lines changed: 0 additions & 3 deletions
This file was deleted.
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:027e329ed95766bc4df29cf2109485dc73775509c39cec5d1234a0c4f93ddd0b
3+
size 556307

cpp/tensorrt_llm/kernels/trtllmGenKernels/batchedGemm/trtllmGen_bmm_export/cubins/Bmm_Bfloat16_E2m1E2m1_Fp32_t128x16x256_s9_et128x16_m128x16x64_cga1x1x1_16dp256b_rM_TN_transOut_schedS_bN_tmaOpt_clmp_dynBatch_sm100f_cubin.cpp

Lines changed: 0 additions & 3 deletions
This file was deleted.
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:e219cc98a5f4c19bb9dc93428e69beb7bc6b37444c9e7c8e7c2eee4411d0092f
3+
size 445835

0 commit comments

Comments
 (0)