Skip to content

Commit 99eb4ec

Browse files
committed
add fp8 per tensor bench
Signed-off-by: Siyuan Fu <[email protected]>
1 parent 9c2ec07 commit 99eb4ec

File tree

1 file changed

+24
-11
lines changed

1 file changed

+24
-11
lines changed

benchmarks/bench_trtllm_gen_fused_moe_autotuner.py

Lines changed: 24 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -284,14 +284,27 @@ def bench(do_autotune):
         "--iterations", type=int, default=100, help="Number of benchmark iterations"
     )
     args = parser.parse_args()
-    bench_trtllm_gen_fused_moe_autotuner(
-        args.tune_max_num_tokens,
-        args.quant_mode,
-        args.num_tokens,
-        args.num_experts,
-        args.hidden_size,
-        args.intermediate_size,
-        args.top_k,
-        args.warmups,
-        args.iterations,
-    )
+    if args.quant_mode == "Fp8-Per-Tensor":
+        bench_trtllm_gen_fused_moe_autotuner_fp8(
+            args.tune_max_num_tokens,
+            args.quant_mode,
+            args.num_tokens,
+            args.num_experts,
+            args.hidden_size,
+            args.intermediate_size,
+            args.top_k,
+            args.warmups,
+            args.iterations,
+        )
+    else:
+        bench_trtllm_gen_fused_moe_autotuner_fp4(
+            args.tune_max_num_tokens,
+            args.quant_mode,
+            args.num_tokens,
+            args.num_experts,
+            args.hidden_size,
+            args.intermediate_size,
+            args.top_k,
+            args.warmups,
+            args.iterations,
+        )

0 commit comments

Comments
 (0)