From fe6f4075e498f225bc0595117636989675e9263d Mon Sep 17 00:00:00 2001 From: Malay Nagda Date: Tue, 24 Feb 2026 15:19:07 +0530 Subject: [PATCH 1/2] disable CG for 8B SFT Signed-off-by: Malay Nagda --- .../performance/configs/llama/llama3_workload_base_configs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/performance/configs/llama/llama3_workload_base_configs.py b/scripts/performance/configs/llama/llama3_workload_base_configs.py index bea561672b..1539729185 100644 --- a/scripts/performance/configs/llama/llama3_workload_base_configs.py +++ b/scripts/performance/configs/llama/llama3_workload_base_configs.py @@ -467,7 +467,7 @@ peft="none", micro_batch_size=1, global_batch_size=8, - cuda_graph_impl="transformer_engine", + cuda_graph_impl="none", # NOTE: CUDA Graphs reduces performance here cuda_graph_scope="mlp", ) From d25de21165e127422924abc1eda97b186b3dae2b Mon Sep 17 00:00:00 2001 From: Malay Nagda Date: Tue, 24 Feb 2026 16:16:28 +0530 Subject: [PATCH 2/2] disable CG for 8B SFT H100 FP8-CS Signed-off-by: Malay Nagda --- .../performance/configs/llama/llama3_workload_base_configs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/performance/configs/llama/llama3_workload_base_configs.py b/scripts/performance/configs/llama/llama3_workload_base_configs.py index 1539729185..0f60d5d057 100644 --- a/scripts/performance/configs/llama/llama3_workload_base_configs.py +++ b/scripts/performance/configs/llama/llama3_workload_base_configs.py @@ -486,7 +486,7 @@ LLAMA3_8B_SFT_CONFIG_H100_BF16_V1 = _LLAMA3_8B_SFT_CONFIG_H100 LLAMA3_8B_SFT_CONFIG_H100_FP8_CS_V1 = replace( _LLAMA3_8B_SFT_CONFIG_H100, - cuda_graph_impl="transformer_engine", + cuda_graph_impl="none", cuda_graph_scope="mlp", )