From 0a8447aa657932ff8ee181f68488f6a27143fb65 Mon Sep 17 00:00:00 2001
From: Dingqing Yang <dingqingy@nvidia.com>
Date: Tue, 3 Feb 2026 21:21:35 -0800
Subject: [PATCH] update qwen3 235b mxfp8 gb recipe and resolve nan grad norm

Signed-off-by: Dingqing Yang <dingqingy@nvidia.com>
---
 .../performance/configs/qwen/qwen3_workload_base_configs.py  | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/scripts/performance/configs/qwen/qwen3_workload_base_configs.py b/scripts/performance/configs/qwen/qwen3_workload_base_configs.py
index b669c33651..0cd9f66c8b 100644
--- a/scripts/performance/configs/qwen/qwen3_workload_base_configs.py
+++ b/scripts/performance/configs/qwen/qwen3_workload_base_configs.py
@@ -198,8 +198,8 @@
     QWEN3_235B_A22B_PRETRAIN_CONFIG_GB300_FP8_CS_V1,
     num_gpus=256,
     pipeline_model_parallel_size=4,
-    virtual_pipeline_model_parallel_size=12,
-    expert_model_parallel_size=16,
+    expert_model_parallel_size=32,
+    cuda_graph_scope=["attn", "moe_router", "moe_preprocess"],
     global_batch_size=8192,
 )
 
@@ -217,6 +217,7 @@
 QWEN3_235B_A22B_PRETRAIN_CONFIG_GB200_FP8_CS_V2 = replace(
     QWEN3_235B_A22B_PRETRAIN_CONFIG_GB200_FP8_CS_V1,
     num_gpus=256,
+    expert_model_parallel_size=32,
     global_batch_size=8192,
 )