diff --git a/scripts/performance/configs/qwen/qwen3_workload_base_configs.py b/scripts/performance/configs/qwen/qwen3_workload_base_configs.py
index b669c33651..0cd9f66c8b 100644
--- a/scripts/performance/configs/qwen/qwen3_workload_base_configs.py
+++ b/scripts/performance/configs/qwen/qwen3_workload_base_configs.py
@@ -198,8 +198,8 @@
     QWEN3_235B_A22B_PRETRAIN_CONFIG_GB300_FP8_CS_V1,
     num_gpus=256,
     pipeline_model_parallel_size=4,
-    virtual_pipeline_model_parallel_size=12,
-    expert_model_parallel_size=16,
+    expert_model_parallel_size=32,
+    cuda_graph_scope=["attn", "moe_router", "moe_preprocess"],
     global_batch_size=8192,
 )
 
@@ -217,6 +217,7 @@
 QWEN3_235B_A22B_PRETRAIN_CONFIG_GB200_FP8_CS_V2 = replace(
     QWEN3_235B_A22B_PRETRAIN_CONFIG_GB200_FP8_CS_V1,
     num_gpus=256,
+    expert_model_parallel_size=32,
     global_batch_size=8192,
 )
 