From 623509ca19c4ae245d915eb47db0f30959dde8de Mon Sep 17 00:00:00 2001
From: malay-nagda
Date: Fri, 13 Feb 2026 17:46:04 +0530
Subject: [PATCH] qwen gbs 2x (#2280)

Signed-off-by: Malay Nagda
Co-authored-by: Raghav Hrishikeshan Mukundan <102543536+rhmukundan@users.noreply.github.com>
Signed-off-by: NeMo Bot
---
 .../performance/configs/qwen/qwen3_workload_base_configs.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/scripts/performance/configs/qwen/qwen3_workload_base_configs.py b/scripts/performance/configs/qwen/qwen3_workload_base_configs.py
index 0cd9f66c8b..b18f0f8b2b 100644
--- a/scripts/performance/configs/qwen/qwen3_workload_base_configs.py
+++ b/scripts/performance/configs/qwen/qwen3_workload_base_configs.py
@@ -408,20 +408,24 @@
 QWEN3_30B_A3B_PRETRAIN_CONFIG_H100_BF16_V1 = replace(
     BASE_QWEN3_30B_A3B_CONFIG,
     num_gpus=16,
+    global_batch_size=1024,
     pipeline_model_parallel_size=2,
     virtual_pipeline_model_parallel_size=12,
     moe_a2a_overlap=True,
     cuda_graph_impl="transformer_engine",
     cuda_graph_scope=["moe_router", "moe_preprocess"],
+    moe_flex_dispatcher_backend="deepep",
 )
 
 
 QWEN3_30B_A3B_PRETRAIN_CONFIG_H100_FP8_CS_V1 = replace(
     BASE_QWEN3_30B_A3B_CONFIG,
     num_gpus=16,
+    global_batch_size=1024,
     pipeline_model_parallel_size=2,
     virtual_pipeline_model_parallel_size=12,
     moe_a2a_overlap=True,
+    moe_flex_dispatcher_backend="deepep",
 )
 
 