From 623509ca19c4ae245d915eb47db0f30959dde8de Mon Sep 17 00:00:00 2001
From: malay-nagda <malayn@nvidia.com>
Date: Fri, 13 Feb 2026 17:46:04 +0530
Subject: [PATCH] qwen3 30B-A3B H100: 2x global batch size (1024), use DeepEP dispatcher (#2280)

Signed-off-by: Malay Nagda <malayn@nvidia.com>
Co-authored-by: Raghav Hrishikeshan Mukundan <102543536+rhmukundan@users.noreply.github.com>
Signed-off-by: NeMo Bot <nemo-bot@nvidia.com>
---
 .../performance/configs/qwen/qwen3_workload_base_configs.py   | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/scripts/performance/configs/qwen/qwen3_workload_base_configs.py b/scripts/performance/configs/qwen/qwen3_workload_base_configs.py
index 0cd9f66c8b..b18f0f8b2b 100644
--- a/scripts/performance/configs/qwen/qwen3_workload_base_configs.py
+++ b/scripts/performance/configs/qwen/qwen3_workload_base_configs.py
@@ -408,20 +408,24 @@
 QWEN3_30B_A3B_PRETRAIN_CONFIG_H100_BF16_V1 = replace(
     BASE_QWEN3_30B_A3B_CONFIG,
     num_gpus=16,
+    global_batch_size=1024,
     pipeline_model_parallel_size=2,
     virtual_pipeline_model_parallel_size=12,
     moe_a2a_overlap=True,
     cuda_graph_impl="transformer_engine",
     cuda_graph_scope=["moe_router", "moe_preprocess"],
+    moe_flex_dispatcher_backend="deepep",
 )
 
 
 QWEN3_30B_A3B_PRETRAIN_CONFIG_H100_FP8_CS_V1 = replace(
     BASE_QWEN3_30B_A3B_CONFIG,
     num_gpus=16,
+    global_batch_size=1024,
     pipeline_model_parallel_size=2,
     virtual_pipeline_model_parallel_size=12,
     moe_a2a_overlap=True,
+    moe_flex_dispatcher_backend="deepep",
 )