From 1a4cd4a9deb28b865239c722c0e0a7c670c4793e Mon Sep 17 00:00:00 2001
From: Malay Nagda <malayn@nvidia.com>
Date: Mon, 9 Feb 2026 16:49:28 +0530
Subject: [PATCH] dsv3_gb300_revert: BF16 & FP8-MX scale

Signed-off-by: Malay Nagda <malayn@nvidia.com>
---
 .../deepseek_workload_base_configs.py         | 20 ++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/scripts/performance/configs/deepseek/deepseek_workload_base_configs.py b/scripts/performance/configs/deepseek/deepseek_workload_base_configs.py
index fadd2f9555..6de05c2eba 100644
--- a/scripts/performance/configs/deepseek/deepseek_workload_base_configs.py
+++ b/scripts/performance/configs/deepseek/deepseek_workload_base_configs.py
@@ -52,7 +52,18 @@
     cuda_graph_scope=[],
     recompute_modules=["mla_up_proj"],
 )
-DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_BF16_V1 = DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_V1
+DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_BF16_V1 = replace(
+    DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_V1,
+    micro_batch_size=1,
+    pipeline_model_parallel_size=4,
+    virtual_pipeline_model_parallel_size=4,
+    expert_model_parallel_size=64,
+    moe_flex_dispatcher_backend="hybridep",
+    moe_a2a_overlap=False,
+    cuda_graph_impl="transformer_engine",
+    cuda_graph_scope=["attn", "moe_router", "moe_preprocess"],
+    recompute_modules=["moe_act"],
+)
 DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_FP8_CS_V1 = DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_V1
 DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_FP8_MX_V1 = DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_V1
 DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_NVFP4_V1 = DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_V1
@@ -131,7 +142,10 @@
     DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_V1,
     global_batch_size=4096,
 )
-DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_BF16_V2 = DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_V2
+DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_BF16_V2 = replace(
+    DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_BF16_V1,
+    global_batch_size=4096,
+)
 DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_FP8_CS_V2 = DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_V2
 DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_FP8_MX_V2 = DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_V2
 DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_NVFP4_V2 = DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_V2
@@ -183,7 +197,7 @@
 # =============================================================================
 
 DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_FP8_MX_LARGE_SCALE = replace(
-    DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_FP8_MX_V1,
+    DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_BF16_V1,
     global_batch_size=256,
 )