From 1a4cd4a9deb28b865239c722c0e0a7c670c4793e Mon Sep 17 00:00:00 2001 From: Malay Nagda Date: Mon, 9 Feb 2026 16:49:28 +0530 Subject: [PATCH] dsv3_gb300_revert- BF16 & FP8-MX scale Signed-off-by: Malay Nagda --- .../deepseek_workload_base_configs.py | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/scripts/performance/configs/deepseek/deepseek_workload_base_configs.py b/scripts/performance/configs/deepseek/deepseek_workload_base_configs.py index fadd2f9555..6de05c2eba 100644 --- a/scripts/performance/configs/deepseek/deepseek_workload_base_configs.py +++ b/scripts/performance/configs/deepseek/deepseek_workload_base_configs.py @@ -52,7 +52,18 @@ cuda_graph_scope=[], recompute_modules=["mla_up_proj"], ) -DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_BF16_V1 = DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_V1 +DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_BF16_V1 = replace( + DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_V1, + micro_batch_size=1, + pipeline_model_parallel_size=4, + virtual_pipeline_model_parallel_size=4, + expert_model_parallel_size=64, + moe_flex_dispatcher_backend="hybridep", + moe_a2a_overlap=False, + cuda_graph_impl="transformer_engine", + cuda_graph_scope=["attn", "moe_router", "moe_preprocess"], + recompute_modules=["moe_act"], +) DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_FP8_CS_V1 = DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_V1 DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_FP8_MX_V1 = DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_V1 DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_NVFP4_V1 = DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_V1 @@ -131,7 +142,10 @@ DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_V1, global_batch_size=4096, ) -DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_BF16_V2 = DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_V2 +DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_BF16_V2 = replace( + DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_BF16_V1, + global_batch_size=4096, +) DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_FP8_CS_V2 = DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_V2 DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_FP8_MX_V2 = DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_V2 DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_NVFP4_V2 = DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_V2 @@ -183,7 +197,7 @@ # ============================================================================= DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_FP8_MX_LARGE_SCALE = replace( - DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_FP8_MX_V1, + DEEPSEEK_V3_PRETRAIN_CONFIG_GB300_BF16_V1, global_batch_size=256, )