diff --git a/scripts/performance/configs/deepseek/deepseek_workload_base_configs.py b/scripts/performance/configs/deepseek/deepseek_workload_base_configs.py
index ec38533a6d..4560430a35 100644
--- a/scripts/performance/configs/deepseek/deepseek_workload_base_configs.py
+++ b/scripts/performance/configs/deepseek/deepseek_workload_base_configs.py
@@ -103,7 +103,7 @@
     BASE_DEEPSEEK_V3_CONFIG,
     num_gpus=256,
     pipeline_model_parallel_size=16,
-    expert_model_parallel_size=16,
+    expert_model_parallel_size=8,
     global_batch_size=2048,
     recompute_modules=["mla_up_proj"],
     moe_flex_dispatcher_backend="hybridep",
@@ -161,7 +161,11 @@
    DEEPSEEK_V3_PRETRAIN_CONFIG_B300_V1,
    global_batch_size=4096,
 )
-DEEPSEEK_V3_PRETRAIN_CONFIG_B300_BF16_V2 = DEEPSEEK_V3_PRETRAIN_CONFIG_B300_V2
+DEEPSEEK_V3_PRETRAIN_CONFIG_B300_BF16_V2 = replace(
+    DEEPSEEK_V3_PRETRAIN_CONFIG_B300_V2,
+    pipeline_model_parallel_size=8,
+    virtual_pipeline_model_parallel_size=2,
+)
 DEEPSEEK_V3_PRETRAIN_CONFIG_B300_FP8_CS_V2 = DEEPSEEK_V3_PRETRAIN_CONFIG_B300_V2
 DEEPSEEK_V3_PRETRAIN_CONFIG_B300_FP8_MX_V2 = DEEPSEEK_V3_PRETRAIN_CONFIG_B300_FP8_CS_V2
 
diff --git a/scripts/performance/configs/llama/llama31_workload_base_configs.py b/scripts/performance/configs/llama/llama31_workload_base_configs.py
index b125e3b5ff..7e779d3c44 100644
--- a/scripts/performance/configs/llama/llama31_workload_base_configs.py
+++ b/scripts/performance/configs/llama/llama31_workload_base_configs.py
@@ -224,9 +224,9 @@
 LLAMA31_405B_PRETRAIN_CONFIG_GB300_FP8_CS_V2 = replace(
     LLAMA31_405B_PRETRAIN_CONFIG_GB300_FP8_CS_V1,
-    tensor_model_parallel_size=2,
+    tensor_model_parallel_size=4,
     pipeline_model_parallel_size=8,
-    context_parallel_size=2,
+    context_parallel_size=1,
     virtual_pipeline_model_parallel_size=4,
     num_gpus=256,
     global_batch_size=1536,