From 9ace743d1d910004038dcf12f2c8057a38138df6 Mon Sep 17 00:00:00 2001 From: Malay Nagda Date: Tue, 10 Feb 2026 18:33:28 +0530 Subject: [PATCH 1/6] nemotron3 nano recipes Signed-off-by: Malay Nagda --- .../performance/configs/nemotronh/__init__.py | 30 ++++ .../nemotronh/nemotron_3_nano_llm_pretrain.py | 146 ++++++++++++++++++ .../nemotron_3_nano_workload_base_configs.py | 69 +++++++++ 3 files changed, 245 insertions(+) create mode 100644 scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py create mode 100644 scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py diff --git a/scripts/performance/configs/nemotronh/__init__.py b/scripts/performance/configs/nemotronh/__init__.py index aa3f16830e..0ae89ab914 100644 --- a/scripts/performance/configs/nemotronh/__init__.py +++ b/scripts/performance/configs/nemotronh/__init__.py @@ -7,6 +7,11 @@ if HAVE_MEGATRON_BRIDGE: from .nemotronh_llm_pretrain import ( + nemotronh_3_nano_pretrain_config_b200, + nemotronh_3_nano_pretrain_config_b300, + nemotronh_3_nano_pretrain_config_gb200, + nemotronh_3_nano_pretrain_config_gb300, + nemotronh_3_nano_pretrain_config_h100, nemotronh_56b_pretrain_config_b200, nemotronh_56b_pretrain_config_b300, nemotronh_56b_pretrain_config_gb200, @@ -15,6 +20,16 @@ ) from .nemotronh_workload_base_configs import ( + NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1, + NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_FP8_MX_V1, + NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1, + NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_FP8_MX_V1, + NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1, + NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_FP8_MX_V1, + NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1, + NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_FP8_MX_V1, + NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_BF16_V1, + NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_FP8_CS_V1, NEMOTRONH_56B_PRETRAIN_CONFIG_B200_FP8_CS_V1, NEMOTRONH_56B_PRETRAIN_CONFIG_B300_FP8_CS_V1, NEMOTRONH_56B_PRETRAIN_CONFIG_GB200_FP8_CS_V1, @@ -29,6 +44,16 @@ "NEMOTRONH_56B_PRETRAIN_CONFIG_B300_FP8_CS_V1", "NEMOTRONH_56B_PRETRAIN_CONFIG_B200_FP8_CS_V1", "NEMOTRONH_56B_PRETRAIN_CONFIG_H100_FP8_CS_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_FP8_MX_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_FP8_MX_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_FP8_MX_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_FP8_MX_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_BF16_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_FP8_CS_V1", ] if HAVE_MEGATRON_BRIDGE: @@ -39,5 +64,10 @@ "nemotronh_56b_pretrain_config_b300", "nemotronh_56b_pretrain_config_b200", "nemotronh_56b_pretrain_config_h100", + "nemotronh_3_nano_pretrain_config_gb300", + "nemotronh_3_nano_pretrain_config_gb200", + "nemotronh_3_nano_pretrain_config_b300", + "nemotronh_3_nano_pretrain_config_b200", + "nemotronh_3_nano_pretrain_config_h100", ] ) diff --git a/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py b/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py new file mode 100644 index 0000000000..736487ad0b --- /dev/null +++ b/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py @@ -0,0 +1,146 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from utils.overrides import set_workload_base_configs +from utils.precision import get_precision_config +from utils.utils import get_workload_base_config + +from megatron.bridge.recipes.nemotronh.nemotron_3_nano import nemotron_3_nano_pretrain_config as pretrain_config +from megatron.bridge.training.config import ConfigContainer + + +logger = logging.getLogger(__name__) + + +def set_nemotron_3_nano_common_configs(cfg: ConfigContainer) -> None: + """Set common performance configurations for all NemotronH configs.""" + cfg.mixed_precision.grad_reduce_in_fp32 = False + cfg.ddp.grad_reduce_in_fp32 = False + + +def nemotronh_3_nano_pretrain_config_gb300( + precision: str = "bf16", mock: bool = True, config_variant: str = "v1" +) -> ConfigContainer: + """GB300, baseline config.""" + # NemotronH currently only has FP8_CS base configs + base_cfg = get_workload_base_config( + model_family_name="nemotronh", + model_recipe_name="nemotron_3_nano", + gpu="gb300", + compute_dtype="FP8_CS", + task="pretrain", + config_variant=config_variant, + ) + precision_config = get_precision_config(precision) + + cfg = pretrain_config() + cfg.mixed_precision = precision_config + set_nemotron_3_nano_common_configs(cfg) + set_workload_base_configs(cfg, base_cfg) + + return cfg + + +def nemotron_3_nano_pretrain_config_gb200( + precision: str = "bf16", mock: bool = True, config_variant: str = "v1" +) -> ConfigContainer: + """GB200, baseline config.""" + # NemotronH currently only has FP8_CS base configs + base_cfg = get_workload_base_config( + model_family_name="nemotronh", + model_recipe_name="nemotron_3_nano", + gpu="gb200", + compute_dtype="FP8_CS", + task="pretrain", + config_variant=config_variant, + ) + precision_config = get_precision_config(precision) + + cfg = pretrain_config() + cfg.mixed_precision = precision_config + set_nemotron_3_nano_common_configs(cfg) + set_workload_base_configs(cfg, base_cfg) + + return cfg + + +def nemotron_3_nano_pretrain_config_b300( + precision: str = "bf16", mock: bool = True, config_variant: str = "v1" +) -> ConfigContainer: + """B300, baseline config.""" + # NemotronH currently only has FP8_CS base configs + base_cfg = get_workload_base_config( + model_family_name="nemotronh", + model_recipe_name="nemotron_3_nano", + gpu="b300", + compute_dtype="FP8_CS", + task="pretrain", + config_variant=config_variant, + ) + precision_config = get_precision_config(precision) + + cfg = pretrain_config() + cfg.mixed_precision = precision_config + set_nemotron_3_nano_common_configs(cfg) + set_workload_base_configs(cfg, base_cfg) + + return cfg + + +def nemotron_3_nano_pretrain_config_b200( + precision: str = "bf16", mock: bool = True, config_variant: str = "v1" +) -> ConfigContainer: + """B200, baseline config.""" + # NemotronH currently only has FP8_CS base configs + base_cfg = get_workload_base_config( + model_family_name="nemotronh", + model_recipe_name="nemotron_3_nano", + gpu="b200", + compute_dtype="FP8_CS", + task="pretrain", + config_variant=config_variant, + ) + precision_config = get_precision_config(precision) + + cfg = pretrain_config() + cfg.mixed_precision = precision_config + set_nemotron_3_nano_common_configs(cfg) + set_workload_base_configs(cfg, base_cfg) + + return cfg + + +def nemotron_3_nano_pretrain_config_h100( + precision: str = "bf16", mock: bool = True, config_variant: str = "v1" +) -> ConfigContainer: + """H100, baseline config.""" + # NemotronH currently only has FP8_CS base configs + base_cfg = get_workload_base_config( + model_family_name="nemotronh", + model_recipe_name="nemotron_3_nano", + gpu="h100", + compute_dtype="FP8_CS", + task="pretrain", + config_variant=config_variant, + ) + precision_config = get_precision_config(precision) + + cfg = pretrain_config() + cfg.mixed_precision = precision_config + set_nemotron_3_nano_common_configs(cfg) + set_workload_base_configs(cfg, base_cfg) + + return cfg diff --git a/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py b/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py new file mode 100644 index 0000000000..9d0effc47b --- /dev/null +++ b/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py @@ -0,0 +1,69 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Parallelism presets for Nemotron 3 Nano performance configs. + +Config naming convention: + {MODEL}_{SIZE}_{TASK}_CONFIG_{GPU}_{PRECISION}_{VERSION} + +V1: 30B_a3b + +Use --config_variant to select a variant. +Use --list_config_variants to see available variants interactively. +""" + +from dataclasses import replace + +from utils.utils import WorkloadBaseConfig + + +BASE_NEMOTRON_3_NANO_CONFIG = WorkloadBaseConfig( + num_gpus=8, + global_batch_size=3072, + micro_batch_size=2, + tensor_model_parallel_size=4, + expert_tensor_parallel_size=1, + expert_model_parallel_size=8, +) + +NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1 = replace( + BASE_NEMOTRON_3_NANO_CONFIG, + tensor_model_parallel_size=2, +) +NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1 + +NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1 = BASE_NEMOTRON_3_NANO_CONFIG +NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_FP8_MX_V1 = BASE_NEMOTRON_3_NANO_CONFIG + +NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1 = BASE_NEMOTRON_3_NANO_CONFIG +NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_FP8_MX_V1 = BASE_NEMOTRON_3_NANO_CONFIG + +NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1 = BASE_NEMOTRON_3_NANO_CONFIG +NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_FP8_MX_V1 = BASE_NEMOTRON_3_NANO_CONFIG + +NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_BF16_V1 = BASE_NEMOTRON_3_NANO_CONFIG +NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_FP8_CS_V1 = BASE_NEMOTRON_3_NANO_CONFIG + +__all__ = [ + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_FP8_MX_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_FP8_MX_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_FP8_MX_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_FP8_MX_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_BF16_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_FP8_CS_V1", +] From 773f8564f1954ba8b3b914511f44fbf2fb423dca Mon Sep 17 00:00:00 2001 From: Malay Nagda Date: Tue, 17 Feb 2026 17:11:35 +0530 Subject: [PATCH 2/6] nemotronh typos corrections Signed-off-by: Malay Nagda --- .../performance/configs/nemotronh/__init__.py | 26 +++++++++++-------- .../nemotronh/nemotron_3_nano_llm_pretrain.py | 2 +- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/scripts/performance/configs/nemotronh/__init__.py b/scripts/performance/configs/nemotronh/__init__.py index 0ae89ab914..dc597c4c19 100644 --- a/scripts/performance/configs/nemotronh/__init__.py +++ b/scripts/performance/configs/nemotronh/__init__.py @@ -6,12 +6,14 @@ HAVE_MEGATRON_BRIDGE = False if HAVE_MEGATRON_BRIDGE: + from .nemotron_3_nano_llm_pretrain import ( + nemotron_3_nano_pretrain_config_b200, + nemotron_3_nano_pretrain_config_b300, + nemotron_3_nano_pretrain_config_gb200, + nemotron_3_nano_pretrain_config_gb300, + nemotron_3_nano_pretrain_config_h100, + ) from .nemotronh_llm_pretrain import ( - nemotronh_3_nano_pretrain_config_b200, - nemotronh_3_nano_pretrain_config_b300, - nemotronh_3_nano_pretrain_config_gb200, - nemotronh_3_nano_pretrain_config_gb300, - nemotronh_3_nano_pretrain_config_h100, nemotronh_56b_pretrain_config_b200, nemotronh_56b_pretrain_config_b300, nemotronh_56b_pretrain_config_gb200, @@ -19,7 +21,7 @@ nemotronh_56b_pretrain_config_h100, ) -from .nemotronh_workload_base_configs import ( +from .nemotron_3_nano_workload_base_configs import ( NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1, NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_FP8_MX_V1, NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1, @@ -30,6 +32,8 @@ NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_FP8_MX_V1, NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_BF16_V1, NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_FP8_CS_V1, +) +from .nemotronh_workload_base_configs import ( NEMOTRONH_56B_PRETRAIN_CONFIG_B200_FP8_CS_V1, NEMOTRONH_56B_PRETRAIN_CONFIG_B300_FP8_CS_V1, NEMOTRONH_56B_PRETRAIN_CONFIG_GB200_FP8_CS_V1, @@ -64,10 +68,10 @@ "nemotronh_56b_pretrain_config_b300", "nemotronh_56b_pretrain_config_b200", "nemotronh_56b_pretrain_config_h100", - "nemotronh_3_nano_pretrain_config_gb300", - "nemotronh_3_nano_pretrain_config_gb200", - "nemotronh_3_nano_pretrain_config_b300", - "nemotronh_3_nano_pretrain_config_b200", - "nemotronh_3_nano_pretrain_config_h100", + "nemotron_3_nano_pretrain_config_gb300", + "nemotron_3_nano_pretrain_config_gb200", + "nemotron_3_nano_pretrain_config_b300", + "nemotron_3_nano_pretrain_config_b200", + "nemotron_3_nano_pretrain_config_h100", ] ) diff --git a/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py b/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py index 736487ad0b..8dd4dc6fbf 100644 --- a/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py +++ b/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py @@ -31,7 +31,7 @@ def set_nemotron_3_nano_common_configs(cfg: ConfigContainer) -> None: cfg.ddp.grad_reduce_in_fp32 = False -def nemotronh_3_nano_pretrain_config_gb300( +def nemotron_3_nano_pretrain_config_gb300( precision: str = "bf16", mock: bool = True, config_variant: str = "v1" ) -> ConfigContainer: """GB300, baseline config.""" From 4f384bca0dbd8996f3de7d487a2a2f43ba8bb54a Mon Sep 17 00:00:00 2001 From: Malay Nagda Date: Tue, 17 Feb 2026 18:40:40 +0530 Subject: [PATCH 3/6] nemotronh no CS precision Signed-off-by: Malay Nagda --- .../nemotronh/nemotron_3_nano_llm_pretrain.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py b/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py index 8dd4dc6fbf..0e3d07e0c6 100644 --- a/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py +++ b/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py @@ -35,12 +35,11 @@ def nemotron_3_nano_pretrain_config_gb300( precision: str = "bf16", mock: bool = True, config_variant: str = "v1" ) -> ConfigContainer: """GB300, baseline config.""" - # NemotronH currently only has FP8_CS base configs base_cfg = get_workload_base_config( model_family_name="nemotronh", model_recipe_name="nemotron_3_nano", gpu="gb300", - compute_dtype="FP8_CS", + compute_dtype=precision.upper(), task="pretrain", config_variant=config_variant, ) @@ -58,12 +57,11 @@ def nemotron_3_nano_pretrain_config_gb200( precision: str = "bf16", mock: bool = True, config_variant: str = "v1" ) -> ConfigContainer: """GB200, baseline config.""" - # NemotronH currently only has FP8_CS base configs base_cfg = get_workload_base_config( model_family_name="nemotronh", model_recipe_name="nemotron_3_nano", gpu="gb200", - compute_dtype="FP8_CS", + compute_dtype=precision.upper(), task="pretrain", config_variant=config_variant, ) @@ -81,12 +79,11 @@ def nemotron_3_nano_pretrain_config_b300( precision: str = "bf16", mock: bool = True, config_variant: str = "v1" ) -> ConfigContainer: """B300, baseline config.""" - # NemotronH currently only has FP8_CS base configs base_cfg = get_workload_base_config( model_family_name="nemotronh", model_recipe_name="nemotron_3_nano", gpu="b300", - compute_dtype="FP8_CS", + compute_dtype=precision.upper(), task="pretrain", config_variant=config_variant, ) @@ -104,12 +101,11 @@ def nemotron_3_nano_pretrain_config_b200( precision: str = "bf16", mock: bool = True, config_variant: str = "v1" ) -> ConfigContainer: """B200, baseline config.""" - # NemotronH currently only has FP8_CS base configs base_cfg = get_workload_base_config( model_family_name="nemotronh", model_recipe_name="nemotron_3_nano", gpu="b200", - compute_dtype="FP8_CS", + compute_dtype=precision.upper(), task="pretrain", config_variant=config_variant, ) @@ -127,12 +123,11 @@ def nemotron_3_nano_pretrain_config_h100( precision: str = "bf16", mock: bool = True, config_variant: str = "v1" ) -> ConfigContainer: """H100, baseline config.""" - # NemotronH currently only has FP8_CS base configs base_cfg = get_workload_base_config( model_family_name="nemotronh", model_recipe_name="nemotron_3_nano", gpu="h100", - compute_dtype="FP8_CS", + compute_dtype=precision.upper(), task="pretrain", config_variant=config_variant, ) From a6128a7ad43dff63fa396dc50ca5cf6f3676d173 Mon Sep 17 00:00:00 2001 From: Malay Nagda Date: Tue, 17 Feb 2026 19:42:26 +0530 Subject: [PATCH 4/6] tp=2 for blackwell Signed-off-by: Malay Nagda --- .../nemotron_3_nano_workload_base_configs.py | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py b/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py index 9d0effc47b..0ed523f233 100644 --- a/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py +++ b/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py @@ -43,14 +43,23 @@ ) NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1 -NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1 = BASE_NEMOTRON_3_NANO_CONFIG -NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_FP8_MX_V1 = BASE_NEMOTRON_3_NANO_CONFIG +NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1 = replace( + BASE_NEMOTRON_3_NANO_CONFIG, + tensor_model_parallel_size=2, +) +NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1 -NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1 = BASE_NEMOTRON_3_NANO_CONFIG -NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_FP8_MX_V1 = BASE_NEMOTRON_3_NANO_CONFIG +NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1 = replace( + BASE_NEMOTRON_3_NANO_CONFIG, + tensor_model_parallel_size=2, +) +NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1 -NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1 = BASE_NEMOTRON_3_NANO_CONFIG -NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_FP8_MX_V1 = BASE_NEMOTRON_3_NANO_CONFIG +NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1 = replace( + BASE_NEMOTRON_3_NANO_CONFIG, + tensor_model_parallel_size=2, +) +NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1 NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_BF16_V1 = BASE_NEMOTRON_3_NANO_CONFIG NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_FP8_CS_V1 = BASE_NEMOTRON_3_NANO_CONFIG From 33b26e1840ca3cc3bccb3f96613b380d58996e29 Mon Sep 17 00:00:00 2001 From: Malay Nagda Date: Thu, 19 Feb 2026 14:16:47 +0530 Subject: [PATCH 5/6] TP=1 for Blackwell Signed-off-by: Malay Nagda --- .../nemotronh/nemotron_3_nano_workload_base_configs.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py b/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py index 0ed523f233..a042d3bb57 100644 --- a/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py +++ b/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py @@ -39,25 +39,25 @@ NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1 = replace( BASE_NEMOTRON_3_NANO_CONFIG, - tensor_model_parallel_size=2, + tensor_model_parallel_size=1, ) NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1 NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1 = replace( BASE_NEMOTRON_3_NANO_CONFIG, - tensor_model_parallel_size=2, + tensor_model_parallel_size=1, ) NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1 NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1 = replace( BASE_NEMOTRON_3_NANO_CONFIG, - tensor_model_parallel_size=2, + tensor_model_parallel_size=1, ) NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1 NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1 = replace( BASE_NEMOTRON_3_NANO_CONFIG, - tensor_model_parallel_size=2, + tensor_model_parallel_size=1, ) NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1 From 957723c7fdbcbe7bfc30a16f4c2fb8e9ef96c659 Mon Sep 17 00:00:00 2001 From: Malay Nagda Date: Thu, 19 Feb 2026 14:28:06 +0530 Subject: [PATCH 6/6] cleanup Signed-off-by: Malay Nagda --- .../nemotronh/nemotron_3_nano_llm_pretrain.py | 22 +++++-------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py b/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py index 0e3d07e0c6..f71d8e34b9 100644 --- a/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py +++ b/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py @@ -26,14 +26,12 @@ def set_nemotron_3_nano_common_configs(cfg: ConfigContainer) -> None: - """Set common performance configurations for all NemotronH configs.""" + """Set common performance configurations for all Nemotron 3 Nano configs.""" cfg.mixed_precision.grad_reduce_in_fp32 = False cfg.ddp.grad_reduce_in_fp32 = False -def nemotron_3_nano_pretrain_config_gb300( - precision: str = "bf16", mock: bool = True, config_variant: str = "v1" -) -> ConfigContainer: +def nemotron_3_nano_pretrain_config_gb300(precision: str = "bf16", config_variant: str = "v1") -> ConfigContainer: """GB300, baseline config.""" base_cfg = get_workload_base_config( model_family_name="nemotronh", @@ -53,9 +51,7 @@ def nemotron_3_nano_pretrain_config_gb300( return cfg -def nemotron_3_nano_pretrain_config_gb200( - precision: str = "bf16", mock: bool = True, config_variant: str = "v1" -) -> ConfigContainer: +def nemotron_3_nano_pretrain_config_gb200(precision: str = "bf16", config_variant: str = "v1") -> ConfigContainer: """GB200, baseline config.""" base_cfg = get_workload_base_config( model_family_name="nemotronh", @@ -75,9 +71,7 @@ def nemotron_3_nano_pretrain_config_gb200( return cfg -def nemotron_3_nano_pretrain_config_b300( - precision: str = "bf16", mock: bool = True, config_variant: str = "v1" -) -> ConfigContainer: +def nemotron_3_nano_pretrain_config_b300(precision: str = "bf16", config_variant: str = "v1") -> ConfigContainer: """B300, baseline config.""" base_cfg = get_workload_base_config( model_family_name="nemotronh", @@ -97,9 +91,7 @@ def nemotron_3_nano_pretrain_config_b300( return cfg -def nemotron_3_nano_pretrain_config_b200( - precision: str = "bf16", mock: bool = True, config_variant: str = "v1" -) -> ConfigContainer: +def nemotron_3_nano_pretrain_config_b200(precision: str = "bf16", config_variant: str = "v1") -> ConfigContainer: """B200, baseline config.""" base_cfg = get_workload_base_config( model_family_name="nemotronh", @@ -119,9 +111,7 @@ def nemotron_3_nano_pretrain_config_b200( return cfg -def nemotron_3_nano_pretrain_config_h100( - precision: str = "bf16", mock: bool = True, config_variant: str = "v1" -) -> ConfigContainer: +def nemotron_3_nano_pretrain_config_h100(precision: str = "bf16", config_variant: str = "v1") -> ConfigContainer: """H100, baseline config.""" base_cfg = get_workload_base_config( model_family_name="nemotronh",