From 9ace743d1d910004038dcf12f2c8057a38138df6 Mon Sep 17 00:00:00 2001
From: Malay Nagda <malayn@nvidia.com>
Date: Tue, 10 Feb 2026 18:33:28 +0530
Subject: [PATCH 1/6] nemotron3 nano recipes

Signed-off-by: Malay Nagda <malayn@nvidia.com>
---
 .../performance/configs/nemotronh/__init__.py |  30 ++++
 .../nemotronh/nemotron_3_nano_llm_pretrain.py | 146 ++++++++++++++++++
 .../nemotron_3_nano_workload_base_configs.py  |  69 +++++++++
 3 files changed, 245 insertions(+)
 create mode 100644 scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py
 create mode 100644 scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py

diff --git a/scripts/performance/configs/nemotronh/__init__.py b/scripts/performance/configs/nemotronh/__init__.py
index aa3f16830e..0ae89ab914 100644
--- a/scripts/performance/configs/nemotronh/__init__.py
+++ b/scripts/performance/configs/nemotronh/__init__.py
@@ -7,6 +7,11 @@
 
 if HAVE_MEGATRON_BRIDGE:
     from .nemotronh_llm_pretrain import (
+        nemotronh_3_nano_pretrain_config_b200,
+        nemotronh_3_nano_pretrain_config_b300,
+        nemotronh_3_nano_pretrain_config_gb200,
+        nemotronh_3_nano_pretrain_config_gb300,
+        nemotronh_3_nano_pretrain_config_h100,
         nemotronh_56b_pretrain_config_b200,
         nemotronh_56b_pretrain_config_b300,
         nemotronh_56b_pretrain_config_gb200,
@@ -15,6 +20,16 @@
     )
 
 from .nemotronh_workload_base_configs import (
+    NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1,
+    NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_FP8_MX_V1,
+    NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1,
+    NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_FP8_MX_V1,
+    NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1,
+    NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_FP8_MX_V1,
+    NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1,
+    NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_FP8_MX_V1,
+    NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_BF16_V1,
+    NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_FP8_CS_V1,
     NEMOTRONH_56B_PRETRAIN_CONFIG_B200_FP8_CS_V1,
     NEMOTRONH_56B_PRETRAIN_CONFIG_B300_FP8_CS_V1,
     NEMOTRONH_56B_PRETRAIN_CONFIG_GB200_FP8_CS_V1,
@@ -29,6 +44,16 @@
     "NEMOTRONH_56B_PRETRAIN_CONFIG_B300_FP8_CS_V1",
     "NEMOTRONH_56B_PRETRAIN_CONFIG_B200_FP8_CS_V1",
     "NEMOTRONH_56B_PRETRAIN_CONFIG_H100_FP8_CS_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_FP8_MX_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_FP8_MX_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_FP8_MX_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_FP8_MX_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_BF16_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_FP8_CS_V1",
 ]
 
 if HAVE_MEGATRON_BRIDGE:
@@ -39,5 +64,10 @@
             "nemotronh_56b_pretrain_config_b300",
             "nemotronh_56b_pretrain_config_b200",
             "nemotronh_56b_pretrain_config_h100",
+            "nemotronh_3_nano_pretrain_config_gb300",
+            "nemotronh_3_nano_pretrain_config_gb200",
+            "nemotronh_3_nano_pretrain_config_b300",
+            "nemotronh_3_nano_pretrain_config_b200",
+            "nemotronh_3_nano_pretrain_config_h100",
         ]
     )
diff --git a/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py b/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py
new file mode 100644
index 0000000000..736487ad0b
--- /dev/null
+++ b/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py
@@ -0,0 +1,146 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+from utils.overrides import set_workload_base_configs
+from utils.precision import get_precision_config
+from utils.utils import get_workload_base_config
+
+from megatron.bridge.recipes.nemotronh.nemotron_3_nano import nemotron_3_nano_pretrain_config as pretrain_config
+from megatron.bridge.training.config import ConfigContainer
+
+
+logger = logging.getLogger(__name__)
+
+
+def set_nemotron_3_nano_common_configs(cfg: ConfigContainer) -> None:
+    """Set common performance configurations for all NemotronH configs."""
+    cfg.mixed_precision.grad_reduce_in_fp32 = False
+    cfg.ddp.grad_reduce_in_fp32 = False
+
+
+def nemotronh_3_nano_pretrain_config_gb300(
+    precision: str = "bf16", mock: bool = True, config_variant: str = "v1"
+) -> ConfigContainer:
+    """GB300, baseline config."""
+    # NemotronH currently only has FP8_CS base configs
+    base_cfg = get_workload_base_config(
+        model_family_name="nemotronh",
+        model_recipe_name="nemotron_3_nano",
+        gpu="gb300",
+        compute_dtype="FP8_CS",
+        task="pretrain",
+        config_variant=config_variant,
+    )
+    precision_config = get_precision_config(precision)
+
+    cfg = pretrain_config()
+    cfg.mixed_precision = precision_config
+    set_nemotron_3_nano_common_configs(cfg)
+    set_workload_base_configs(cfg, base_cfg)
+
+    return cfg
+
+
+def nemotron_3_nano_pretrain_config_gb200(
+    precision: str = "bf16", mock: bool = True, config_variant: str = "v1"
+) -> ConfigContainer:
+    """GB200, baseline config."""
+    # NemotronH currently only has FP8_CS base configs
+    base_cfg = get_workload_base_config(
+        model_family_name="nemotronh",
+        model_recipe_name="nemotron_3_nano",
+        gpu="gb200",
+        compute_dtype="FP8_CS",
+        task="pretrain",
+        config_variant=config_variant,
+    )
+    precision_config = get_precision_config(precision)
+
+    cfg = pretrain_config()
+    cfg.mixed_precision = precision_config
+    set_nemotron_3_nano_common_configs(cfg)
+    set_workload_base_configs(cfg, base_cfg)
+
+    return cfg
+
+
+def nemotron_3_nano_pretrain_config_b300(
+    precision: str = "bf16", mock: bool = True, config_variant: str = "v1"
+) -> ConfigContainer:
+    """B300, baseline config."""
+    # NemotronH currently only has FP8_CS base configs
+    base_cfg = get_workload_base_config(
+        model_family_name="nemotronh",
+        model_recipe_name="nemotron_3_nano",
+        gpu="b300",
+        compute_dtype="FP8_CS",
+        task="pretrain",
+        config_variant=config_variant,
+    )
+    precision_config = get_precision_config(precision)
+
+    cfg = pretrain_config()
+    cfg.mixed_precision = precision_config
+    set_nemotron_3_nano_common_configs(cfg)
+    set_workload_base_configs(cfg, base_cfg)
+
+    return cfg
+
+
+def nemotron_3_nano_pretrain_config_b200(
+    precision: str = "bf16", mock: bool = True, config_variant: str = "v1"
+) -> ConfigContainer:
+    """B200, baseline config."""
+    # NemotronH currently only has FP8_CS base configs
+    base_cfg = get_workload_base_config(
+        model_family_name="nemotronh",
+        model_recipe_name="nemotron_3_nano",
+        gpu="b200",
+        compute_dtype="FP8_CS",
+        task="pretrain",
+        config_variant=config_variant,
+    )
+    precision_config = get_precision_config(precision)
+
+    cfg = pretrain_config()
+    cfg.mixed_precision = precision_config
+    set_nemotron_3_nano_common_configs(cfg)
+    set_workload_base_configs(cfg, base_cfg)
+
+    return cfg
+
+
+def nemotron_3_nano_pretrain_config_h100(
+    precision: str = "bf16", mock: bool = True, config_variant: str = "v1"
+) -> ConfigContainer:
+    """H100, baseline config."""
+    # NemotronH currently only has FP8_CS base configs
+    base_cfg = get_workload_base_config(
+        model_family_name="nemotronh",
+        model_recipe_name="nemotron_3_nano",
+        gpu="h100",
+        compute_dtype="FP8_CS",
+        task="pretrain",
+        config_variant=config_variant,
+    )
+    precision_config = get_precision_config(precision)
+
+    cfg = pretrain_config()
+    cfg.mixed_precision = precision_config
+    set_nemotron_3_nano_common_configs(cfg)
+    set_workload_base_configs(cfg, base_cfg)
+
+    return cfg
diff --git a/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py b/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py
new file mode 100644
index 0000000000..9d0effc47b
--- /dev/null
+++ b/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py
@@ -0,0 +1,69 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Parallelism presets for Nemotron 3 Nano performance configs.
+
+Config naming convention:
+    {MODEL}_{SIZE}_{TASK}_CONFIG_{GPU}_{PRECISION}_{VERSION}
+
+V1: 30B_a3b
+
+Use --config_variant to select a variant.
+Use --list_config_variants to see available variants interactively.
+"""
+
+from dataclasses import replace
+
+from utils.utils import WorkloadBaseConfig
+
+
+BASE_NEMOTRON_3_NANO_CONFIG = WorkloadBaseConfig(
+    num_gpus=8,
+    global_batch_size=3072,
+    micro_batch_size=2,
+    tensor_model_parallel_size=4,
+    expert_tensor_parallel_size=1,
+    expert_model_parallel_size=8,
+)
+
+NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1 = replace(
+    BASE_NEMOTRON_3_NANO_CONFIG,
+    tensor_model_parallel_size=2,
+)
+NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1
+
+NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1 = BASE_NEMOTRON_3_NANO_CONFIG
+NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_FP8_MX_V1 = BASE_NEMOTRON_3_NANO_CONFIG
+
+NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1 = BASE_NEMOTRON_3_NANO_CONFIG
+NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_FP8_MX_V1 = BASE_NEMOTRON_3_NANO_CONFIG
+
+NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1 = BASE_NEMOTRON_3_NANO_CONFIG
+NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_FP8_MX_V1 = BASE_NEMOTRON_3_NANO_CONFIG
+
+NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_BF16_V1 = BASE_NEMOTRON_3_NANO_CONFIG
+NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_FP8_CS_V1 = BASE_NEMOTRON_3_NANO_CONFIG
+
+__all__ = [
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_FP8_MX_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_FP8_MX_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_FP8_MX_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_FP8_MX_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_BF16_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_FP8_CS_V1",
+]

From 773f8564f1954ba8b3b914511f44fbf2fb423dca Mon Sep 17 00:00:00 2001
From: Malay Nagda <malayn@nvidia.com>
Date: Tue, 17 Feb 2026 17:11:35 +0530
Subject: [PATCH 2/6] nemotronh typo corrections

Signed-off-by: Malay Nagda <malayn@nvidia.com>
---
 .../performance/configs/nemotronh/__init__.py | 26 +++++++++++--------
 .../nemotronh/nemotron_3_nano_llm_pretrain.py |  2 +-
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/scripts/performance/configs/nemotronh/__init__.py b/scripts/performance/configs/nemotronh/__init__.py
index 0ae89ab914..dc597c4c19 100644
--- a/scripts/performance/configs/nemotronh/__init__.py
+++ b/scripts/performance/configs/nemotronh/__init__.py
@@ -6,12 +6,14 @@
     HAVE_MEGATRON_BRIDGE = False
 
 if HAVE_MEGATRON_BRIDGE:
+    from .nemotron_3_nano_llm_pretrain import (
+        nemotron_3_nano_pretrain_config_b200,
+        nemotron_3_nano_pretrain_config_b300,
+        nemotron_3_nano_pretrain_config_gb200,
+        nemotron_3_nano_pretrain_config_gb300,
+        nemotron_3_nano_pretrain_config_h100,
+    )
     from .nemotronh_llm_pretrain import (
-        nemotronh_3_nano_pretrain_config_b200,
-        nemotronh_3_nano_pretrain_config_b300,
-        nemotronh_3_nano_pretrain_config_gb200,
-        nemotronh_3_nano_pretrain_config_gb300,
-        nemotronh_3_nano_pretrain_config_h100,
         nemotronh_56b_pretrain_config_b200,
         nemotronh_56b_pretrain_config_b300,
         nemotronh_56b_pretrain_config_gb200,
@@ -19,7 +21,7 @@
         nemotronh_56b_pretrain_config_h100,
     )
 
-from .nemotronh_workload_base_configs import (
+from .nemotron_3_nano_workload_base_configs import (
     NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1,
     NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_FP8_MX_V1,
     NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1,
@@ -30,6 +32,8 @@
     NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_FP8_MX_V1,
     NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_BF16_V1,
     NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_FP8_CS_V1,
+)
+from .nemotronh_workload_base_configs import (
     NEMOTRONH_56B_PRETRAIN_CONFIG_B200_FP8_CS_V1,
     NEMOTRONH_56B_PRETRAIN_CONFIG_B300_FP8_CS_V1,
     NEMOTRONH_56B_PRETRAIN_CONFIG_GB200_FP8_CS_V1,
@@ -64,10 +68,10 @@
             "nemotronh_56b_pretrain_config_b300",
             "nemotronh_56b_pretrain_config_b200",
             "nemotronh_56b_pretrain_config_h100",
-            "nemotronh_3_nano_pretrain_config_gb300",
-            "nemotronh_3_nano_pretrain_config_gb200",
-            "nemotronh_3_nano_pretrain_config_b300",
-            "nemotronh_3_nano_pretrain_config_b200",
-            "nemotronh_3_nano_pretrain_config_h100",
+            "nemotron_3_nano_pretrain_config_gb300",
+            "nemotron_3_nano_pretrain_config_gb200",
+            "nemotron_3_nano_pretrain_config_b300",
+            "nemotron_3_nano_pretrain_config_b200",
+            "nemotron_3_nano_pretrain_config_h100",
         ]
     )
diff --git a/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py b/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py
index 736487ad0b..8dd4dc6fbf 100644
--- a/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py
+++ b/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py
@@ -31,7 +31,7 @@ def set_nemotron_3_nano_common_configs(cfg: ConfigContainer) -> None:
     cfg.ddp.grad_reduce_in_fp32 = False
 
 
-def nemotronh_3_nano_pretrain_config_gb300(
+def nemotron_3_nano_pretrain_config_gb300(
     precision: str = "bf16", mock: bool = True, config_variant: str = "v1"
 ) -> ConfigContainer:
     """GB300, baseline config."""

From 4f384bca0dbd8996f3de7d487a2a2f43ba8bb54a Mon Sep 17 00:00:00 2001
From: Malay Nagda <malayn@nvidia.com>
Date: Tue, 17 Feb 2026 18:40:40 +0530
Subject: [PATCH 3/6] nemotron 3 nano: use requested precision instead of hard-coded FP8_CS

Signed-off-by: Malay Nagda <malayn@nvidia.com>
---
 .../nemotronh/nemotron_3_nano_llm_pretrain.py     | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py b/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py
index 8dd4dc6fbf..0e3d07e0c6 100644
--- a/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py
+++ b/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py
@@ -35,12 +35,11 @@ def nemotron_3_nano_pretrain_config_gb300(
     precision: str = "bf16", mock: bool = True, config_variant: str = "v1"
 ) -> ConfigContainer:
     """GB300, baseline config."""
-    # NemotronH currently only has FP8_CS base configs
     base_cfg = get_workload_base_config(
         model_family_name="nemotronh",
         model_recipe_name="nemotron_3_nano",
         gpu="gb300",
-        compute_dtype="FP8_CS",
+        compute_dtype=precision.upper(),
         task="pretrain",
         config_variant=config_variant,
     )
@@ -58,12 +57,11 @@ def nemotron_3_nano_pretrain_config_gb200(
     precision: str = "bf16", mock: bool = True, config_variant: str = "v1"
 ) -> ConfigContainer:
     """GB200, baseline config."""
-    # NemotronH currently only has FP8_CS base configs
     base_cfg = get_workload_base_config(
         model_family_name="nemotronh",
         model_recipe_name="nemotron_3_nano",
         gpu="gb200",
-        compute_dtype="FP8_CS",
+        compute_dtype=precision.upper(),
         task="pretrain",
         config_variant=config_variant,
     )
@@ -81,12 +79,11 @@ def nemotron_3_nano_pretrain_config_b300(
     precision: str = "bf16", mock: bool = True, config_variant: str = "v1"
 ) -> ConfigContainer:
     """B300, baseline config."""
-    # NemotronH currently only has FP8_CS base configs
     base_cfg = get_workload_base_config(
         model_family_name="nemotronh",
         model_recipe_name="nemotron_3_nano",
         gpu="b300",
-        compute_dtype="FP8_CS",
+        compute_dtype=precision.upper(),
         task="pretrain",
         config_variant=config_variant,
     )
@@ -104,12 +101,11 @@ def nemotron_3_nano_pretrain_config_b200(
     precision: str = "bf16", mock: bool = True, config_variant: str = "v1"
 ) -> ConfigContainer:
     """B200, baseline config."""
-    # NemotronH currently only has FP8_CS base configs
     base_cfg = get_workload_base_config(
         model_family_name="nemotronh",
         model_recipe_name="nemotron_3_nano",
         gpu="b200",
-        compute_dtype="FP8_CS",
+        compute_dtype=precision.upper(),
         task="pretrain",
         config_variant=config_variant,
     )
@@ -127,12 +123,11 @@ def nemotron_3_nano_pretrain_config_h100(
     precision: str = "bf16", mock: bool = True, config_variant: str = "v1"
 ) -> ConfigContainer:
     """H100, baseline config."""
-    # NemotronH currently only has FP8_CS base configs
     base_cfg = get_workload_base_config(
         model_family_name="nemotronh",
         model_recipe_name="nemotron_3_nano",
         gpu="h100",
-        compute_dtype="FP8_CS",
+        compute_dtype=precision.upper(),
         task="pretrain",
         config_variant=config_variant,
     )

From a6128a7ad43dff63fa396dc50ca5cf6f3676d173 Mon Sep 17 00:00:00 2001
From: Malay Nagda <malayn@nvidia.com>
Date: Tue, 17 Feb 2026 19:42:26 +0530
Subject: [PATCH 4/6] tp=2 for blackwell

Signed-off-by: Malay Nagda <malayn@nvidia.com>
---
 .../nemotron_3_nano_workload_base_configs.py  | 21 +++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py b/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py
index 9d0effc47b..0ed523f233 100644
--- a/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py
+++ b/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py
@@ -43,14 +43,23 @@
 )
 NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1
 
-NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1 = BASE_NEMOTRON_3_NANO_CONFIG
-NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_FP8_MX_V1 = BASE_NEMOTRON_3_NANO_CONFIG
+NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1 = replace(
+    BASE_NEMOTRON_3_NANO_CONFIG,
+    tensor_model_parallel_size=2,
+)
+NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1
 
-NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1 = BASE_NEMOTRON_3_NANO_CONFIG
-NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_FP8_MX_V1 = BASE_NEMOTRON_3_NANO_CONFIG
+NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1 = replace(
+    BASE_NEMOTRON_3_NANO_CONFIG,
+    tensor_model_parallel_size=2,
+)
+NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1
 
-NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1 = BASE_NEMOTRON_3_NANO_CONFIG
-NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_FP8_MX_V1 = BASE_NEMOTRON_3_NANO_CONFIG
+NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1 = replace(
+    BASE_NEMOTRON_3_NANO_CONFIG,
+    tensor_model_parallel_size=2,
+)
+NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1
 
 NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_BF16_V1 = BASE_NEMOTRON_3_NANO_CONFIG
 NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_FP8_CS_V1 = BASE_NEMOTRON_3_NANO_CONFIG

From 33b26e1840ca3cc3bccb3f96613b380d58996e29 Mon Sep 17 00:00:00 2001
From: Malay Nagda <malayn@nvidia.com>
Date: Thu, 19 Feb 2026 14:16:47 +0530
Subject: [PATCH 5/6] TP=1 for Blackwell

Signed-off-by: Malay Nagda <malayn@nvidia.com>
---
 .../nemotronh/nemotron_3_nano_workload_base_configs.py    | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py b/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py
index 0ed523f233..a042d3bb57 100644
--- a/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py
+++ b/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py
@@ -39,25 +39,25 @@
 
 NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1 = replace(
     BASE_NEMOTRON_3_NANO_CONFIG,
-    tensor_model_parallel_size=2,
+    tensor_model_parallel_size=1,
 )
 NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1
 
 NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1 = replace(
     BASE_NEMOTRON_3_NANO_CONFIG,
-    tensor_model_parallel_size=2,
+    tensor_model_parallel_size=1,
 )
 NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1
 
 NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1 = replace(
     BASE_NEMOTRON_3_NANO_CONFIG,
-    tensor_model_parallel_size=2,
+    tensor_model_parallel_size=1,
 )
 NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1
 
 NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1 = replace(
     BASE_NEMOTRON_3_NANO_CONFIG,
-    tensor_model_parallel_size=2,
+    tensor_model_parallel_size=1,
 )
 NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1
 

From 957723c7fdbcbe7bfc30a16f4c2fb8e9ef96c659 Mon Sep 17 00:00:00 2001
From: Malay Nagda <malayn@nvidia.com>
Date: Thu, 19 Feb 2026 14:28:06 +0530
Subject: [PATCH 6/6] cleanup

Signed-off-by: Malay Nagda <malayn@nvidia.com>
---
 .../nemotronh/nemotron_3_nano_llm_pretrain.py | 22 +++++--------------
 1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py b/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py
index 0e3d07e0c6..f71d8e34b9 100644
--- a/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py
+++ b/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py
@@ -26,14 +26,12 @@
 
 
 def set_nemotron_3_nano_common_configs(cfg: ConfigContainer) -> None:
-    """Set common performance configurations for all NemotronH configs."""
+    """Set common performance configurations for all Nemotron 3 Nano configs."""
     cfg.mixed_precision.grad_reduce_in_fp32 = False
     cfg.ddp.grad_reduce_in_fp32 = False
 
 
-def nemotron_3_nano_pretrain_config_gb300(
-    precision: str = "bf16", mock: bool = True, config_variant: str = "v1"
-) -> ConfigContainer:
+def nemotron_3_nano_pretrain_config_gb300(precision: str = "bf16", config_variant: str = "v1") -> ConfigContainer:
     """GB300, baseline config."""
     base_cfg = get_workload_base_config(
         model_family_name="nemotronh",
@@ -53,9 +51,7 @@ def nemotron_3_nano_pretrain_config_gb300(
     return cfg
 
 
-def nemotron_3_nano_pretrain_config_gb200(
-    precision: str = "bf16", mock: bool = True, config_variant: str = "v1"
-) -> ConfigContainer:
+def nemotron_3_nano_pretrain_config_gb200(precision: str = "bf16", config_variant: str = "v1") -> ConfigContainer:
     """GB200, baseline config."""
     base_cfg = get_workload_base_config(
         model_family_name="nemotronh",
@@ -75,9 +71,7 @@ def nemotron_3_nano_pretrain_config_gb200(
     return cfg
 
 
-def nemotron_3_nano_pretrain_config_b300(
-    precision: str = "bf16", mock: bool = True, config_variant: str = "v1"
-) -> ConfigContainer:
+def nemotron_3_nano_pretrain_config_b300(precision: str = "bf16", config_variant: str = "v1") -> ConfigContainer:
     """B300, baseline config."""
     base_cfg = get_workload_base_config(
         model_family_name="nemotronh",
@@ -97,9 +91,7 @@ def nemotron_3_nano_pretrain_config_b300(
     return cfg
 
 
-def nemotron_3_nano_pretrain_config_b200(
-    precision: str = "bf16", mock: bool = True, config_variant: str = "v1"
-) -> ConfigContainer:
+def nemotron_3_nano_pretrain_config_b200(precision: str = "bf16", config_variant: str = "v1") -> ConfigContainer:
     """B200, baseline config."""
     base_cfg = get_workload_base_config(
         model_family_name="nemotronh",
@@ -119,9 +111,7 @@ def nemotron_3_nano_pretrain_config_b200(
     return cfg
 
 
-def nemotron_3_nano_pretrain_config_h100(
-    precision: str = "bf16", mock: bool = True, config_variant: str = "v1"
-) -> ConfigContainer:
+def nemotron_3_nano_pretrain_config_h100(precision: str = "bf16", config_variant: str = "v1") -> ConfigContainer:
     """H100, baseline config."""
     base_cfg = get_workload_base_config(
         model_family_name="nemotronh",