Merged
2 changes: 1 addition & 1 deletion examples/conversion/compare_hf_and_megatron/compare.py
@@ -446,7 +446,7 @@ def _load_hf_model(args, is_vl_model: bool):
hf_model = model_class.from_pretrained(
args.hf_model_path,
torch_dtype=torch.bfloat16,
device_map="cuda",
device_map="auto",
trust_remote_code=is_safe_repo(
trust_remote_code=args.trust_remote_code,
hf_path=args.hf_model_path,
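The hunk above swaps `device_map="cuda"` for `device_map="auto"`, letting Accelerate place shards across the available devices instead of forcing a single CUDA device. The surrounding `trust_remote_code` gating can be sketched in isolation; this is a hypothetical stand-in for the project's `is_safe_repo` helper, not its real implementation:

```python
def is_safe_repo(trust_remote_code: bool, hf_path: str) -> bool:
    # Hypothetical stand-in: honor the user's --trust-remote-code flag
    # only for allow-listed organizations (assumed policy, for illustration).
    allowed_prefixes = ("nvidia/",)
    return bool(trust_remote_code) and hf_path.startswith(allowed_prefixes)

# The from_pretrained call would then receive the gated flag:
flag = is_safe_repo(trust_remote_code=True, hf_path="nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16")
```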
73 changes: 20 additions & 53 deletions examples/models/nemotron_3/README.md
@@ -1,8 +1,11 @@
# Nemotron 3 Examples

This directory contains example scripts for Nemotron 3 language models.
This directory contains example scripts for Nemotron 3 language models:

For model introduction and architecture details, see the Nemotron 3 documentation.
| Model | Parameters | Active Parameters | Subdirectory |
|-------|-----------|-------------------|--------------|
| Nemotron 3 Nano | 30B | A3B | [nano/](nano/) |
| Nemotron 3 Super | 120B | A12B | [super/](super/) |

## Workspace Configuration

@@ -18,47 +21,21 @@ Directory structure:

## Checkpoint Conversion

See the [conversion.sh](conversion.sh) script for checkpoint conversion examples.
Each model has its own conversion script: [nano/conversion.sh](nano/conversion.sh), [super/conversion.sh](super/conversion.sh).

### Import HF → Megatron

To import the HF model to your desired Megatron path:

```bash
python examples/conversion/convert_checkpoints.py import \
--hf-model nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16 \
--megatron-path ${WORKSPACE}/models/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16 \
--trust-remote-code
```

### Export Megatron → HF

```bash
python examples/conversion/convert_checkpoints.py export \
--hf-model nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16 \
--megatron-path ${WORKSPACE}/models/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16/iter_0000000 \
--hf-path ${WORKSPACE}/models/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16-hf-export
```

### Round-trip Validation

Multi-GPU round-trip validation between formats:
## Training Recipes

```bash
python -m torch.distributed.run --nproc_per_node=8 \
examples/conversion/hf_megatron_roundtrip_multi_gpu.py \
--hf-model-id nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16 \
--megatron-load-path ${WORKSPACE}/models/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16/iter_0000000 \
--tp 2 --pp 2 \
--trust-remote-code
```
Available recipes:

## Training Recipes
**Nano** ([source](../../../src/megatron/bridge/recipes/nemotronh/nemotron_3_nano.py)):
- `nemotron_3_nano_pretrain_config`: Pretraining
- `nemotron_3_nano_sft_config`: Supervised fine-tuning
- `nemotron_3_nano_peft_config`: PEFT with LoRA support

- See: [bridge.recipes.nemotronh](../../../src/megatron/bridge/recipes/nemotronh/nemotron_3_nano.py)
- Available recipes:
- `nemotron_3_nano_pretrain_config`: Pretraining configuration
- `nemotron_3_nano_finetune_config`: Finetuning configuration with PEFT support
**Super** ([source](../../../src/megatron/bridge/recipes/nemotronh/nemotron_3_super.py)):
- `nemotron_3_super_pretrain_config`: Pretraining
- `nemotron_3_super_sft_config`: Supervised fine-tuning
- `nemotron_3_super_peft_config`: PEFT with LoRA support

Before training, ensure the following are configured:
1. **Container Image**: Set `CONTAINER_IMAGE` in the SLURM scripts to your container path
@@ -70,23 +47,13 @@ Before training, ensure the following are configured:

All training scripts use SLURM for containerized multi-node training.

### Pretrain

See the [slurm_pretrain.sh](slurm_pretrain.sh) script for pretraining with configurable model parallelisms.

W&B report coming soon.

### Supervised Fine-Tuning (SFT)

See the [slurm_sft.sh](slurm_sft.sh) script for full parameter fine-tuning.

W&B report coming soon.
### Nano

### Parameter-Efficient Fine-Tuning (PEFT) with LoRA
See the SLURM scripts in [nano/](nano/): [slurm_pretrain.sh](nano/slurm_pretrain.sh), [slurm_sft.sh](nano/slurm_sft.sh), [slurm_peft.sh](nano/slurm_peft.sh).

See the [slurm_peft.sh](slurm_peft.sh) script for LoRA fine-tuning.
### Super

W&B report coming soon.
See the SLURM scripts in [super/](super/): [slurm_pretrain.sh](super/slurm_pretrain.sh), [slurm_sft.sh](super/slurm_sft.sh), [slurm_peft.sh](super/slurm_peft.sh).

## Evaluation

@@ -86,8 +86,8 @@ export NCCL_NVLS_ENABLE=0
# export HF_HOME="/path/to/shared/HF_HOME"

# Authentication tokens (set these for your environment)
# export HF_TOKEN="hf_your_token_here"
# export WANDB_API_KEY="your_wandb_key_here"
# export HF_TOKEN=
# export WANDB_API_KEY=

# ==============================================================================
# Job Execution
@@ -83,8 +83,8 @@ export NCCL_NVLS_ENABLE=0
# export HF_HOME="/path/to/shared/HF_HOME"

# Authentication tokens (set these for your environment)
# export HF_TOKEN="hf_your_token_here"
# export WANDB_API_KEY="your_wandb_key_here"
# export HF_TOKEN=
# export WANDB_API_KEY=

# ==============================================================================
# Job Execution
@@ -85,8 +85,8 @@ export NCCL_NVLS_ENABLE=0
# export HF_HOME="/path/to/shared/HF_HOME"

# Authentication tokens (set these for your environment)
# export HF_TOKEN="hf_your_token_here"
# export WANDB_API_KEY="your_wandb_key_here"
# export HF_TOKEN=
# export WANDB_API_KEY=

# ==============================================================================
# Job Execution
44 changes: 44 additions & 0 deletions examples/models/nemotron_3/super/conversion.sh
@@ -0,0 +1,44 @@
#!/usr/bin/env bash
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -xeuo pipefail

# Workspace directory for checkpoints and results
WORKSPACE=${WORKSPACE:-/workspace}

MODEL_NAME=NVIDIA-Nemotron-3-Super-120B-A12B-BF16
HF_MODEL_ID=nvidia/$MODEL_NAME

# Import HF → Megatron
uv run torchrun --nproc_per_node=8 examples/conversion/convert_checkpoints_multi_gpu.py import \
--hf-model $HF_MODEL_ID \
--megatron-path ${WORKSPACE}/models/$MODEL_NAME \
--tp 1 --ep 8


# Export Megatron → HF
uv run torchrun --nproc_per_node=8 examples/conversion/convert_checkpoints_multi_gpu.py export \
--hf-model $HF_MODEL_ID \
--megatron-path ${WORKSPACE}/models/$MODEL_NAME/iter_0000000 \
--hf-path ${WORKSPACE}/models/$MODEL_NAME-hf-export \
--tp 1 --ep 8


# Round-trip validation
uv run torchrun --nproc_per_node=8 \
examples/conversion/hf_megatron_roundtrip_multi_gpu.py \
--hf-model-id $HF_MODEL_ID \
--megatron-load-path ${WORKSPACE}/models/$MODEL_NAME/iter_0000000 \
--tp 1 --ep 8
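All three conversion steps derive their checkpoint paths from the same two variables; a minimal sketch of that expansion, assuming the script's own defaults (the `*_PATH` names are illustrative, not variables the script defines):

```shell
#!/usr/bin/env bash
set -euo pipefail

# ${WORKSPACE:-/workspace} falls back to /workspace when the variable is unset
WORKSPACE=${WORKSPACE:-/workspace}
MODEL_NAME=NVIDIA-Nemotron-3-Super-120B-A12B-BF16
HF_MODEL_ID=nvidia/$MODEL_NAME

# Paths consumed by the import, export, and round-trip steps respectively
IMPORT_PATH=${WORKSPACE}/models/$MODEL_NAME
EXPORT_PATH=${WORKSPACE}/models/$MODEL_NAME-hf-export
LOAD_PATH=${WORKSPACE}/models/$MODEL_NAME/iter_0000000
echo "$HF_MODEL_ID -> $LOAD_PATH"
```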
107 changes: 107 additions & 0 deletions examples/models/nemotron_3/super/finetune_nemotron_3_super.py
@@ -0,0 +1,107 @@
#!/usr/bin/env python3
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import logging
import os
import sys
from typing import Tuple

import torch
from omegaconf import OmegaConf

from megatron.bridge.recipes.nemotronh.nemotron_3_super import (
nemotron_3_super_peft_config,
nemotron_3_super_sft_config,
)
from megatron.bridge.training.config import ConfigContainer
from megatron.bridge.training.finetune import finetune
from megatron.bridge.training.gpt_step import forward_step
from megatron.bridge.training.utils.omegaconf_utils import (
apply_overrides,
create_omegaconf_dict_config,
parse_hydra_overrides,
)


logger: logging.Logger = logging.getLogger(__name__)


def parse_cli_args() -> Tuple[argparse.Namespace, list[str]]:
"""Parse command line arguments, separating known script args from OmegaConf overrides."""
parser = argparse.ArgumentParser(
description="Finetune Nemotron 3 Super model using Megatron-Bridge with YAML and CLI overrides",
formatter_class=argparse.RawTextHelpFormatter,
)
parser.add_argument(
"--config-file",
type=str,
help="Path to the YAML OmegaConf override file.",
)
parser.add_argument("--peft", type=str, help="Type of PEFT to use")
parser.add_argument("--seq-length", type=int, default=8192, help="Sequence length")

# Parse known args for the script, remaining will be treated as overrides
args, cli_dotlist_overrides = parser.parse_known_args()
return args, cli_dotlist_overrides


def main() -> None:
"""
Entry point for the Nemotron 3 Super finetuning script.
"""
args, cli_overrides = parse_cli_args()

if args.peft is None or (isinstance(args.peft, str) and args.peft.lower() == "none"):
cfg: ConfigContainer = nemotron_3_super_sft_config()
else:
cfg: ConfigContainer = nemotron_3_super_peft_config(peft_scheme=args.peft)
cfg.model.seq_length = args.seq_length

# Convert the initial Python dataclass to an OmegaConf DictConfig for merging
merged_omega_conf, excluded_fields = create_omegaconf_dict_config(cfg)

# Load and merge YAML overrides if a config file is provided
if args.config_file:
logger.debug(f"Loading YAML overrides from: {args.config_file}")
if not os.path.exists(args.config_file):
logger.error(f"Override YAML file not found: {args.config_file}")
sys.exit(1)
yaml_overrides_omega = OmegaConf.load(args.config_file)
merged_omega_conf = OmegaConf.merge(merged_omega_conf, yaml_overrides_omega)
logger.debug("YAML overrides merged successfully.")

# Apply command-line overrides using Hydra-style parsing
if cli_overrides:
logger.debug(f"Applying Hydra-style command-line overrides: {cli_overrides}")
merged_omega_conf = parse_hydra_overrides(merged_omega_conf, cli_overrides)
logger.debug("Hydra-style command-line overrides applied successfully.")

# Apply the final merged OmegaConf configuration back to the original ConfigContainer
logger.debug("Applying final merged configuration back to Python ConfigContainer...")
final_overrides_as_dict = OmegaConf.to_container(merged_omega_conf, resolve=True)
# Apply overrides while preserving excluded fields
apply_overrides(cfg, final_overrides_as_dict, excluded_fields)

# Start training
logger.debug("Starting finetuning...")
finetune(config=cfg, forward_step_func=forward_step)

if torch.distributed.is_initialized():
torch.distributed.destroy_process_group()


if __name__ == "__main__":
main()
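The SFT-vs-PEFT branch in `main()` hinges on a small guard over `args.peft`; it can be exercised in isolation as a stand-alone predicate (the `wants_peft` helper is hypothetical, not part of the script):

```python
def wants_peft(peft) -> bool:
    # Mirror of the script's guard: None or the string "none" (any case)
    # selects plain SFT; anything else selects the PEFT recipe.
    return not (peft is None or (isinstance(peft, str) and peft.lower() == "none"))

# "lora" -> nemotron_3_super_peft_config; "none"/None -> nemotron_3_super_sft_config
selected = "peft" if wants_peft("lora") else "sft"
```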
100 changes: 100 additions & 0 deletions examples/models/nemotron_3/super/pretrain_nemotron_3_super.py
@@ -0,0 +1,100 @@
#!/usr/bin/env python3
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import logging
import os
import sys
from typing import Tuple

import torch
from omegaconf import OmegaConf

from megatron.bridge.recipes.nemotronh.nemotron_3_super import (
nemotron_3_super_pretrain_config as pretrain_config,
)
from megatron.bridge.training.config import ConfigContainer
from megatron.bridge.training.gpt_step import forward_step
from megatron.bridge.training.pretrain import pretrain
from megatron.bridge.training.utils.omegaconf_utils import (
apply_overrides,
create_omegaconf_dict_config,
parse_hydra_overrides,
)


logger: logging.Logger = logging.getLogger(__name__)


def parse_cli_args() -> Tuple[argparse.Namespace, list[str]]:
"""Parse command line arguments, separating known script args from OmegaConf overrides."""
parser = argparse.ArgumentParser(
description="Pretrain Nemotron 3 Super model using Megatron-Bridge with YAML and CLI overrides",
formatter_class=argparse.RawTextHelpFormatter,
)
parser.add_argument(
"--config-file",
type=str,
help="Path to the YAML OmegaConf override file.",
)

# Parse known args for the script, remaining will be treated as overrides
args, cli_dotlist_overrides = parser.parse_known_args()
return args, cli_dotlist_overrides


def main() -> None:
"""
Entry point for the Nemotron 3 Super pretraining script.
"""
args, cli_overrides = parse_cli_args()

cfg: ConfigContainer = pretrain_config()

# Convert the initial Python dataclass to an OmegaConf DictConfig for merging
merged_omega_conf, excluded_fields = create_omegaconf_dict_config(cfg)

# Load and merge YAML overrides if a config file is provided
if args.config_file:
logger.debug(f"Loading YAML overrides from: {args.config_file}")
if not os.path.exists(args.config_file):
logger.error(f"Override YAML file not found: {args.config_file}")
sys.exit(1)
yaml_overrides_omega = OmegaConf.load(args.config_file)
merged_omega_conf = OmegaConf.merge(merged_omega_conf, yaml_overrides_omega)
logger.debug("YAML overrides merged successfully.")

# Apply command-line overrides using Hydra-style parsing
if cli_overrides:
logger.debug(f"Applying Hydra-style command-line overrides: {cli_overrides}")
merged_omega_conf = parse_hydra_overrides(merged_omega_conf, cli_overrides)
logger.debug("Hydra-style command-line overrides applied successfully.")

# Apply the final merged OmegaConf configuration back to the original ConfigContainer
logger.debug("Applying final merged configuration back to Python ConfigContainer...")
final_overrides_as_dict = OmegaConf.to_container(merged_omega_conf, resolve=True)
# Apply overrides while preserving excluded fields
apply_overrides(cfg, final_overrides_as_dict, excluded_fields)

# Start training
logger.debug("Starting pretraining...")
pretrain(config=cfg, forward_step_func=forward_step)

if torch.distributed.is_initialized():
torch.distributed.destroy_process_group()


if __name__ == "__main__":
main()
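Both scripts rely on `argparse.parse_known_args` to split known script flags from the trailing dotlist overrides handed to `parse_hydra_overrides`. A self-contained illustration (stdlib only; the override keys are made-up examples):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--config-file", type=str)

# Known flags are consumed; unrecognized tokens pass through, in order,
# as the second return value and are later treated as overrides.
args, overrides = parser.parse_known_args(
    ["--config-file", "conf/my_overrides.yaml", "train.train_iters=100", "model.seq_length=8192"]
)
print(args.config_file, overrides)
```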