Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions scripts/performance/configs/nemotronh/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@
HAVE_MEGATRON_BRIDGE = False

if HAVE_MEGATRON_BRIDGE:
    # These symbols are re-exported via the dynamic __all__.extend() at the
    # bottom of this module. Flake8's F401 check cannot trace the dynamic
    # extend and would flag them as unused, so suppress it explicitly.
    from .nemotron_3_nano_llm_pretrain import (  # noqa: F401
        nemotron_3_nano_pretrain_config_b200,
        nemotron_3_nano_pretrain_config_b300,
        nemotron_3_nano_pretrain_config_gb200,
        nemotron_3_nano_pretrain_config_gb300,
        nemotron_3_nano_pretrain_config_h100,
    )
Comment on lines +9 to +15
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Add # noqa: F401 to suppress Flake8 false-positive unused-import errors.

All five symbols are intentionally re-exported via the dynamic __all__.extend() at the bottom, but Flake8's F401 check cannot statically trace the dynamic extend and flags each import as unused. This will fail CI. The same suppression is needed for the existing .nemotronh_llm_pretrain block (lines 16-22) for consistency, though that code is not changed here.

🔧 Proposed fix
-    from .nemotron_3_nano_llm_pretrain import (
-        nemotron_3_nano_pretrain_config_b200,
-        nemotron_3_nano_pretrain_config_b300,
-        nemotron_3_nano_pretrain_config_gb200,
-        nemotron_3_nano_pretrain_config_gb300,
-        nemotron_3_nano_pretrain_config_h100,
-    )
+    from .nemotron_3_nano_llm_pretrain import (  # noqa: F401
+        nemotron_3_nano_pretrain_config_b200,
+        nemotron_3_nano_pretrain_config_b300,
+        nemotron_3_nano_pretrain_config_gb200,
+        nemotron_3_nano_pretrain_config_gb300,
+        nemotron_3_nano_pretrain_config_h100,
+    )
🧰 Tools
🪛 Flake8 (7.3.0)

[error] 9-9: '.nemotron_3_nano_llm_pretrain.nemotron_3_nano_pretrain_config_b200' imported but unused

(F401)


[error] 9-9: '.nemotron_3_nano_llm_pretrain.nemotron_3_nano_pretrain_config_b300' imported but unused

(F401)


[error] 9-9: '.nemotron_3_nano_llm_pretrain.nemotron_3_nano_pretrain_config_gb200' imported but unused

(F401)


[error] 9-9: '.nemotron_3_nano_llm_pretrain.nemotron_3_nano_pretrain_config_gb300' imported but unused

(F401)


[error] 9-9: '.nemotron_3_nano_llm_pretrain.nemotron_3_nano_pretrain_config_h100' imported but unused

(F401)

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@scripts/performance/configs/nemotronh/__init__.py` around lines 9 - 15, Add "
# noqa: F401" to the import lines that bring in the re-exported configs so
Flake8 won't flag them as unused; specifically append this noqa comment to the
imports of nemotron_3_nano_pretrain_config_b200,
nemotron_3_nano_pretrain_config_b300, nemotron_3_nano_pretrain_config_gb200,
nemotron_3_nano_pretrain_config_gb300, nemotron_3_nano_pretrain_config_h100 in
the .nemotron_3_nano_llm_pretrain import block, and likewise add the same " #
noqa: F401" to the imports in the existing .nemotronh_llm_pretrain import block
to keep behavior consistent with the dynamic __all__.extend() re-exports.

from .nemotronh_llm_pretrain import (
nemotronh_56b_pretrain_config_b200,
nemotronh_56b_pretrain_config_b300,
Expand All @@ -14,6 +21,18 @@
nemotronh_56b_pretrain_config_h100,
)

from .nemotron_3_nano_workload_base_configs import (
NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1,
NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_FP8_MX_V1,
NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1,
NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_FP8_MX_V1,
NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1,
NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_FP8_MX_V1,
NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1,
NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_FP8_MX_V1,
NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_BF16_V1,
NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_FP8_CS_V1,
)
from .nemotronh_workload_base_configs import (
NEMOTRONH_56B_PRETRAIN_CONFIG_B200_FP8_CS_V1,
NEMOTRONH_56B_PRETRAIN_CONFIG_B300_FP8_CS_V1,
Expand All @@ -29,6 +48,16 @@
"NEMOTRONH_56B_PRETRAIN_CONFIG_B300_FP8_CS_V1",
"NEMOTRONH_56B_PRETRAIN_CONFIG_B200_FP8_CS_V1",
"NEMOTRONH_56B_PRETRAIN_CONFIG_H100_FP8_CS_V1",
"NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1",
"NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_FP8_MX_V1",
"NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1",
"NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_FP8_MX_V1",
"NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1",
"NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_FP8_MX_V1",
"NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1",
"NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_FP8_MX_V1",
"NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_BF16_V1",
"NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_FP8_CS_V1",
]

if HAVE_MEGATRON_BRIDGE:
Expand All @@ -39,5 +68,10 @@
"nemotronh_56b_pretrain_config_b300",
"nemotronh_56b_pretrain_config_b200",
"nemotronh_56b_pretrain_config_h100",
"nemotron_3_nano_pretrain_config_gb300",
"nemotron_3_nano_pretrain_config_gb200",
"nemotron_3_nano_pretrain_config_b300",
"nemotron_3_nano_pretrain_config_b200",
"nemotron_3_nano_pretrain_config_h100",
]
)
131 changes: 131 additions & 0 deletions scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging

from utils.overrides import set_workload_base_configs
from utils.precision import get_precision_config
from utils.utils import get_workload_base_config

from megatron.bridge.recipes.nemotronh.nemotron_3_nano import nemotron_3_nano_pretrain_config as pretrain_config
from megatron.bridge.training.config import ConfigContainer


logger = logging.getLogger(__name__)


def set_nemotron_3_nano_common_configs(cfg: ConfigContainer) -> None:
    """Set common performance configurations for all Nemotron 3 Nano configs.

    Args:
        cfg: Config container to mutate in place.
    """
    # Reduce gradients in BF16 rather than FP32 to cut communication volume.
    cfg.mixed_precision.grad_reduce_in_fp32 = False
    cfg.ddp.grad_reduce_in_fp32 = False


def _nemotron_3_nano_pretrain_config(gpu: str, precision: str, config_variant: str) -> ConfigContainer:
    """Build a Nemotron 3 Nano pretrain config for a given GPU target.

    All per-GPU public entry points below delegate here; they differed only in
    the ``gpu`` identifier passed to the workload base-config lookup.

    Args:
        gpu: Target GPU identifier (e.g. ``"gb300"``, ``"h100"``).
        precision: Precision name understood by ``get_precision_config``
            (e.g. ``"bf16"``); upper-cased for the base-config lookup.
        config_variant: Workload base-config variant (e.g. ``"v1"``).

    Returns:
        A fully populated ``ConfigContainer``.
    """
    base_cfg = get_workload_base_config(
        model_family_name="nemotronh",
        model_recipe_name="nemotron_3_nano",
        gpu=gpu,
        compute_dtype=precision.upper(),
        task="pretrain",
        config_variant=config_variant,
    )

    cfg = pretrain_config()
    cfg.mixed_precision = get_precision_config(precision)
    set_nemotron_3_nano_common_configs(cfg)
    # Workload base-config overrides are applied last so they take precedence
    # over both the recipe defaults and the common performance settings.
    set_workload_base_configs(cfg, base_cfg)

    return cfg


def nemotron_3_nano_pretrain_config_gb300(precision: str = "bf16", config_variant: str = "v1") -> ConfigContainer:
    """GB300, baseline config."""
    return _nemotron_3_nano_pretrain_config("gb300", precision, config_variant)


def nemotron_3_nano_pretrain_config_gb200(precision: str = "bf16", config_variant: str = "v1") -> ConfigContainer:
    """GB200, baseline config."""
    return _nemotron_3_nano_pretrain_config("gb200", precision, config_variant)


def nemotron_3_nano_pretrain_config_b300(precision: str = "bf16", config_variant: str = "v1") -> ConfigContainer:
    """B300, baseline config."""
    return _nemotron_3_nano_pretrain_config("b300", precision, config_variant)


def nemotron_3_nano_pretrain_config_b200(precision: str = "bf16", config_variant: str = "v1") -> ConfigContainer:
    """B200, baseline config."""
    return _nemotron_3_nano_pretrain_config("b200", precision, config_variant)


def nemotron_3_nano_pretrain_config_h100(precision: str = "bf16", config_variant: str = "v1") -> ConfigContainer:
    """H100, baseline config."""
    return _nemotron_3_nano_pretrain_config("h100", precision, config_variant)
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Parallelism presets for Nemotron 3 Nano performance configs.

Config naming convention:
{MODEL}_{SIZE}_{TASK}_CONFIG_{GPU}_{PRECISION}_{VERSION}

V1: 30B_a3b

Use --config_variant to select a variant.
Use --list_config_variants to see available variants interactively.
"""

from dataclasses import replace

from utils.utils import WorkloadBaseConfig


# Shared parallelism preset; per-GPU variants below are derived from this via
# dataclasses.replace.
BASE_NEMOTRON_3_NANO_CONFIG = WorkloadBaseConfig(
    num_gpus=8,
    global_batch_size=3072,
    micro_batch_size=2,
    tensor_model_parallel_size=4,
    expert_tensor_parallel_size=1,
    expert_model_parallel_size=8,
)

# Blackwell-class GPUs (GB300/GB200/B300/B200) drop tensor parallelism to 1;
# each FP8 variant intentionally aliases its BF16 counterpart (same object,
# not a copy), so the two names always stay in sync.
NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1 = replace(
    BASE_NEMOTRON_3_NANO_CONFIG,
    tensor_model_parallel_size=1,
)
NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1

NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1 = replace(
    BASE_NEMOTRON_3_NANO_CONFIG,
    tensor_model_parallel_size=1,
)
NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1

NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1 = replace(
    BASE_NEMOTRON_3_NANO_CONFIG,
    tensor_model_parallel_size=1,
)
NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1

NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1 = replace(
    BASE_NEMOTRON_3_NANO_CONFIG,
    tensor_model_parallel_size=1,
)
NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1

# H100 keeps the base preset (TP=4) unchanged for both precisions.
# NOTE(review): the H100 FP8 name uses the CS suffix while Blackwell variants
# use MX — presumably current-scaling vs. MX-format FP8; confirm against the
# precision-config naming convention.
NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_BF16_V1 = BASE_NEMOTRON_3_NANO_CONFIG
NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_FP8_CS_V1 = BASE_NEMOTRON_3_NANO_CONFIG

# Public API: only the named variants are exported; the base preset stays
# module-internal.
__all__ = [
    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1",
    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_FP8_MX_V1",
    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1",
    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_FP8_MX_V1",
    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1",
    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_FP8_MX_V1",
    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1",
    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_FP8_MX_V1",
    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_BF16_V1",
    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_FP8_CS_V1",
]