diff --git a/scripts/performance/configs/nemotronh/__init__.py b/scripts/performance/configs/nemotronh/__init__.py index aa3f16830e..dc597c4c19 100644 --- a/scripts/performance/configs/nemotronh/__init__.py +++ b/scripts/performance/configs/nemotronh/__init__.py @@ -6,6 +6,13 @@ HAVE_MEGATRON_BRIDGE = False if HAVE_MEGATRON_BRIDGE: + from .nemotron_3_nano_llm_pretrain import ( + nemotron_3_nano_pretrain_config_b200, + nemotron_3_nano_pretrain_config_b300, + nemotron_3_nano_pretrain_config_gb200, + nemotron_3_nano_pretrain_config_gb300, + nemotron_3_nano_pretrain_config_h100, + ) from .nemotronh_llm_pretrain import ( nemotronh_56b_pretrain_config_b200, nemotronh_56b_pretrain_config_b300, @@ -14,6 +21,18 @@ nemotronh_56b_pretrain_config_h100, ) +from .nemotron_3_nano_workload_base_configs import ( + NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1, + NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_FP8_MX_V1, + NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1, + NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_FP8_MX_V1, + NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1, + NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_FP8_MX_V1, + NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1, + NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_FP8_MX_V1, + NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_BF16_V1, + NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_FP8_CS_V1, +) from .nemotronh_workload_base_configs import ( NEMOTRONH_56B_PRETRAIN_CONFIG_B200_FP8_CS_V1, NEMOTRONH_56B_PRETRAIN_CONFIG_B300_FP8_CS_V1, @@ -29,6 +48,16 @@ "NEMOTRONH_56B_PRETRAIN_CONFIG_B300_FP8_CS_V1", "NEMOTRONH_56B_PRETRAIN_CONFIG_B200_FP8_CS_V1", "NEMOTRONH_56B_PRETRAIN_CONFIG_H100_FP8_CS_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_FP8_MX_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_FP8_MX_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_FP8_MX_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1", + 
"NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_FP8_MX_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_BF16_V1", + "NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_FP8_CS_V1", ] if HAVE_MEGATRON_BRIDGE: @@ -39,5 +68,10 @@ "nemotronh_56b_pretrain_config_b300", "nemotronh_56b_pretrain_config_b200", "nemotronh_56b_pretrain_config_h100", + "nemotron_3_nano_pretrain_config_gb300", + "nemotron_3_nano_pretrain_config_gb200", + "nemotron_3_nano_pretrain_config_b300", + "nemotron_3_nano_pretrain_config_b200", + "nemotron_3_nano_pretrain_config_h100", ] ) diff --git a/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py b/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py new file mode 100644 index 0000000000..f71d8e34b9 --- /dev/null +++ b/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py @@ -0,0 +1,131 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import logging
+
+from utils.overrides import set_workload_base_configs
+from utils.precision import get_precision_config
+from utils.utils import get_workload_base_config
+
+from megatron.bridge.recipes.nemotronh.nemotron_3_nano import nemotron_3_nano_pretrain_config as pretrain_config
+from megatron.bridge.training.config import ConfigContainer
+
+
+logger = logging.getLogger(__name__)
+
+
+def set_nemotron_3_nano_common_configs(cfg: ConfigContainer) -> None:
+    """Set common performance configurations for all Nemotron 3 Nano configs.
+
+    Sets ``grad_reduce_in_fp32`` to ``False`` on both ``cfg.mixed_precision``
+    and ``cfg.ddp``, modifying ``cfg`` in place.
+
+    Args:
+        cfg: Config container to update.
+    """
+    cfg.mixed_precision.grad_reduce_in_fp32 = False
+    cfg.ddp.grad_reduce_in_fp32 = False
+
+
+def _nemotron_3_nano_pretrain_config(gpu: str, precision: str, config_variant: str) -> ConfigContainer:
+    """Build the Nemotron 3 Nano pretrain config shared by all GPU entry points.
+
+    Args:
+        gpu: GPU name used for the workload base config lookup, e.g. ``"gb300"``.
+        precision: Precision name; upper-cased for the base config lookup.
+        config_variant: Workload base config variant to load.
+
+    Returns:
+        The recipe config with precision, common, and workload settings applied.
+    """
+    base_cfg = get_workload_base_config(
+        model_family_name="nemotronh",
+        model_recipe_name="nemotron_3_nano",
+        gpu=gpu,
+        compute_dtype=precision.upper(),
+        task="pretrain",
+        config_variant=config_variant,
+    )
+    precision_config = get_precision_config(precision)
+    cfg = pretrain_config()
+    # Assign the precision config first: the common overrides below write into it.
+    cfg.mixed_precision = precision_config
+    set_nemotron_3_nano_common_configs(cfg)
+    set_workload_base_configs(cfg, base_cfg)
+    return cfg
+
+
+def nemotron_3_nano_pretrain_config_gb300(precision: str = "bf16", config_variant: str = "v1") -> ConfigContainer:
+    """GB300, baseline config.
+
+    Args:
+        precision: Precision name passed to the precision and base config lookups.
+        config_variant: Workload base config variant to load.
+
+    Returns:
+        The Nemotron 3 Nano pretrain config for GB300.
+    """
+    return _nemotron_3_nano_pretrain_config("gb300", precision, config_variant)
+
+
+def nemotron_3_nano_pretrain_config_gb200(precision: str = "bf16", config_variant: str = "v1") -> ConfigContainer:
+    """GB200, baseline config.
+
+    Args:
+        precision: Precision name passed to the precision and base config lookups.
+        config_variant: Workload base config variant to load.
+
+    Returns:
+        The Nemotron 3 Nano pretrain config for GB200.
+    """
+    return _nemotron_3_nano_pretrain_config("gb200", precision, config_variant)
+
+
+def nemotron_3_nano_pretrain_config_b300(precision: str = "bf16", config_variant: str = "v1") -> ConfigContainer:
+    """B300, baseline config.
+
+    Args:
+        precision: Precision name passed to the precision and base config lookups.
+        config_variant: Workload base config variant to load.
+
+    Returns:
+        The Nemotron 3 Nano pretrain config for B300.
+    """
+    return _nemotron_3_nano_pretrain_config("b300", precision, config_variant)
+
+
+def nemotron_3_nano_pretrain_config_b200(precision: str = "bf16", config_variant: str = "v1") -> ConfigContainer:
+    """B200, baseline config.
+
+    Args:
+        precision: Precision name passed to the precision and base config lookups.
+        config_variant: Workload base config variant to load.
+
+    Returns:
+        The Nemotron 3 Nano pretrain config for B200.
+    """
+    return _nemotron_3_nano_pretrain_config("b200", precision, config_variant)
+
+
+def nemotron_3_nano_pretrain_config_h100(precision: str = "bf16", config_variant: str = "v1") -> ConfigContainer:
+    """H100, baseline config.
+
+    Args:
+        precision: Precision name passed to the precision and base config lookups.
+        config_variant: Workload base config variant to load.
+
+    Returns:
+        The Nemotron 3 Nano pretrain config for H100.
+    """
+    return _nemotron_3_nano_pretrain_config("h100", precision, config_variant)
diff --git a/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py b/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py
new file mode 100644
index 0000000000..a042d3bb57
--- /dev/null
+++ b/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py
@@ -0,0 +1,78 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Workload base configs (parallelism presets) for Nemotron 3 Nano performance runs.
+
+Constant naming pattern:
+    NEMOTRON_3_NANO_{TASK}_CONFIG_{GPU}_{PRECISION}_{VERSION}
+
+V1: 30B_a3b
+
+Use --config_variant to select a variant.
+Use --list_config_variants to see available variants interactively.
+
+Every preset is a ``WorkloadBaseConfig``. H100 uses ``BASE_NEMOTRON_3_NANO_CONFIG``
+as-is; the other GPUs derive from it with ``dataclasses.replace``.
+"""
+
+from dataclasses import replace
+
+from utils.utils import WorkloadBaseConfig
+
+
+# Baseline preset. The H100 constants are bound to it directly; the other GPUs
+# take a copy with ``tensor_model_parallel_size`` overridden.
+BASE_NEMOTRON_3_NANO_CONFIG = WorkloadBaseConfig(
+    num_gpus=8,
+    tensor_model_parallel_size=4,
+    expert_model_parallel_size=8,
+    expert_tensor_parallel_size=1,
+    micro_batch_size=2,
+    global_batch_size=3072,
+)
+
+# GB300: baseline with tensor_model_parallel_size=1.
+# The FP8-MX name is bound to the same object as the BF16 preset.
+NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1 = replace(BASE_NEMOTRON_3_NANO_CONFIG, tensor_model_parallel_size=1)
+NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1
+
+# GB200: same override and aliasing as GB300, on its own copy of the baseline.
+NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1 = replace(BASE_NEMOTRON_3_NANO_CONFIG, tensor_model_parallel_size=1)
+NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1
+
+# B300: same override and aliasing as GB300, on its own copy of the baseline.
+NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1 = replace(BASE_NEMOTRON_3_NANO_CONFIG, tensor_model_parallel_size=1)
+NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1
+
+# B200: same override and aliasing as GB300, on its own copy of the baseline.
+NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1 = replace(BASE_NEMOTRON_3_NANO_CONFIG, tensor_model_parallel_size=1)
+NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1
+
+# H100: both precision names are bound to the unmodified baseline object.
+NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_BF16_V1 = BASE_NEMOTRON_3_NANO_CONFIG
+NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_FP8_CS_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_BF16_V1
+
+# Names exported by a star-import of this module.
+__all__ = [
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_FP8_MX_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_BF16_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB200_FP8_MX_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_BF16_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_B300_FP8_MX_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_BF16_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_B200_FP8_MX_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_BF16_V1",
+    "NEMOTRON_3_NANO_PRETRAIN_CONFIG_H100_FP8_CS_V1",
+]