NVIDIA-NeMo · malay-nagda · Feb 26, 2026 · Jan 29, 2026 · Jan 29, 2026 · Feb 25, 2026
diff --git a/scripts/performance/README.md b/scripts/performance/README.md
@@ -151,7 +151,7 @@ python scripts/performance/setup_experiment.py
 - `-a/--account`: Slurm account to use for experiment.
 - `-p/--partition`: Slurm partition to use for experiment.
 - `-t/--time_limit`: Maximum time limit before the Slurm job is cancelled. Format `HH:MM:SS`. Default `00:30:00`.
-- `-gn/--gpus_per_node`: GPUs per node. Default `8`.
+- `-gn/--gpus_per_node`: GPUs per node. Defaults to `None`, in which case the value is inferred from the GPU type given by `-g/--gpu` (`8` for `h100`, `b200`, `b300`; `4` for `gb200`, `gb300`).
 - `-cm/--custom_mounts`: Comma-separated list of host mounts to expose inside the container.
 - `-ce/--custom_env_vars`: Comma-separated string of environment variables (format: `key1=value1,key2=value2`).
 - `-cs/--custom_srun_args`: Comma-separated string of srun arguments.

@@ -26,6 +26,14 @@
 VALID_CUDA_GRAPH_IMPLS = ["none", "local", "transformer_engine"]
 VALID_CUDA_GRAPH_SCOPES = ["full_iteration", "attn", "mlp", "moe", "moe_router", "moe_preprocess", "mamba"]
 
+NUM_GPUS_PER_NODE_MAP = {
+    "h100": 8,
+    "b200": 8,
+    "b300": 8,
+    "gb200": 4,
+    "gb300": 4,
+}
+
 
 def list_of_strings(arg):
     """Split a comma-separated string into a list of substrings."""
@@ -383,8 +391,8 @@ def parse_cli_args():
         "-gn",
         "--gpus_per_node",
         type=int,
-        help="Number of gpus per node. Defaults to 8",
-        default=8,
+        help="Number of gpus per node. Defaults to None. If not provided, will be inferred from the GPU type.",
+        default=None,
     )
-        "-gn",
-        "--gpus_per_node",
-        type=int,
-        help="Number of gpus per node. Defaults to 8",
-        default=8,
-        help="Number of gpus per node. Defaults to None. If not provided, will be inferred from the GPU type.",
-        default=None,
-    )
+        "-gn",
+        "--gpus_per_node",
+        type=positive_int,
+        help="Number of gpus per node. Defaults to None. If not provided, will be inferred from the GPU type.",
+        default=None,
+    )
-        "-gn",
-        "--gpus_per_node",
-        type=int,
-        help="Number of gpus per node. Defaults to 8",
-        default=8,
-        help="Number of gpus per node. Defaults to None. If not provided, will be inferred from the GPU type.",
-        default=None,
-    )
+        "-gn",
+        "--gpus_per_node",
+        type=positive_int,
+        help="Number of gpus per node. Defaults to None. If not provided, will be inferred from the GPU type.",
+        default=None,
+    )
     slurm_args.add_argument(
         "-i",
@@ -500,7 +508,7 @@ def parse_cli_args():
         "-g",
         "--gpu",
         type=str,
-        choices=["h100", "b200", "gb200", "gb300", "b300"],
+        choices=NUM_GPUS_PER_NODE_MAP.keys(),
         help="Target gpu type.",
         required=True,
     )

@@ -28,12 +28,12 @@
 
 
 try:
-    from argument_parser import parse_cli_args
+    from argument_parser import NUM_GPUS_PER_NODE_MAP, parse_cli_args
     from utils.evaluate import calc_convergence_and_performance
     from utils.executors import dgxc_executor, slurm_executor
     from utils.utils import get_exp_name_config, select_config_variant_interactive
 except (ImportError, ModuleNotFoundError):
-    from .argument_parser import parse_cli_args
+    from .argument_parser import NUM_GPUS_PER_NODE_MAP, parse_cli_args
     from .utils.evaluate import calc_convergence_and_performance
     from .utils.executors import dgxc_executor, slurm_executor
     from .utils.utils import get_exp_name_config, select_config_variant_interactive
@@ -529,6 +529,15 @@ def main(
     parser = parse_cli_args()
     args, unknown_args = parser.parse_known_args()
 
+    gpus_per_node = args.gpus_per_node
+    if gpus_per_node is None:
+        if args.gpu in NUM_GPUS_PER_NODE_MAP:
+            gpus_per_node = NUM_GPUS_PER_NODE_MAP[args.gpu]
+        else:
+            raise ValueError(
+                f"Invalid GPU type: {args.gpu}. Please use one of the following: {NUM_GPUS_PER_NODE_MAP.keys()}"
+            )
+
     assert not (args.enable_nsys and args.pytorch_profiler), (
         "Both NSys and PyTorch profiler cannot be enabled at the same time"
     )
@@ -586,7 +595,7 @@ def main(
         account=args.account,
         partition=args.partition,
         log_dir=args.log_dir,
-        gpus_per_node=args.gpus_per_node,
+        gpus_per_node=gpus_per_node,
         time_limit=args.time_limit,
         container_image=args.container_image,
         custom_mounts=args.custom_mounts,