NVIDIA-NeMo · terrykong · Aug 27, 2025 · Aug 27, 2025 · Aug 27, 2025
@@ -82,6 +82,7 @@ class MegatronDDPConfig(TypedDict):
 
 class MegatronConfig(TypedDict):
     enabled: bool
+    env_vars: NotRequired[dict[str, str]]
     empty_unused_memory_level: int
     activation_checkpointing: bool
     converter_type: str

@@ -225,8 +225,8 @@ def __init__(
         )
 
         # reward model
-        self._is_reward_model = self.cfg.get("reward_model_cfg", {}).get(
-            "enabled", False
+        self._is_reward_model = (
+            "reward_model_cfg" in self.cfg and self.cfg["reward_model_cfg"]["enabled"]
         )
         if self._is_reward_model:
             # Ensure sequence packing is disabled.

@@ -165,8 +165,8 @@ def __init__(
             else None,
         )
 
-        self._is_reward_model = self.cfg.get("reward_model_cfg", {}).get(
-            "enabled", False
+        self._is_reward_model = (
+            "reward_model_cfg" in self.cfg and self.cfg["reward_model_cfg"]["enabled"]
         )
         if self._is_reward_model:
             # Ensure sequence packing is disabled.

@@ -75,7 +75,7 @@ def __init__(
         pp_size = 1
         cp_size = 1
 
-        megatron_enable = config.get("megatron_cfg", {}).get("enabled", False)
+        megatron_enable = "megatron_cfg" in config and config["megatron_cfg"]["enabled"]
         if megatron_enable:
             worker_builder_cls = (
                 "nemo_rl.models.policy.megatron_policy_worker.MegatronPolicyWorker"
@@ -175,9 +175,7 @@ def __init__(
             self.use_sequence_packing = True
             self.sequence_packing_args: SequencePackingArgs = {
                 "train_mb_tokens": config["sequence_packing"]["train_mb_tokens"],
-                "logprob_mb_tokens": config["sequence_packing"].get(
-                    "logprob_mb_tokens", None
-                ),
+                "logprob_mb_tokens": config["sequence_packing"]["logprob_mb_tokens"],
                 "algorithm": config["sequence_packing"]["algorithm"],
                 "input_key": "input_ids",
                 "input_lengths_key": "input_lengths",

@@ -399,7 +399,7 @@ def __init__(
         self.dtype = dtype_map[self.cfg["precision"]]
 
         # Reward models are not yet supported with Megatron.
-        if self.cfg.get("reward_model_cfg", {}).get("enabled", False):
+        if "reward_model_cfg" in self.cfg and self.cfg["reward_model_cfg"]["enabled"]:
             raise NotImplementedError(
                 "Reward models are not yet supported with the Megatron backend, this issue is "
                 "tracked in https://github.com/NVIDIA-NeMo/RL/issues/720"

@@ -196,6 +196,7 @@ addopts = "--durations=15 -s -rA -x"
 testpaths = ["tests"]
 python_files = "test_*.py"
 markers = [
+    "run_first: marks tests that should run before others",
     "mcore: marks tests that require the mcore extra",
     "hf_gated: marks tests that require HuggingFace token access for gated models",
 ]

@@ -53,8 +53,9 @@ def pytest_collection_modifyitems(config, items):
     run_mcore_only = config.getoption("--mcore-only")
     marker_expr = config.getoption("-m", default="")
 
-    # If user specified -m marker expressions, let pytest handle everything normally
+    # If user specified -m marker expressions, still prioritize run_first tests
     if marker_expr:
+        items.sort(key=lambda item: 0 if item.get_closest_marker("run_first") else 1)
         return
 
     # Filter tests based on the desired configurations
@@ -83,6 +84,9 @@ def pytest_collection_modifyitems(config, items):
             and not item.get_closest_marker("mcore")
         ]
 
+    # Ensure run_first tests are prioritized
+    new_items.sort(key=lambda item: 0 if item.get_closest_marker("run_first") else 1)
+
     # Update the items list in-place
     items[:] = new_items
 

@@ -16,10 +16,6 @@
 import pytest
 import ray
 import torch
-
-# Define a custom marker for model configuration tests
-pytestmark = pytest.mark.modelconfig
-
 from transformers import AutoModelForCausalLM
 
 from nemo_rl.algorithms.interfaces import LossFunction

@@ -110,9 +110,6 @@ def two_gpu_virtual_cluster():
     cluster.shutdown()
 
 
-# Define a custom marker for model configuration tests
-pytestmark = pytest.mark.modelconfig
-
 from nemo_rl.algorithms.utils import get_tokenizer
 from nemo_rl.models.policy.lm_policy import Policy
 

@@ -18,9 +18,6 @@
 import pytest
 import torch
 
-# Define a custom marker for model configuration tests
-pytestmark = pytest.mark.modelconfig
-
 from nemo_rl.algorithms.interfaces import LossFunction
 from nemo_rl.algorithms.loss_functions import ClippedPGLossFn, DPOLossFn, NLLLoss
 from nemo_rl.algorithms.utils import get_tokenizer

@@ -32,6 +32,9 @@
 from nemo_rl.utils.config import load_config_with_inheritance
 from nemo_rl.utils.logger import LoggerConfig
 
+# All tests in this module should run first
+pytestmark = pytest.mark.run_first
+
 
 def get_keys_from_typeddict(typed_dict_class: dict) -> Set[str]:
     """Extract required keys from a TypedDict class, excluding NotRequired fields."""

@@ -17,6 +17,9 @@
 
 import pytest
 
+# All tests in this module should run first
+pytestmark = pytest.mark.run_first
+
 dir_path = os.path.dirname(os.path.abspath(__file__))
 project_root = os.path.abspath(os.path.join(dir_path, "..", ".."))
 configs_dir = os.path.join(project_root, "examples", "configs")