diff --git a/nemo_rl/models/policy/__init__.py b/nemo_rl/models/policy/__init__.py index b909d89ed6..3f2fcfe877 100644 --- a/nemo_rl/models/policy/__init__.py +++ b/nemo_rl/models/policy/__init__.py @@ -82,6 +82,7 @@ class MegatronDDPConfig(TypedDict): class MegatronConfig(TypedDict): enabled: bool + env_vars: NotRequired[dict[str, str]] empty_unused_memory_level: int activation_checkpointing: bool converter_type: str diff --git a/nemo_rl/models/policy/dtensor_policy_worker.py b/nemo_rl/models/policy/dtensor_policy_worker.py index 2065a74090..7b2f0de271 100644 --- a/nemo_rl/models/policy/dtensor_policy_worker.py +++ b/nemo_rl/models/policy/dtensor_policy_worker.py @@ -225,8 +225,8 @@ def __init__( ) # reward model - self._is_reward_model = self.cfg.get("reward_model_cfg", {}).get( - "enabled", False + self._is_reward_model = ( + "reward_model_cfg" in self.cfg and self.cfg["reward_model_cfg"]["enabled"] ) if self._is_reward_model: # Ensure sequence packing is disabled. diff --git a/nemo_rl/models/policy/dtensor_policy_worker_v2.py b/nemo_rl/models/policy/dtensor_policy_worker_v2.py index b7d7eb163a..8d56c3e6eb 100644 --- a/nemo_rl/models/policy/dtensor_policy_worker_v2.py +++ b/nemo_rl/models/policy/dtensor_policy_worker_v2.py @@ -165,8 +165,8 @@ def __init__( else None, ) - self._is_reward_model = self.cfg.get("reward_model_cfg", {}).get( - "enabled", False + self._is_reward_model = ( + "reward_model_cfg" in self.cfg and self.cfg["reward_model_cfg"]["enabled"] ) if self._is_reward_model: # Ensure sequence packing is disabled. diff --git a/nemo_rl/models/policy/lm_policy.py b/nemo_rl/models/policy/lm_policy.py index 7867688b43..3d2a6e2d9e 100644 --- a/nemo_rl/models/policy/lm_policy.py +++ b/nemo_rl/models/policy/lm_policy.py @@ -75,7 +75,7 @@ def __init__( pp_size = 1 cp_size = 1 - megatron_enable = config.get("megatron_cfg", {}).get("enabled", False) + megatron_enable = "megatron_cfg" in config and config["megatron_cfg"]["enabled"] if megatron_enable: worker_builder_cls = ( "nemo_rl.models.policy.megatron_policy_worker.MegatronPolicyWorker" @@ -175,9 +175,7 @@ def __init__( self.use_sequence_packing = True self.sequence_packing_args: SequencePackingArgs = { "train_mb_tokens": config["sequence_packing"]["train_mb_tokens"], - "logprob_mb_tokens": config["sequence_packing"].get( - "logprob_mb_tokens", None - ), + "logprob_mb_tokens": config["sequence_packing"]["logprob_mb_tokens"], "algorithm": config["sequence_packing"]["algorithm"], "input_key": "input_ids", "input_lengths_key": "input_lengths", diff --git a/nemo_rl/models/policy/megatron_policy_worker.py b/nemo_rl/models/policy/megatron_policy_worker.py index 70a80b1b8f..9a16625db2 100644 --- a/nemo_rl/models/policy/megatron_policy_worker.py +++ b/nemo_rl/models/policy/megatron_policy_worker.py @@ -399,7 +399,7 @@ def __init__( self.dtype = dtype_map[self.cfg["precision"]] # Reward models are not yet supported with Megatron. - if self.cfg.get("reward_model_cfg", {}).get("enabled", False): + if "reward_model_cfg" in self.cfg and self.cfg["reward_model_cfg"]["enabled"]: raise NotImplementedError( "Reward models are not yet supported with the Megatron backend, this issue is " "tracked in https://github.com/NVIDIA-NeMo/RL/issues/720" diff --git a/pyproject.toml b/pyproject.toml index 84df2fb543..0a40539cc6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -196,6 +196,7 @@ addopts = "--durations=15 -s -rA -x" testpaths = ["tests"] python_files = "test_*.py" markers = [ + "run_first: marks tests that should run before others", "mcore: marks tests that require the mcore extra", "hf_gated: marks tests that require HuggingFace token access for gated models", ] diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 978131969a..1f7dad4a2b 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -53,8 +53,9 @@ def pytest_collection_modifyitems(config, items): run_mcore_only = config.getoption("--mcore-only") marker_expr = config.getoption("-m", default="") - # If user specified -m marker expressions, let pytest handle everything normally + # If user specified -m marker expressions, still prioritize run_first tests if marker_expr: + items.sort(key=lambda item: 0 if item.get_closest_marker("run_first") else 1) return # Filter tests based on the desired configurations @@ -83,6 +84,9 @@ def pytest_collection_modifyitems(config, items): and not item.get_closest_marker("mcore") ] + # Ensure run_first tests are prioritized + new_items.sort(key=lambda item: 0 if item.get_closest_marker("run_first") else 1) + # Update the items list in-place items[:] = new_items diff --git a/tests/unit/models/policy/test_dtensor_worker.py b/tests/unit/models/policy/test_dtensor_worker.py index 013635d898..40243e2b5a 100644 --- a/tests/unit/models/policy/test_dtensor_worker.py +++ b/tests/unit/models/policy/test_dtensor_worker.py @@ -16,10 +16,6 @@ import pytest import ray import torch - -# Define a custom marker for model configuration tests -pytestmark = pytest.mark.modelconfig - from transformers import AutoModelForCausalLM from nemo_rl.algorithms.interfaces import LossFunction diff --git a/tests/unit/models/policy/test_dtensor_worker_v2.py b/tests/unit/models/policy/test_dtensor_worker_v2.py index 1268da33f9..a16e3afda5 100644 --- a/tests/unit/models/policy/test_dtensor_worker_v2.py +++ b/tests/unit/models/policy/test_dtensor_worker_v2.py @@ -110,9 +110,6 @@ def two_gpu_virtual_cluster(): cluster.shutdown() -# Define a custom marker for model configuration tests -pytestmark = pytest.mark.modelconfig - from nemo_rl.algorithms.utils import get_tokenizer from nemo_rl.models.policy.lm_policy import Policy diff --git a/tests/unit/models/policy/test_megatron_worker.py b/tests/unit/models/policy/test_megatron_worker.py index f0175616c9..48d315137c 100644 --- a/tests/unit/models/policy/test_megatron_worker.py +++ b/tests/unit/models/policy/test_megatron_worker.py @@ -18,9 +18,6 @@ import pytest import torch -# Define a custom marker for model configuration tests -pytestmark = pytest.mark.modelconfig - from nemo_rl.algorithms.interfaces import LossFunction from nemo_rl.algorithms.loss_functions import ClippedPGLossFn, DPOLossFn, NLLLoss from nemo_rl.algorithms.utils import get_tokenizer diff --git a/tests/unit/test_config_validation.py b/tests/unit/test_config_validation.py index 3056ae270d..e5fa73ee7d 100644 --- a/tests/unit/test_config_validation.py +++ b/tests/unit/test_config_validation.py @@ -32,6 +32,9 @@ from nemo_rl.utils.config import load_config_with_inheritance from nemo_rl.utils.logger import LoggerConfig +# All tests in this module should run first +pytestmark = pytest.mark.run_first + def get_keys_from_typeddict(typed_dict_class: dict) -> Set[str]: """Extract required keys from a TypedDict class, excluding NotRequired fields.""" diff --git a/tests/unit/test_recipes_and_test_suites.py b/tests/unit/test_recipes_and_test_suites.py index 032909295e..4ac4414b9d 100644 --- a/tests/unit/test_recipes_and_test_suites.py +++ b/tests/unit/test_recipes_and_test_suites.py @@ -17,6 +17,9 @@ import pytest +# All tests in this module should run first +pytestmark = pytest.mark.run_first + dir_path = os.path.dirname(os.path.abspath(__file__)) project_root = os.path.abspath(os.path.join(dir_path, "..", "..")) configs_dir = os.path.join(project_root, "examples", "configs")