NVIDIA-NeMo · coderabbitai · Dec 10, 2025
diff --git a/nemo_skills/dataset/utils.py b/nemo_skills/dataset/utils.py
@@ -26,7 +26,8 @@
 from urllib.error import URLError
 
 from nemo_skills.evaluation.math_grader import extract_answer
-from nemo_skills.pipeline.utils import cluster_download_file, get_unmounted_path
+from nemo_skills.pipeline.utils import cluster_download_file, get_unmounted_path, is_mounted_filepath
+from nemo_skills.pipeline.utils.packager import get_registered_external_repo
 
 
 @contextlib.contextmanager
@@ -88,9 +89,26 @@ def _get_dataset_module_from_cluster(cluster_config, mounted_path):
 
 
 def get_default_dataset_module(dataset, data_dir=None, cluster_config=None):
+    """
+    Resolve and import the specified dataset module and determine its data path and whether it was loaded from a cluster location.
+
+    Parameters:
+        dataset (str): Dataset import name or package path (e.g., "my_dataset" or "package.subpkg").
+        data_dir (str | None): Optional filesystem path to the dataset. When provided, the function will prefer loading the module from this path; when None, it resolves the default nemo_skills dataset location.
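+        cluster_config (dict | None): Cluster configuration used for path resolution. Although the signature defaults it to None, when data_dir is None it is passed to is_mounted_filepath and may be subscripted as cluster_config["executor"], so callers should supply a mapping with an "executor" key in that case.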
+
+    Returns:
+        tuple: A 3-tuple (dataset_module, data_path, is_on_cluster) where
+            - dataset_module: the imported Python module for the dataset,
+            - data_path (str): the filesystem path used for the dataset (either resolved default or the provided data_dir),
+            - is_on_cluster (bool): `True` if the module was loaded from a cluster-mounted location, `False` otherwise.
+    """
     is_on_cluster = False
     if data_dir is None:
-        data_path = "/nemo_run/code/nemo_skills/dataset"
+        if is_mounted_filepath(cluster_config, data_dir) or cluster_config["executor"] == "none":
+            data_path = str(get_registered_external_repo("nemo_skills").path / "dataset")
+        else:
+            data_path = "/nemo_run/code/nemo_skills/dataset"
         dataset_module = importlib.import_module(f"nemo_skills.dataset.{dataset}")
     else:
         data_path = data_dir
@@ -235,4 +253,4 @@ def get_mcq_fields(question, choices):
         "problem": f"{question}\n\n{options_text}",
         "options": options_text,
         **options_dict,
-    }
+    }
diff --git a/nemo_skills/pipeline/utils/eval.py b/nemo_skills/pipeline/utils/eval.py
@@ -87,11 +87,31 @@ def get_benchmark_args_from_module(
     benchmark_group=None,
     override_dict=None,
 ):
+    """
+    Construct a BenchmarkArgs instance for a single benchmark module by resolving input paths, generation/judge settings, sandbox requirements, and related metadata.
+
+    Parameters:
+        benchmark_module: The imported benchmark module or object containing dataset and configuration attributes.
+        benchmark: Dot-separated benchmark identifier (e.g., "my.benchmark") used to build dataset paths and names.
+        split: Dataset split to use (e.g., "test"); if None, it is resolved from EVAL_SPLIT via get_arg_from_module_or_dict (using benchmark_module and override_dict), with "test" as the default.
+        cluster_config: Cluster/executor configuration mapping used to determine mounted paths and executor behavior.
+        data_path: Base path (mounted or local) where benchmark datasets are stored.
+        is_on_cluster: True when running on-cluster (use mounted paths); False when running locally (may use unmounted/local paths).
+        eval_requires_judge: If True, forces evaluation to reserve judged-output handling (affects eval_subfolder).
+        benchmark_group: Optional group name to include in the evaluation subfolder.
+        override_dict: Optional dictionary of values that override attributes on the benchmark_module.
+
+    Returns:
+        BenchmarkArgs: Populated dataclass containing resolved input_file, generation arguments, judge settings, sandbox flags, sampling/chunk counts, eval_subfolder, and any sandbox environment overrides.
+
+    Raises:
+        ValueError: If the resolved dataset file does not exist on the cluster or locally.
+    """
     if split is None:
         split = get_arg_from_module_or_dict(benchmark_module, "EVAL_SPLIT", "test", override_dict)
 
     if not is_on_cluster:
-        if pipeline_utils.is_mounted_filepath(cluster_config, data_path):
+        if pipeline_utils.is_mounted_filepath(cluster_config, data_path) or cluster_config["executor"] == "none":
             input_file = f"{data_path}/{benchmark.replace('.', '/')}/{split}.jsonl"
             unmounted_input_file = pipeline_utils.get_unmounted_path(cluster_config, input_file)
             unmounted_path = str(Path(__file__).parents[3] / unmounted_input_file.replace("/nemo_run/code/", ""))
@@ -493,4 +513,4 @@ def prepare_eval_commands(
 
                 cur_eval += 1
 
-    return benchmarks_dict, job_batches
+    return benchmarks_dict, job_batches