Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 21 additions & 3 deletions nemo_skills/dataset/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
from urllib.error import URLError

from nemo_skills.evaluation.math_grader import extract_answer
from nemo_skills.pipeline.utils import cluster_download_file, get_unmounted_path
from nemo_skills.pipeline.utils import cluster_download_file, get_unmounted_path, is_mounted_filepath
from nemo_skills.pipeline.utils.packager import get_registered_external_repo


@contextlib.contextmanager
Expand Down Expand Up @@ -88,9 +89,26 @@ def _get_dataset_module_from_cluster(cluster_config, mounted_path):


def get_default_dataset_module(dataset, data_dir=None, cluster_config=None):
"""
Resolve and import the specified dataset module and determine its data path and whether it was loaded from a cluster location.

Parameters:
dataset (str): Dataset import name or package path (e.g., "my_dataset" or "package.subpkg").
data_dir (str | None): Optional filesystem path to the dataset. When provided, the function will prefer loading the module from this path; when None, it resolves the default nemo_skills dataset location.
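cluster_config (dict | None): Optional cluster configuration that influences path resolution and loading strategy (e.g., executor type, mounted path handling). NOTE(review): when data_dir is None the function reads cluster_config["executor"], so passing None in that case may raise a TypeError — confirm whether None is actually supported.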

Returns:
tuple: A 3-tuple (dataset_module, data_path, is_on_cluster) where
- dataset_module: the imported Python module for the dataset,
- data_path (str): the filesystem path used for the dataset (either resolved default or the provided data_dir),
- is_on_cluster (bool): `True` if the module was loaded from a cluster-mounted location, `False` otherwise.
"""
is_on_cluster = False
if data_dir is None:
data_path = "/nemo_run/code/nemo_skills/dataset"
if is_mounted_filepath(cluster_config, data_dir) or cluster_config["executor"] == "none":
data_path = str(get_registered_external_repo("nemo_skills").path / "dataset")
else:
data_path = "/nemo_run/code/nemo_skills/dataset"
dataset_module = importlib.import_module(f"nemo_skills.dataset.{dataset}")
else:
data_path = data_dir
Expand Down Expand Up @@ -235,4 +253,4 @@ def get_mcq_fields(question, choices):
"problem": f"{question}\n\n{options_text}",
"options": options_text,
**options_dict,
}
}
24 changes: 22 additions & 2 deletions nemo_skills/pipeline/utils/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,31 @@ def get_benchmark_args_from_module(
benchmark_group=None,
override_dict=None,
):
"""
Constructs a BenchmarkArgs instance for a single benchmark module by resolving input paths, generation/judge settings, sandbox requirements, and related metadata.

Parameters:
benchmark_module: The imported benchmark module or object containing dataset and configuration attributes.
benchmark: Dot-separated benchmark identifier (e.g., "my.benchmark") used to build dataset paths and names.
split: Dataset split to use (e.g., "test"); if None, the module's EVAL_SPLIT or "test" is used.
cluster_config: Cluster/executor configuration mapping used to determine mounted paths and executor behavior.
data_path: Base path (mounted or local) where benchmark datasets are stored.
is_on_cluster: True when running on-cluster (use mounted paths); False when running locally (may use unmounted/local paths).
eval_requires_judge: If True, forces evaluation to reserve judged-output handling (affects eval_subfolder).
benchmark_group: Optional group name to include in the evaluation subfolder.
override_dict: Optional dictionary of values that override attributes on the benchmark_module.

Returns:
BenchmarkArgs: Populated dataclass containing resolved input_file, generation arguments, judge settings, sandbox flags, sampling/chunk counts, eval_subfolder, and any sandbox environment overrides.

Raises:
ValueError: If the resolved dataset file does not exist on the cluster or locally.
"""
if split is None:
split = get_arg_from_module_or_dict(benchmark_module, "EVAL_SPLIT", "test", override_dict)

if not is_on_cluster:
if pipeline_utils.is_mounted_filepath(cluster_config, data_path):
if pipeline_utils.is_mounted_filepath(cluster_config, data_path) or cluster_config["executor"] == "none":
input_file = f"{data_path}/{benchmark.replace('.', '/')}/{split}.jsonl"
unmounted_input_file = pipeline_utils.get_unmounted_path(cluster_config, input_file)
unmounted_path = str(Path(__file__).parents[3] / unmounted_input_file.replace("/nemo_run/code/", ""))
Expand Down Expand Up @@ -493,4 +513,4 @@ def prepare_eval_commands(

cur_eval += 1

return benchmarks_dict, job_batches
return benchmarks_dict, job_batches
Loading