diff --git a/nemo_skills/dataset/utils.py b/nemo_skills/dataset/utils.py index 0b59a2b7db..b5283a4799 100644 --- a/nemo_skills/dataset/utils.py +++ b/nemo_skills/dataset/utils.py @@ -26,7 +26,8 @@ from urllib.error import URLError from nemo_skills.evaluation.math_grader import extract_answer -from nemo_skills.pipeline.utils import cluster_download_file, get_unmounted_path +from nemo_skills.pipeline.utils import cluster_download_file, get_unmounted_path, is_mounted_filepath +from nemo_skills.pipeline.utils.packager import get_registered_external_repo @contextlib.contextmanager @@ -88,9 +89,26 @@ def _get_dataset_module_from_cluster(cluster_config, mounted_path): def get_default_dataset_module(dataset, data_dir=None, cluster_config=None): + """ + Resolve and import the specified dataset module and determine its data path and whether it was loaded from a cluster location. + + Parameters: + dataset (str): Dataset import name or package path (e.g., "my_dataset" or "package.subpkg"). + data_dir (str | None): Optional filesystem path to the dataset. When provided, the function will prefer loading the module from this path; when None, it resolves the default nemo_skills dataset location. + cluster_config (dict | None): Optional cluster configuration that influences path resolution and loading strategy (e.g., executor type, mounted path handling). + + Returns: + tuple: A 3-tuple (dataset_module, data_path, is_on_cluster) where + - dataset_module: the imported Python module for the dataset, + - data_path (str): the filesystem path used for the dataset (either resolved default or the provided data_dir), + - is_on_cluster (bool): `True` if the module was loaded from a cluster-mounted location, `False` otherwise. 
+ """ is_on_cluster = False if data_dir is None: - data_path = "/nemo_run/code/nemo_skills/dataset" + if cluster_config is None or cluster_config["executor"] == "none": + data_path = str(get_registered_external_repo("nemo_skills").path / "dataset") + else: + data_path = "/nemo_run/code/nemo_skills/dataset" dataset_module = importlib.import_module(f"nemo_skills.dataset.{dataset}") else: data_path = data_dir @@ -235,4 +253,4 @@ def get_mcq_fields(question, choices): "problem": f"{question}\n\n{options_text}", "options": options_text, **options_dict, - } + } \ No newline at end of file diff --git a/nemo_skills/pipeline/utils/eval.py b/nemo_skills/pipeline/utils/eval.py index dd44ea0f83..67bdfe3737 100644 --- a/nemo_skills/pipeline/utils/eval.py +++ b/nemo_skills/pipeline/utils/eval.py @@ -87,11 +87,31 @@ def get_benchmark_args_from_module( benchmark_group=None, override_dict=None, ): + """ + Constructs a BenchmarkArgs instance for a single benchmark module by resolving input paths, generation/judge settings, sandbox requirements, and related metadata. + + Parameters: + benchmark_module: The imported benchmark module or object containing dataset and configuration attributes. + benchmark: Dot-separated benchmark identifier (e.g., "my.benchmark") used to build dataset paths and names. + split: Dataset split to use (e.g., "test"); if None, the module's EVAL_SPLIT or "test" is used. + cluster_config: Cluster/executor configuration mapping used to determine mounted paths and executor behavior. + data_path: Base path (mounted or local) where benchmark datasets are stored. + is_on_cluster: True when running on-cluster (use mounted paths); False when running locally (may use unmounted/local paths). + eval_requires_judge: If True, forces evaluation to use judge-based output handling (affects eval_subfolder). + benchmark_group: Optional group name to include in the evaluation subfolder. 
+ override_dict: Optional dictionary of values that override attributes on the benchmark_module. + + Returns: + BenchmarkArgs: Populated dataclass containing resolved input_file, generation arguments, judge settings, sandbox flags, sampling/chunk counts, eval_subfolder, and any sandbox environment overrides. + + Raises: + ValueError: If the resolved dataset file does not exist on the cluster or locally. + """ if split is None: split = get_arg_from_module_or_dict(benchmark_module, "EVAL_SPLIT", "test", override_dict) if not is_on_cluster: - if pipeline_utils.is_mounted_filepath(cluster_config, data_path): + if pipeline_utils.is_mounted_filepath(cluster_config, data_path) or cluster_config["executor"] == "none": input_file = f"{data_path}/{benchmark.replace('.', '/')}/{split}.jsonl" unmounted_input_file = pipeline_utils.get_unmounted_path(cluster_config, input_file) unmounted_path = str(Path(__file__).parents[3] / unmounted_input_file.replace("/nemo_run/code/", "")) @@ -493,4 +513,4 @@ def prepare_eval_commands( cur_eval += 1 - return benchmarks_dict, job_batches + return benchmarks_dict, job_batches \ No newline at end of file