Commit fb725e2

Upgrade huggingface_hub to fix datasets import and add trust_remote_code in datasets (#84)

1 parent 3da9220

File tree

6 files changed (+1184, -1160 lines)

README.md

Lines changed: 1 addition & 0 deletions

```diff
@@ -237,6 +237,7 @@ Summary: create a **line summary** of your evaluation, in `src/lighteval/tasks/t
 - `metric` (list), the metrics you want to use for your evaluation (see next section for a detailed explanation)
 - `output_regex` (str), A regex string that will be used to filter your generation. (Generative metrics will only select tokens that are between the first and the second sequence matched by the regex. For example, for a regex matching `\n` and a generation `\nModel generation output\nSome other text` the metric will only be fed with `Model generation output`)
 - `frozen` (bool), for now is set to False, but we will steadily pass all stable tasks to True.
+- `trust_dataset` (bool), set to True if you trust the dataset.
 
 Make sure you can launch your model with your new task using `--tasks lighteval|yournewtask|2|0`.
 
```
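Put together with the existing fields, a task definition using the new flag looks like the sketch below. This is illustrative only: the task name, prompt function, and dataset repo are hypothetical, and the field list mirrors the README excerpt above plus the `LightevalTaskConfig` diff further down.

```python
# Hypothetical task definition showing where the new flag sits; only the
# trust_dataset line is specific to this commit.
from lighteval.tasks.lighteval_task import LightevalTaskConfig

yournewtask = LightevalTaskConfig(
    name="yournewtask",                    # hypothetical task name
    prompt_function="yournewtask_prompt",  # hypothetical prompt formatter
    hf_repo="your-org/your-dataset",       # hypothetical dataset repo
    hf_subset="default",
    metric=["loglikelihood_acc"],
    frozen=False,
    trust_dataset=True,  # opt in: lets the dataset's loading script execute
)
```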

community_tasks/arabic_evals.py

Lines changed: 3 additions & 0 deletions

```diff
@@ -49,6 +49,7 @@ def __init__(
             stop_sequence=None,
             output_regex=None,
             frozen=False,
+            trust_dataset=True,
         )
 
 
@@ -115,6 +116,7 @@ def __init__(
             stop_sequence=None,
             output_regex=None,
             frozen=False,
+            trust_dataset=True,
         )
 
 
@@ -145,6 +147,7 @@ def acva(line, task_name: str = None):
     few_shots_split="validation",
     few_shots_select="sequential",
     metric=["loglikelihood_acc"],
+    trust_dataset=True,
 )
 
 
```
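As a standalone sketch, the pattern in the diff above amounts to a community task class defaulting `trust_dataset=True` once in its constructor, so every subset it spawns inherits it (the class, prompt function, repo, and subset names below are hypothetical):

```python
# Sketch of the community-task pattern from the diff above; every name is
# hypothetical except the trust_dataset kwarg itself.
from lighteval.tasks.lighteval_task import LightevalTaskConfig

class CustomTrustedTask(LightevalTaskConfig):
    def __init__(self, name: str, hf_subset: str):
        super().__init__(
            name=name,
            prompt_function="custom_prompt",  # hypothetical
            hf_repo="your-org/your-dataset",  # hypothetical
            hf_subset=hf_subset,
            metric=["loglikelihood_acc"],
            trust_dataset=True,  # defaulted once, inherited by every subset
        )

# One config per subset, each trusting the dataset's loading code:
TASKS = [CustomTrustedTask(name=f"custom:{s}", hf_subset=s) for s in ["subset_a", "subset_b"]]
```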

pyproject.toml

Lines changed: 1 addition & 1 deletion

```diff
@@ -50,7 +50,7 @@ keywords = ["evaluation", "nlp", "llm"]
 dependencies = [
     # Base dependencies
     "transformers>=4.38.0",
-    "huggingface_hub==0.20.3",
+    "huggingface_hub>=0.21.2",
     "torch>=2.0",
     "GitPython==3.1.31", # for logging
     "datasets>=2.14.0",
```

src/lighteval/tasks/lighteval_task.py

Lines changed: 12 additions & 12 deletions

```diff
@@ -5,7 +5,7 @@
 from pathlib import Path
 from typing import TYPE_CHECKING, List, Optional, Tuple, Union
 
-from datasets import load_dataset
+from datasets import DownloadMode, load_dataset
 
 from lighteval.few_shot_manager import FewShotSampler
 from lighteval.logging.hierarchical_logger import hlog, hlog_warn
@@ -62,7 +62,7 @@ class LightevalTaskConfig:
         truncated_num_docs (bool): Whether less than the total number of documents were used
         output_regex (str)
         frozen (bool)
-
+        trust_dataset (bool): Whether to trust the dataset at execution or not
     """
 
     name: str
@@ -84,6 +84,8 @@ class LightevalTaskConfig:
     original_num_docs: int = -1
     effective_num_docs: int = -1
 
+    trust_dataset: bool = None
+
     def as_dict(self):
         return {
             "name": self.name,
@@ -144,6 +146,7 @@ def __init__(self, name: str, cfg: LightevalTaskConfig, cache_dir: Optional[str]
         self.dataset_path = self.hf_repo
         self.dataset_config_name = self.hf_subset
         self.dataset = None  # Delayed download
+        self.trust_dataset = cfg.trust_dataset
         hlog(f"{self.dataset_path} {self.dataset_config_name}")
         self._fewshot_docs = None
         self._docs = None
@@ -521,14 +524,10 @@ def load_datasets(tasks: list["LightevalTask"], dataset_loading_processes: int =
     """
 
     if dataset_loading_processes <= 1:
-        datasets = [
-            download_dataset_worker((task.dataset_path, task.dataset_config_name)) for task in tasks
-        ]  # Also help us with gdb
+        datasets = [download_dataset_worker(task) for task in tasks]  # Also help us with gdb
     else:
         with Pool(processes=dataset_loading_processes) as pool:
-            datasets = pool.map(
-                download_dataset_worker, [(task.dataset_path, task.dataset_config_name) for task in tasks]
-            )
+            datasets = pool.map(download_dataset_worker, tasks)
 
     for task, dataset in zip(tasks, datasets):
         task.dataset = dataset
@@ -539,13 +538,14 @@ def download_dataset_worker(args):
     Worker function to download a dataset from the HuggingFace Hub.
     Used for parallel dataset loading.
     """
-    dataset_path, dataset_config_name = args
+    task: LightevalTask = args
     dataset = load_dataset(
-        path=dataset_path,
-        name=dataset_config_name,
+        path=task.dataset_path,
+        name=task.dataset_config_name,
         data_dir=None,
         cache_dir=None,
-        download_mode=None,
+        download_mode=DownloadMode.FORCE_REDOWNLOAD,  # None
+        trust_remote_code=task.trust_dataset,
     )
     return dataset
 
```
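Outside of lighteval, the effect of the reworked worker can be reproduced with a plain `datasets` call along these lines (the dataset name is hypothetical; the `trust_remote_code` parameter requires a reasonably recent `datasets` release):

```python
# Standalone sketch of what download_dataset_worker now does for one task:
# forward the task's trust flag to datasets so script-backed datasets load.
from datasets import DownloadMode, load_dataset

dataset = load_dataset(
    path="your-org/your-dataset",  # hypothetical script-backed dataset
    name="default",
    download_mode=DownloadMode.FORCE_REDOWNLOAD,  # mirrors the commit's setting
    trust_remote_code=True,  # equivalent of trust_dataset=True on the task
)
```

Note that the commit leaves `download_mode` set to `DownloadMode.FORCE_REDOWNLOAD` (with a `# None` comment next to it), so datasets are re-downloaded on every run rather than served from the local cache.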
