Skip to content

Commit a14b055

Browse files
Pass datasets trust_remote_code (#31406)
* Pass datasets trust_remote_code
* Pass trust_remote_code in more tests
* Add trust_remote_dataset_code arg to some tests
* Revert "Temporarily pin datasets upper version to fix CI"
  This reverts commit b767282.
* Pass trust_remote_code in librispeech_asr_dummy docstrings
* Revert "Pin datasets<2.20.0 for examples"
  This reverts commit 833fc17.
* Pass trust_remote_code to all examples
* Revert "Add trust_remote_dataset_code arg to some tests" to research_projects
* Pass trust_remote_code to tests
* Pass trust_remote_code to docstrings
* Fix flax examples tests requirements
* Pass trust_remote_dataset_code arg to tests
* Replace trust_remote_dataset_code with trust_remote_code in one example
* Fix duplicate trust_remote_code
* Replace args.trust_remote_dataset_code with args.trust_remote_code
* Replace trust_remote_dataset_code with trust_remote_code in parser
* Replace trust_remote_dataset_code with trust_remote_code in dataclasses
* Replace trust_remote_dataset_code with trust_remote_code arg
1 parent 485fd81 commit a14b055

File tree

168 files changed

+804
-410
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

168 files changed

+804
-410
lines changed

examples/flax/_tests_requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
datasets >= 1.13.3,<2.20.0 # Temporary upper version
1+
datasets >= 1.13.3
22
pytest<8.0.1
33
conllu
44
nltk

examples/flax/image-captioning/run_image_captioning_flax.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -195,9 +195,9 @@ class ModelArguments:
195195
default=False,
196196
metadata={
197197
"help": (
198-
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
199-
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
200-
"execute code present on the Hub on your local machine."
198+
"Whether to trust the execution of code from datasets/models defined on the Hub."
199+
" This option should only be set to `True` for repositories you trust and in which you have read the"
200+
" code, as it will execute code present on the Hub on your local machine."
201201
)
202202
},
203203
)
@@ -458,6 +458,7 @@ def main():
458458
keep_in_memory=False,
459459
data_dir=data_args.data_dir,
460460
token=model_args.token,
461+
trust_remote_code=model_args.trust_remote_code,
461462
)
462463
else:
463464
data_files = {}

examples/flax/language-modeling/run_bart_dlm_flax.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,16 @@ class DataTrainingArguments:
191191
dataset_config_name: Optional[str] = field(
192192
default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
193193
)
194+
trust_remote_code: bool = field(
195+
default=False,
196+
metadata={
197+
"help": (
198+
"Whether to trust the execution of code from datasets/models defined on the Hub."
199+
" This option should only be set to `True` for repositories you trust and in which you have read the"
200+
" code, as it will execute code present on the Hub on your local machine."
201+
)
202+
},
203+
)
194204
train_file: Optional[str] = field(default=None, metadata={"help": "The input training data file (a text file)."})
195205
validation_file: Optional[str] = field(
196206
default=None,
@@ -518,6 +528,7 @@ def main():
518528
cache_dir=model_args.cache_dir,
519529
token=model_args.token,
520530
num_proc=data_args.preprocessing_num_workers,
531+
trust_remote_code=data_args.trust_remote_code,
521532
)
522533

523534
if "validation" not in datasets.keys():
@@ -528,6 +539,7 @@ def main():
528539
cache_dir=model_args.cache_dir,
529540
token=model_args.token,
530541
num_proc=data_args.preprocessing_num_workers,
542+
trust_remote_code=data_args.trust_remote_code,
531543
)
532544
datasets["train"] = load_dataset(
533545
data_args.dataset_name,
@@ -536,6 +548,7 @@ def main():
536548
cache_dir=model_args.cache_dir,
537549
token=model_args.token,
538550
num_proc=data_args.preprocessing_num_workers,
551+
trust_remote_code=data_args.trust_remote_code,
539552
)
540553
else:
541554
data_files = {}

examples/flax/language-modeling/run_clm_flax.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -182,9 +182,9 @@ class ModelArguments:
182182
default=False,
183183
metadata={
184184
"help": (
185-
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
186-
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
187-
"execute code present on the Hub on your local machine."
185+
"Whether to trust the execution of code from datasets/models defined on the Hub."
186+
" This option should only be set to `True` for repositories you trust and in which you have read the"
187+
" code, as it will execute code present on the Hub on your local machine."
188188
)
189189
},
190190
)
@@ -408,6 +408,7 @@ def main():
408408
keep_in_memory=False,
409409
token=model_args.token,
410410
num_proc=data_args.preprocessing_num_workers,
411+
trust_remote_code=model_args.trust_remote_code,
411412
)
412413

413414
if "validation" not in dataset.keys():
@@ -418,6 +419,7 @@ def main():
418419
cache_dir=model_args.cache_dir,
419420
token=model_args.token,
420421
num_proc=data_args.preprocessing_num_workers,
422+
trust_remote_code=model_args.trust_remote_code,
421423
)
422424
dataset["train"] = load_dataset(
423425
data_args.dataset_name,
@@ -426,6 +428,7 @@ def main():
426428
cache_dir=model_args.cache_dir,
427429
token=model_args.token,
428430
num_proc=data_args.preprocessing_num_workers,
431+
trust_remote_code=model_args.trust_remote_code,
429432
)
430433
else:
431434
data_files = {}

examples/flax/language-modeling/run_mlm_flax.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -188,9 +188,9 @@ class ModelArguments:
188188
default=False,
189189
metadata={
190190
"help": (
191-
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
192-
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
193-
"execute code present on the Hub on your local machine."
191+
"Whether to trust the execution of code from datasets/models defined on the Hub."
192+
" This option should only be set to `True` for repositories you trust and in which you have read the"
193+
" code, as it will execute code present on the Hub on your local machine."
194194
)
195195
},
196196
)
@@ -446,6 +446,7 @@ def main():
446446
cache_dir=model_args.cache_dir,
447447
token=model_args.token,
448448
num_proc=data_args.preprocessing_num_workers,
449+
trust_remote_code=model_args.trust_remote_code,
449450
)
450451

451452
if "validation" not in datasets.keys():
@@ -456,6 +457,7 @@ def main():
456457
cache_dir=model_args.cache_dir,
457458
token=model_args.token,
458459
num_proc=data_args.preprocessing_num_workers,
460+
trust_remote_code=model_args.trust_remote_code,
459461
)
460462
datasets["train"] = load_dataset(
461463
data_args.dataset_name,
@@ -464,6 +466,7 @@ def main():
464466
cache_dir=model_args.cache_dir,
465467
token=model_args.token,
466468
num_proc=data_args.preprocessing_num_workers,
469+
trust_remote_code=model_args.trust_remote_code,
467470
)
468471
else:
469472
data_files = {}

examples/flax/language-modeling/run_t5_mlm_flax.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,16 @@ class DataTrainingArguments:
192192
dataset_config_name: Optional[str] = field(
193193
default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
194194
)
195+
trust_remote_code: bool = field(
196+
default=False,
197+
metadata={
198+
"help": (
199+
"Whether to trust the execution of code from datasets/models defined on the Hub."
200+
" This option should only be set to `True` for repositories you trust and in which you have read the"
201+
" code, as it will execute code present on the Hub on your local machine."
202+
)
203+
},
204+
)
195205
train_file: Optional[str] = field(default=None, metadata={"help": "The input training data file (a text file)."})
196206
validation_file: Optional[str] = field(
197207
default=None,
@@ -560,6 +570,7 @@ def main():
560570
cache_dir=model_args.cache_dir,
561571
token=model_args.token,
562572
num_proc=data_args.preprocessing_num_workers,
573+
trust_remote_code=data_args.trust_remote_code,
563574
)
564575

565576
if "validation" not in datasets.keys():
@@ -570,6 +581,7 @@ def main():
570581
cache_dir=model_args.cache_dir,
571582
token=model_args.token,
572583
num_proc=data_args.preprocessing_num_workers,
584+
trust_remote_code=data_args.trust_remote_code,
573585
)
574586
datasets["train"] = load_dataset(
575587
data_args.dataset_name,
@@ -578,6 +590,7 @@ def main():
578590
cache_dir=model_args.cache_dir,
579591
token=model_args.token,
580592
num_proc=data_args.preprocessing_num_workers,
593+
trust_remote_code=data_args.trust_remote_code,
581594
)
582595
else:
583596
data_files = {}

examples/flax/question-answering/run_qa.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -168,9 +168,9 @@ class ModelArguments:
168168
default=False,
169169
metadata={
170170
"help": (
171-
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
172-
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
173-
"execute code present on the Hub on your local machine."
171+
"Whether to trust the execution of code from datasets/models defined on the Hub."
172+
" This option should only be set to `True` for repositories you trust and in which you have read the"
173+
" code, as it will execute code present on the Hub on your local machine."
174174
)
175175
},
176176
)
@@ -498,6 +498,7 @@ def main():
498498
data_args.dataset_config_name,
499499
cache_dir=model_args.cache_dir,
500500
token=model_args.token,
501+
trust_remote_code=model_args.trust_remote_code,
501502
)
502503
else:
503504
# Loading the dataset from local csv or json file.

examples/flax/speech-recognition/run_flax_speech_recognition_seq2seq.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,16 @@ class DataTrainingArguments:
136136
dataset_config_name: Optional[str] = field(
137137
default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
138138
)
139+
trust_remote_code: bool = field(
140+
default=False,
141+
metadata={
142+
"help": (
143+
"Whether to trust the execution of code from datasets/models defined on the Hub."
144+
" This option should only be set to `True` for repositories you trust and in which you have read the"
145+
" code, as it will execute code present on the Hub on your local machine."
146+
)
147+
},
148+
)
139149
text_column: Optional[str] = field(
140150
default=None,
141151
metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."},
@@ -442,6 +452,7 @@ def main():
442452
cache_dir=data_args.dataset_cache_dir,
443453
num_proc=data_args.preprocessing_num_workers,
444454
token=True if model_args.use_auth_token else None,
455+
trust_remote_code=data_args.trust_remote_code,
445456
)
446457

447458
if training_args.do_eval:
@@ -452,6 +463,7 @@ def main():
452463
cache_dir=data_args.dataset_cache_dir,
453464
num_proc=data_args.preprocessing_num_workers,
454465
token=True if model_args.use_auth_token else None,
466+
trust_remote_code=data_args.trust_remote_code,
455467
)
456468

457469
if not training_args.do_train and not training_args.do_eval:

examples/flax/summarization/run_summarization_flax.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -201,9 +201,9 @@ class ModelArguments:
201201
default=False,
202202
metadata={
203203
"help": (
204-
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
205-
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
206-
"execute code present on the Hub on your local machine."
204+
"Whether to trust the execution of code from datasets/models defined on the Hub."
205+
" This option should only be set to `True` for repositories you trust and in which you have read the"
206+
" code, as it will execute code present on the Hub on your local machine."
207207
)
208208
},
209209
)
@@ -485,6 +485,7 @@ def main():
485485
cache_dir=model_args.cache_dir,
486486
keep_in_memory=False,
487487
token=model_args.token,
488+
trust_remote_code=model_args.trust_remote_code,
488489
)
489490
else:
490491
data_files = {}

examples/flax/test_flax_examples.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,7 @@ def test_run_flax_speech_recognition_seq2seq(self):
265265
--dataset_config clean
266266
--train_split_name validation
267267
--eval_split_name validation
268+
--trust_remote_code
268269
--output_dir {tmp_dir}
269270
--overwrite_output_dir
270271
--num_train_epochs=2

0 commit comments

Comments
 (0)