huggingface
diff --git a/examples/flax/_tests_requirements.txt b/examples/flax/_tests_requirements.txt
Lines changed: 1 addition & 1 deletion
diff --git a/examples/flax/image-captioning/run_image_captioning_flax.py b/examples/flax/image-captioning/run_image_captioning_flax.py
Lines changed: 4 additions & 3 deletions
diff --git a/examples/flax/language-modeling/run_bart_dlm_flax.py b/examples/flax/language-modeling/run_bart_dlm_flax.py
Lines changed: 13 additions & 0 deletions
diff --git a/examples/flax/language-modeling/run_clm_flax.py b/examples/flax/language-modeling/run_clm_flax.py
Lines changed: 6 additions & 3 deletions
diff --git a/examples/flax/language-modeling/run_mlm_flax.py b/examples/flax/language-modeling/run_mlm_flax.py
Lines changed: 6 additions & 3 deletions
diff --git a/examples/flax/language-modeling/run_t5_mlm_flax.py b/examples/flax/language-modeling/run_t5_mlm_flax.py
Lines changed: 13 additions & 0 deletions
diff --git a/examples/flax/question-answering/run_qa.py b/examples/flax/question-answering/run_qa.py
Lines changed: 4 additions & 3 deletions
diff --git a/examples/flax/speech-recognition/run_flax_speech_recognition_seq2seq.py b/examples/flax/speech-recognition/run_flax_speech_recognition_seq2seq.py
Lines changed: 12 additions & 0 deletions
diff --git a/examples/flax/summarization/run_summarization_flax.py b/examples/flax/summarization/run_summarization_flax.py
Lines changed: 4 additions & 3 deletions
diff --git a/examples/flax/test_flax_examples.py b/examples/flax/test_flax_examples.py
Lines changed: 1 addition & 0 deletions
@@ -1,4 +1,4 @@
-datasets >= 1.13.3,<2.20.0 # Temporary upper version
+datasets >= 1.13.3
 pytest<8.0.1
 conllu
 nltk
 
@@ -195,9 +195,9 @@ class ModelArguments:
         default=False,
         metadata={
             "help": (
-                "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
-                "execute code present on the Hub on your local machine."
+                "Whether to trust the execution of code from datasets/models defined on the Hub."
+                " This option should only be set to `True` for repositories you trust and in which you have read the"
+                " code, as it will execute code present on the Hub on your local machine."
             )
         },
     )
@@ -458,6 +458,7 @@ def main():
             keep_in_memory=False,
             data_dir=data_args.data_dir,
             token=model_args.token,
+            trust_remote_code=model_args.trust_remote_code,
         )
     else:
         data_files = {}
 
@@ -191,6 +191,16 @@ class DataTrainingArguments:
     dataset_config_name: Optional[str] = field(
         default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
     )
+    trust_remote_code: bool = field(
+        default=False,
+        metadata={
+            "help": (
+                "Whether to trust the execution of code from datasets/models defined on the Hub."
+                " This option should only be set to `True` for repositories you trust and in which you have read the"
+                " code, as it will execute code present on the Hub on your local machine."
+            )
+        },
+    )
     train_file: Optional[str] = field(default=None, metadata={"help": "The input training data file (a text file)."})
     validation_file: Optional[str] = field(
         default=None,
@@ -518,6 +528,7 @@ def main():
             cache_dir=model_args.cache_dir,
             token=model_args.token,
             num_proc=data_args.preprocessing_num_workers,
+            trust_remote_code=data_args.trust_remote_code,
         )
 
         if "validation" not in datasets.keys():
@@ -528,6 +539,7 @@ def main():
                 cache_dir=model_args.cache_dir,
                 token=model_args.token,
                 num_proc=data_args.preprocessing_num_workers,
+                trust_remote_code=data_args.trust_remote_code,
             )
             datasets["train"] = load_dataset(
                 data_args.dataset_name,
@@ -536,6 +548,7 @@ def main():
                 cache_dir=model_args.cache_dir,
                 token=model_args.token,
                 num_proc=data_args.preprocessing_num_workers,
+                trust_remote_code=data_args.trust_remote_code,
             )
     else:
         data_files = {}
 
@@ -182,9 +182,9 @@ class ModelArguments:
         default=False,
         metadata={
             "help": (
-                "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
-                "execute code present on the Hub on your local machine."
+                "Whether to trust the execution of code from datasets/models defined on the Hub."
+                " This option should only be set to `True` for repositories you trust and in which you have read the"
+                " code, as it will execute code present on the Hub on your local machine."
             )
         },
     )
@@ -408,6 +408,7 @@ def main():
             keep_in_memory=False,
             token=model_args.token,
             num_proc=data_args.preprocessing_num_workers,
+            trust_remote_code=model_args.trust_remote_code,
         )
 
         if "validation" not in dataset.keys():
@@ -418,6 +419,7 @@ def main():
                 cache_dir=model_args.cache_dir,
                 token=model_args.token,
                 num_proc=data_args.preprocessing_num_workers,
+                trust_remote_code=model_args.trust_remote_code,
             )
             dataset["train"] = load_dataset(
                 data_args.dataset_name,
@@ -426,6 +428,7 @@ def main():
                 cache_dir=model_args.cache_dir,
                 token=model_args.token,
                 num_proc=data_args.preprocessing_num_workers,
+                trust_remote_code=model_args.trust_remote_code,
             )
     else:
         data_files = {}
 
@@ -188,9 +188,9 @@ class ModelArguments:
         default=False,
         metadata={
             "help": (
-                "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
-                "execute code present on the Hub on your local machine."
+                "Whether to trust the execution of code from datasets/models defined on the Hub."
+                " This option should only be set to `True` for repositories you trust and in which you have read the"
+                " code, as it will execute code present on the Hub on your local machine."
             )
         },
     )
@@ -446,6 +446,7 @@ def main():
             cache_dir=model_args.cache_dir,
             token=model_args.token,
             num_proc=data_args.preprocessing_num_workers,
+            trust_remote_code=model_args.trust_remote_code,
         )
 
         if "validation" not in datasets.keys():
@@ -456,6 +457,7 @@ def main():
                 cache_dir=model_args.cache_dir,
                 token=model_args.token,
                 num_proc=data_args.preprocessing_num_workers,
+                trust_remote_code=model_args.trust_remote_code,
             )
             datasets["train"] = load_dataset(
                 data_args.dataset_name,
@@ -464,6 +466,7 @@ def main():
                 cache_dir=model_args.cache_dir,
                 token=model_args.token,
                 num_proc=data_args.preprocessing_num_workers,
+                trust_remote_code=model_args.trust_remote_code,
             )
     else:
         data_files = {}
 
@@ -192,6 +192,16 @@ class DataTrainingArguments:
     dataset_config_name: Optional[str] = field(
         default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
     )
+    trust_remote_code: bool = field(
+        default=False,
+        metadata={
+            "help": (
+                "Whether to trust the execution of code from datasets/models defined on the Hub."
+                " This option should only be set to `True` for repositories you trust and in which you have read the"
+                " code, as it will execute code present on the Hub on your local machine."
+            )
+        },
+    )
     train_file: Optional[str] = field(default=None, metadata={"help": "The input training data file (a text file)."})
     validation_file: Optional[str] = field(
         default=None,
@@ -560,6 +570,7 @@ def main():
             cache_dir=model_args.cache_dir,
             token=model_args.token,
             num_proc=data_args.preprocessing_num_workers,
+            trust_remote_code=data_args.trust_remote_code,
         )
 
         if "validation" not in datasets.keys():
@@ -570,6 +581,7 @@ def main():
                 cache_dir=model_args.cache_dir,
                 token=model_args.token,
                 num_proc=data_args.preprocessing_num_workers,
+                trust_remote_code=data_args.trust_remote_code,
             )
             datasets["train"] = load_dataset(
                 data_args.dataset_name,
@@ -578,6 +590,7 @@ def main():
                 cache_dir=model_args.cache_dir,
                 token=model_args.token,
                 num_proc=data_args.preprocessing_num_workers,
+                trust_remote_code=data_args.trust_remote_code,
             )
     else:
         data_files = {}
 
@@ -168,9 +168,9 @@ class ModelArguments:
         default=False,
         metadata={
             "help": (
-                "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
-                "execute code present on the Hub on your local machine."
+                "Whether to trust the execution of code from datasets/models defined on the Hub."
+                " This option should only be set to `True` for repositories you trust and in which you have read the"
+                " code, as it will execute code present on the Hub on your local machine."
             )
         },
     )
@@ -498,6 +498,7 @@ def main():
             data_args.dataset_config_name,
             cache_dir=model_args.cache_dir,
             token=model_args.token,
+            trust_remote_code=model_args.trust_remote_code,
         )
     else:
         # Loading the dataset from local csv or json file.
 
@@ -136,6 +136,16 @@ class DataTrainingArguments:
     dataset_config_name: Optional[str] = field(
         default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
     )
+    trust_remote_code: bool = field(
+        default=False,
+        metadata={
+            "help": (
+                "Whether to trust the execution of code from datasets/models defined on the Hub."
+                " This option should only be set to `True` for repositories you trust and in which you have read the"
+                " code, as it will execute code present on the Hub on your local machine."
+            )
+        },
+    )
     text_column: Optional[str] = field(
         default=None,
         metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."},
@@ -442,6 +452,7 @@ def main():
             cache_dir=data_args.dataset_cache_dir,
             num_proc=data_args.preprocessing_num_workers,
             token=True if model_args.use_auth_token else None,
+            trust_remote_code=data_args.trust_remote_code,
         )
 
     if training_args.do_eval:
@@ -452,6 +463,7 @@ def main():
             cache_dir=data_args.dataset_cache_dir,
             num_proc=data_args.preprocessing_num_workers,
             token=True if model_args.use_auth_token else None,
+            trust_remote_code=data_args.trust_remote_code,
         )
 
     if not training_args.do_train and not training_args.do_eval:
 
@@ -201,9 +201,9 @@ class ModelArguments:
         default=False,
         metadata={
             "help": (
-                "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
-                "execute code present on the Hub on your local machine."
+                "Whether to trust the execution of code from datasets/models defined on the Hub."
+                " This option should only be set to `True` for repositories you trust and in which you have read the"
+                " code, as it will execute code present on the Hub on your local machine."
             )
         },
     )
@@ -485,6 +485,7 @@ def main():
             cache_dir=model_args.cache_dir,
             keep_in_memory=False,
             token=model_args.token,
+            trust_remote_code=model_args.trust_remote_code,
         )
     else:
         data_files = {}
 
@@ -265,6 +265,7 @@ def test_run_flax_speech_recognition_seq2seq(self):
             --dataset_config clean
             --train_split_name validation
             --eval_split_name validation
+            --trust_remote_code
             --output_dir {tmp_dir}
             --overwrite_output_dir
             --num_train_epochs=2
| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -1,4 +1,4 @@` |
| `1` |  | `-datasets >= 1.13.3,<2.20.0 # Temporary upper version` |
|  | `1` | `+datasets >= 1.13.3` |
| `2` | `2` | `pytest<8.0.1` |
| `3` | `3` | `conllu` |
| `4` | `4` | `nltk` |