examples/tensorflow/language-modeling/run_mlm.py

-Original file line number
+Diff line change
@@ -499,7 +499,7 @@ def group_texts(examples):
             # region TF Dataset preparation
             num_replicas = training_args.strategy.num_replicas_in_sync
             data_collator = DataCollatorForLanguageModeling(
-                tokenizer=tokenizer, mlm_probability=data_args.mlm_probability, return_tensors="tf"
+                tokenizer=tokenizer, mlm_probability=data_args.mlm_probability, return_tensors="np"
             )
             options = tf.data.Options()
             options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
@@ Expand Down @@

examples/tensorflow/multiple-choice/run_swag.py

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -105,7 +105,7 @@ def __call__(self, features):
  
                padding=self.padding,

                max_length=self.max_length,

                pad_to_multiple_of=self.pad_to_multiple_of,

-               return_tensors="tf",

+               return_tensors="np",

            )

            # Un-flatten

    @@ -410,7 +410,7 @@ def preprocess_function(examples):
  
                )

        if data_args.pad_to_max_length:

-           data_collator = DefaultDataCollator(return_tensors="tf")

+           data_collator = DefaultDataCollator(return_tensors="np")

        else:

            # custom class defined above, as HF has no data collator for multiple choice

            data_collator = DataCollatorForMultipleChoice(tokenizer)

examples/tensorflow/summarization/run_summarization.py

-Original file line number
+Diff line change
@@ -533,7 +533,7 @@ def postprocess_text(preds, labels):
                 model=model,
                 label_pad_token_id=label_pad_token_id,
                 pad_to_multiple_of=128,  # Reduce the number of unique shapes for XLA, especially for generation
-                return_tensors="tf",
+                return_tensors="np",
             )
             dataset_options = tf.data.Options()
@@ Expand Down @@

examples/tensorflow/text-classification/run_glue.py

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -345,9 +345,9 @@ def preprocess_function(examples):
  
        datasets = datasets.map(preprocess_function, batched=True, load_from_cache_file=not data_args.overwrite_cache)

        if data_args.pad_to_max_length:

-           data_collator = DefaultDataCollator(return_tensors="tf")

+           data_collator = DefaultDataCollator(return_tensors="np")

        else:

-           data_collator = DataCollatorWithPadding(tokenizer, return_tensors="tf")

+           data_collator = DataCollatorWithPadding(tokenizer, return_tensors="np")

        # endregion

        # region Metric function

examples/tensorflow/token-classification/run_ner.py

-Original file line number
+Diff line change
@@ -396,7 +396,7 @@ def tokenize_and_align_labels(examples):
             # We need the DataCollatorForTokenClassification here, as we need to correctly pad labels as
             # well as inputs.
-            collate_fn = DataCollatorForTokenClassification(tokenizer=tokenizer, return_tensors="tf")
+            collate_fn = DataCollatorForTokenClassification(tokenizer=tokenizer, return_tensors="np")
             num_replicas = training_args.strategy.num_replicas_in_sync
             total_train_batch_size = training_args.per_device_train_batch_size * num_replicas
@@ Expand Down @@

examples/tensorflow/translation/run_translation.py

-Original file line number
+Diff line change
@@ -499,7 +499,7 @@ def preprocess_function(examples):
                 model=model,
                 label_pad_token_id=label_pad_token_id,
                 pad_to_multiple_of=64,  # Reduce the number of unique shapes for XLA, especially for generation
-                return_tensors="tf",
+                return_tensors="np",
             )
             num_replicas = training_args.strategy.num_replicas_in_sync
             total_train_batch_size = training_args.per_device_train_batch_size * num_replicas
@@ Expand Down @@

Use return_tensors="np" instead of "tf" #21266

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

Rocketknight1 merged 1 commit into main from return_np_tensors_in_examples

Jan 24, 2023

-Original file line number
+Diff line change
@@ -499,7 +499,7 @@ def group_texts(examples):
             # region TF Dataset preparation
             num_replicas = training_args.strategy.num_replicas_in_sync
             data_collator = DataCollatorForLanguageModeling(
-                tokenizer=tokenizer, mlm_probability=data_args.mlm_probability, return_tensors="tf"
+                tokenizer=tokenizer, mlm_probability=data_args.mlm_probability, return_tensors="np"
             )
             options = tf.data.Options()
             options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
@@ Expand Down @@

-Original file line number
+Diff line change
@@ -533,7 +533,7 @@ def postprocess_text(preds, labels):
                 model=model,
                 label_pad_token_id=label_pad_token_id,
                 pad_to_multiple_of=128,  # Reduce the number of unique shapes for XLA, especially for generation
-                return_tensors="tf",
+                return_tensors="np",
             )
             dataset_options = tf.data.Options()
@@ Expand Down @@

-Original file line number
+Diff line change
@@ -396,7 +396,7 @@ def tokenize_and_align_labels(examples):
             # We need the DataCollatorForTokenClassification here, as we need to correctly pad labels as
             # well as inputs.
-            collate_fn = DataCollatorForTokenClassification(tokenizer=tokenizer, return_tensors="tf")
+            collate_fn = DataCollatorForTokenClassification(tokenizer=tokenizer, return_tensors="np")
             num_replicas = training_args.strategy.num_replicas_in_sync
             total_train_batch_size = training_args.per_device_train_batch_size * num_replicas
@@ Expand Down @@

-Original file line number
+Diff line change
@@ -499,7 +499,7 @@ def preprocess_function(examples):
                 model=model,
                 label_pad_token_id=label_pad_token_id,
                 pad_to_multiple_of=64,  # Reduce the number of unique shapes for XLA, especially for generation
-                return_tensors="tf",
+                return_tensors="np",
             )
             num_replicas = training_args.strategy.num_replicas_in_sync
             total_train_batch_size = training_args.per_device_train_batch_size * num_replicas
@@ Expand Down @@

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Use return_tensors="np" instead of "tf" #21266

Uh oh!

Diff view

Diff view

There are no files selected for viewing