[Trainer] Rename tokenizer to processor, add deprecation #30102
Changes from all commits: bea74b4, cd6d68a, 6ca7eb5, 65be4ac, e8abcfc, b91869b, b166f58, a4ccda9, 2548635
```diff
@@ -58,6 +58,7 @@
 from .configuration_utils import PretrainedConfig
 from .data.data_collator import DataCollator, DataCollatorWithPadding, default_data_collator
 from .debug_utils import DebugOption, DebugUnderflowOverflow
+from .feature_extraction_sequence_utils import SequenceFeatureExtractor
 from .hyperparameter_search import ALL_HYPERPARAMETER_SEARCH_BACKENDS, default_hp_search_backend
 from .image_processing_utils import BaseImageProcessor
 from .integrations.deepspeed import deepspeed_init, deepspeed_load_checkpoint, is_deepspeed_available
```
```diff
@@ -69,6 +70,7 @@
     MODEL_MAPPING_NAMES,
 )
 from .optimization import Adafactor, get_scheduler
+from .processing_utils import ProcessorMixin
 from .pytorch_utils import ALL_LAYERNORM_LAYERS, is_torch_greater_or_equal_than_1_13
 from .tokenization_utils_base import PreTrainedTokenizerBase
 from .trainer_callback import (
```
```diff
@@ -285,8 +287,9 @@ class Trainer:
             `output_dir` set to a directory named *tmp_trainer* in the current directory if not provided.
         data_collator (`DataCollator`, *optional*):
             The function to use to form a batch from a list of elements of `train_dataset` or `eval_dataset`. Will
-            default to [`default_data_collator`] if no `tokenizer` is provided, an instance of
-            [`DataCollatorWithPadding`] otherwise.
+            default to [`default_data_collator`] if no `processor` is provided.
+
+            If the `processor` passed is a tokenizer, will default to an instance of [`DataCollatorWithPadding`].
         train_dataset (`torch.utils.data.Dataset` or `torch.utils.data.IterableDataset`, *optional*):
             The dataset to use for training. If it is a [`~datasets.Dataset`], columns not accepted by the
             `model.forward()` method are automatically removed.
```
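To make the documented collator default concrete, here is a minimal usage sketch. It assumes the `processor` keyword proposed in this diff is available (it is this PR's proposal, not necessarily the released `Trainer` signature); the checkpoint name and output directory are placeholders.

```python
from datasets import Dataset
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
)

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")

# A tiny tokenized dataset just to keep the example self-contained.
train_dataset = Dataset.from_dict({"text": ["short", "a slightly longer example"], "label": [0, 1]})
train_dataset = train_dataset.map(lambda ex: tokenizer(ex["text"]), remove_columns=["text"])

trainer = Trainer(
    model=model,
    args=TrainingArguments(output_dir="tmp_trainer"),
    train_dataset=train_dataset,
    processor=tokenizer,  # a tokenizer -> DataCollatorWithPadding becomes the default collator
)
assert isinstance(trainer.data_collator, DataCollatorWithPadding)
```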
```diff
@@ -300,10 +303,13 @@
             The dataset to use for evaluation. If it is a [`~datasets.Dataset`], columns not accepted by the
             `model.forward()` method are automatically removed. If it is a dictionary, it will evaluate on each
             dataset prepending the dictionary key to the metric name.
-        tokenizer ([`PreTrainedTokenizerBase`], *optional*):
-            The tokenizer used to preprocess the data. If provided, will be used to automatically pad the inputs to the
-            maximum length when batching inputs, and it will be saved along the model to make it easier to rerun an
-            interrupted training or reuse the fine-tuned model.
+        processor ([`PreTrainedTokenizer` or `BaseImageProcessor` or `SequenceFeatureExtractor` or `ProcessorMixin`], *optional*):
+            The processor used to preprocess the data. Can be a tokenizer, image processor, feature extractor or multimodal processor.
+
+            If a tokenizer is provided, it will be used to automatically pad the inputs to the
+            maximum length when batching inputs.
+
+            The processor will be saved along the model to make it easier to rerun an interrupted training or reuse the fine-tuned model.
         model_init (`Callable[[], PreTrainedModel]`, *optional*):
            A function that instantiates the model to be used. If provided, each call to [`~Trainer.train`] will start
            from a new instance of the model as given by this function.
```
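Under that broader signature, a non-tokenizer preprocessing object would be passed the same way. A sketch under the same assumption that this PR's `processor` argument exists; the checkpoint name is only illustrative:

```python
from transformers import AutoImageProcessor, AutoModelForImageClassification, Trainer, TrainingArguments

image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224")
model = AutoModelForImageClassification.from_pretrained("google/vit-base-patch16-224")

trainer = Trainer(
    model=model,
    args=TrainingArguments(output_dir="tmp_trainer"),
    processor=image_processor,  # not a tokenizer -> plain default_data_collator is used
)
# Per this diff, the image processor would also be written out with checkpoints and by push_to_hub().
```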
```diff
@@ -360,14 +366,24 @@ def __init__(
         data_collator: Optional[DataCollator] = None,
         train_dataset: Optional[Union[Dataset, IterableDataset]] = None,
         eval_dataset: Optional[Union[Dataset, Dict[str, Dataset]]] = None,
-        tokenizer: Optional[PreTrainedTokenizerBase] = None,
+        processor: Optional[
+            Union[PreTrainedTokenizerBase, BaseImageProcessor, SequenceFeatureExtractor, ProcessorMixin]
+        ] = None,
         model_init: Optional[Callable[[], PreTrainedModel]] = None,
         compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
         callbacks: Optional[List[TrainerCallback]] = None,
         optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
         preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None,
-        image_processor: Optional["BaseImageProcessor"] = None,
+        tokenizer: Optional[PreTrainedTokenizerBase] = None,
     ):
+        if tokenizer is not None:
+            warnings.warn(
+                "The `tokenizer` argument is deprecated and will be removed in v5 of Transformers. You can use `processor` "
+                "instead to pass your tokenizer/image processor/feature extractor/multimodal processor object.",
+                FutureWarning,
+            )
+            processor = tokenizer
+
         if args is None:
             output_dir = "tmp_trainer"
             logger.info(f"No `TrainingArguments` passed, using `output_dir={output_dir}`.")
```

Contributor: I believe this PR is proposing deprecating …

Contributor: Ah, yes. Perhaps I wasn't clear. What I meant was we would introduce a new argument, which should be used in preference to …

Contributor: Great, thanks for the clarification - on the same page here! Happy to update PR #30864 accordingly, unless you want to see this one to completion @NielsRogge?
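For clarity, this is how the deprecation path above is intended to behave. A self-contained sketch, again assuming this PR's signature; the checkpoint name and output directory are placeholders:

```python
import warnings

from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    trainer = Trainer(model=model, args=TrainingArguments(output_dir="tmp_trainer"), tokenizer=tokenizer)

# The old keyword still works, but a FutureWarning is emitted and the value is
# forwarded to the new attribute.
assert any(issubclass(w.category, FutureWarning) for w in caught)
assert trainer.processor is tokenizer
```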
```diff
@@ -490,12 +506,15 @@
         ):
             self.place_model_on_device = False

-        default_collator = DataCollatorWithPadding(tokenizer) if tokenizer is not None else default_data_collator
+        default_collator = (
+            DataCollatorWithPadding(processor)
+            if processor is not None and isinstance(processor, PreTrainedTokenizerBase)
+            else default_data_collator
+        )
         self.data_collator = data_collator if data_collator is not None else default_collator
         self.train_dataset = train_dataset
         self.eval_dataset = eval_dataset
-        self.tokenizer = tokenizer
-        self.image_processor = image_processor
+        self.processor = processor

         # Bnb Quantized models doesn't support `.to` operation.
         if (
```
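The `isinstance` gate above is the key behavioral detail: only objects that can pad get the padding collator. A small standalone sketch of that selection logic (`pick_default_collator` is a hypothetical helper for illustration, not part of the library):

```python
from transformers import DataCollatorWithPadding, PreTrainedTokenizerBase, default_data_collator


def pick_default_collator(processor):
    # Mirrors the branch in the diff: genuine tokenizers can pad, so they get
    # DataCollatorWithPadding; image processors, feature extractors and
    # multimodal processors fall back to default_data_collator.
    if processor is not None and isinstance(processor, PreTrainedTokenizerBase):
        return DataCollatorWithPadding(processor)
    return default_data_collator
```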
```diff
@@ -547,7 +566,7 @@
         default_callbacks = DEFAULT_CALLBACKS + get_reporting_integration_callbacks(self.args.report_to)
         callbacks = default_callbacks if callbacks is None else default_callbacks + callbacks
         self.callback_handler = CallbackHandler(
-            callbacks, self.model, self.tokenizer, self.image_processor, self.optimizer, self.lr_scheduler
+            callbacks, self.model, self.processor, self.optimizer, self.lr_scheduler
         )
         self.add_callback(PrinterCallback if self.args.disable_tqdm else DEFAULT_PROGRESS_CALLBACK)
```
```diff
@@ -668,6 +687,14 @@
             num_devices = xr.global_runtime_device_count()
             xs.set_global_mesh(xs.Mesh(np.array(range(num_devices)), (num_devices, 1), axis_names=("fsdp", "tensor")))

+    @property
+    def tokenizer(self):
+        warnings.warn(
+            "The 'tokenizer' attribute is deprecated and will be removed in v5 of Transformers. Use `processor` instead",
+            FutureWarning,
+        )
+        return self.processor
+
     def _activate_neftune(self, model):
         r"""
         Activates the neftune as presented in this code: https://github.com/neelsjain/NEFTune and paper:
```
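Reading the old attribute would keep working through this property; note it defines only a getter, so assigning to `trainer.tokenizer` would raise an `AttributeError` under this diff. Continuing the previous sketch:

```python
import warnings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    legacy = trainer.tokenizer  # emits a FutureWarning and returns trainer.processor

assert legacy is trainer.processor
assert any(issubclass(w.category, FutureWarning) for w in caught)
```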
```diff
@@ -821,7 +848,7 @@ def _get_train_sampler(self) -> Optional[torch.utils.data.Sampler]:
                 )
             else:
                 lengths = None
-            model_input_name = self.tokenizer.model_input_names[0] if self.tokenizer is not None else None
+            model_input_name = self.processor.model_input_names[0] if self.processor is not None else None
             return LengthGroupedSampler(
                 self.args.train_batch_size * self.args.gradient_accumulation_steps,
                 dataset=self.train_dataset,
```
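For context on why `model_input_names[0]` is used here: `group_by_length` needs to know which feature carries the sequences whose lengths should be grouped, and tokenizers, image processors and feature extractors all expose that name. For example (checkpoint names are only illustrative):

```python
from transformers import AutoImageProcessor, AutoTokenizer

print(AutoTokenizer.from_pretrained("bert-base-uncased").model_input_names[0])                  # "input_ids"
print(AutoImageProcessor.from_pretrained("google/vit-base-patch16-224").model_input_names[0])   # "pixel_values"
```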
```diff
@@ -3280,10 +3307,8 @@ def _save_tpu(self, output_dir: Optional[str] = None):
                 save_function=xm.save,
                 safe_serialization=self.args.save_safetensors,
             )
-        if self.tokenizer is not None and self.args.should_save:
-            self.tokenizer.save_pretrained(output_dir)
-        if self.image_processor is not None and self.args.should_save:
-            self.image_processor.save_pretrained(output_dir)
+        if self.processor is not None and self.args.should_save:
+            self.processor.save_pretrained(output_dir)

         # We moved the model from TPU -> CPU for saving the weights.
         # Now we should move it back to subsequent compute still works.
```
```diff
@@ -3319,10 +3344,8 @@ def _save(self, output_dir: Optional[str] = None, state_dict=None):
                 output_dir, state_dict=state_dict, safe_serialization=self.args.save_safetensors
             )

-        if self.tokenizer is not None:
-            self.tokenizer.save_pretrained(output_dir)
-        if self.image_processor is not None:
-            self.image_processor.save_pretrained(output_dir)
+        if self.processor is not None:
+            self.processor.save_pretrained(output_dir)

         # Good practice: save your training arguments together with the trained model
         torch.save(self.args, os.path.join(output_dir, TRAINING_ARGS_NAME))
```
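Because the processor is written next to the model weights, a finished (or interrupted) run can be reloaded straight from the output directory. A sketch continuing the trainer built in the earlier examples, assuming its `processor` was a tokenizer; the paths are placeholders:

```python
from transformers import AutoModelForSequenceClassification, AutoTokenizer

trainer.save_model("tmp_trainer/final")  # per the diff above, this also calls processor.save_pretrained()
reloaded_model = AutoModelForSequenceClassification.from_pretrained("tmp_trainer/final")
reloaded_tokenizer = AutoTokenizer.from_pretrained("tmp_trainer/final")
```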
```diff
@@ -4017,12 +4040,9 @@ def _push_from_checkpoint(self, checkpoint_folder):
         for modeling_file in modeling_files:
             if os.path.isfile(os.path.join(checkpoint_folder, modeling_file)):
                 shutil.copy(os.path.join(checkpoint_folder, modeling_file), os.path.join(output_dir, modeling_file))
-        # Saving the tokenizer is fast and we don't know how many files it may have spawned, so we resave it to be sure.
-        if self.tokenizer is not None:
-            self.tokenizer.save_pretrained(output_dir)
-        # Same for the image processor
-        if self.image_processor is not None:
-            self.image_processor.save_pretrained(output_dir)
+        # Saving the processor is fast and we don't know how many files it may have spawned, so we resave it to be sure.
+        if self.processor is not None:
+            self.processor.save_pretrained(output_dir)
         # Same for the training arguments
         torch.save(self.args, os.path.join(output_dir, TRAINING_ARGS_NAME))
```
```diff
@@ -4076,7 +4096,7 @@ def push_to_hub(
         **kwargs,
     ) -> str:
         """
-        Upload `self.model` and `self.tokenizer` or `self.image_processor` to the 🤗 model hub on the repo `self.args.hub_model_id`.
+        Upload `self.model` and `self.processor` to the 🤗 model hub on the repo `self.args.hub_model_id`.

         Parameters:
             commit_message (`str`, *optional*, defaults to `"End of training"`):
```
Review comment: I think we should be careful when manipulating the init signature, see #30126 - for new args we should put them at the end.