diff --git a/docs/source/en/tasks/image_classification.md b/docs/source/en/tasks/image_classification.md
index f54b4ed025d3..bdb261bc87a9 100644
--- a/docs/source/en/tasks/image_classification.md
+++ b/docs/source/en/tasks/image_classification.md
@@ -322,7 +322,7 @@ At this point, only three steps remain:
 ...     data_collator=data_collator,
 ...     train_dataset=food["train"],
 ...     eval_dataset=food["test"],
-...     image_processor=image_processor,
+...     processor=image_processor,
 ...     compute_metrics=compute_metrics,
 ... )

@@ -418,7 +418,7 @@ and use the [PushToHubCallback](../main_classes/keras_callbacks#transformers.PushToHubCallback)
 >>> metric_callback = KerasMetricCallback(metric_fn=compute_metrics, eval_dataset=tf_eval_dataset)
 >>> push_to_hub_callback = PushToHubCallback(
 ...     output_dir="food_classifier",
-...     image_processor=image_processor,
+...     processor=image_processor,
 ...     save_strategy="no",
 ... )
 >>> callbacks = [metric_callback, push_to_hub_callback]
diff --git a/docs/source/en/tasks/object_detection.md b/docs/source/en/tasks/object_detection.md
index 56d46e4aa522..004de8f10ecc 100644
--- a/docs/source/en/tasks/object_detection.md
+++ b/docs/source/en/tasks/object_detection.md
@@ -384,7 +384,7 @@ Finally, bring everything together, and call [`~transformers.Trainer.train`]:
 ...     args=training_args,
 ...     data_collator=collate_fn,
 ...     train_dataset=cppe5["train"],
-...     image_processor=image_processor,
+...     processor=image_processor,
 ... )

 >>> trainer.train()
diff --git a/docs/source/en/tasks/video_classification.md b/docs/source/en/tasks/video_classification.md
index a0f0a695f705..3e946fead058 100644
--- a/docs/source/en/tasks/video_classification.md
+++ b/docs/source/en/tasks/video_classification.md
@@ -407,7 +407,7 @@ Then you just pass all of this along with the datasets to `Trainer`:
 ...     args,
 ...     train_dataset=train_dataset,
 ...     eval_dataset=val_dataset,
-...     image_processor=image_processor,
+...     processor=image_processor,
 ...     compute_metrics=compute_metrics,
 ...     data_collator=collate_fn,
 ... )
diff --git a/docs/source/es/tasks/image_classification.md b/docs/source/es/tasks/image_classification.md
index 4a572d816985..c9427ecdcf58 100644
--- a/docs/source/es/tasks/image_classification.md
+++ b/docs/source/es/tasks/image_classification.md
@@ -160,7 +160,7 @@ Al llegar a este punto, solo quedan tres pasos:
 ...     data_collator=data_collator,
 ...     train_dataset=food["train"],
 ...     eval_dataset=food["test"],
-...     image_processor=image_processor,
+...     processor=image_processor,
 ... )

 >>> trainer.train()
diff --git a/docs/source/ja/tasks/image_classification.md b/docs/source/ja/tasks/image_classification.md
index fc57cf4dfb9b..2916dfa5e13c 100644
--- a/docs/source/ja/tasks/image_classification.md
+++ b/docs/source/ja/tasks/image_classification.md
@@ -328,7 +328,7 @@ food["test"].set_transform(preprocess_val)
 ...     data_collator=data_collator,
 ...     train_dataset=food["train"],
 ...     eval_dataset=food["test"],
-...     image_processor=image_processor,
+...     processor=image_processor,
 ...     compute_metrics=compute_metrics,
 ... )

@@ -426,7 +426,7 @@ Convert your datasets to the `tf.data.Dataset` format using the [`~datasets.Dataset.to_tf_dataset`]
 >>> metric_callback = KerasMetricCallback(metric_fn=compute_metrics, eval_dataset=tf_eval_dataset)
 >>> push_to_hub_callback = PushToHubCallback(
 ...     output_dir="food_classifier",
-...     image_processor=image_processor,
+...     processor=image_processor,
 ...     save_strategy="no",
 ... )
 >>> callbacks = [metric_callback, push_to_hub_callback]
diff --git a/docs/source/ja/tasks/object_detection.md b/docs/source/ja/tasks/object_detection.md
index e90cb4645a1f..2d065efe846b 100644
--- a/docs/source/ja/tasks/object_detection.md
+++ b/docs/source/ja/tasks/object_detection.md
@@ -376,7 +376,7 @@ DETR モデルをトレーニングできる「ラベル」。画像プロセッ
 ...     args=training_args,
 ...     data_collator=collate_fn,
 ...     train_dataset=cppe5["train"],
-...     image_processor=image_processor,
+...     processor=image_processor,
 ... )

 >>> trainer.train()
diff --git a/docs/source/ja/tasks/video_classification.md b/docs/source/ja/tasks/video_classification.md
index b0b5139028b2..e444df102dba 100644
--- a/docs/source/ja/tasks/video_classification.md
+++ b/docs/source/ja/tasks/video_classification.md
@@ -414,7 +414,7 @@ def compute_metrics(eval_pred):
 ...     args,
 ...     train_dataset=train_dataset,
 ...     eval_dataset=val_dataset,
-...     image_processor=image_processor,
+...     processor=image_processor,
 ...     compute_metrics=compute_metrics,
 ...     data_collator=collate_fn,
 ... )
diff --git a/docs/source/ko/tasks/image_classification.md b/docs/source/ko/tasks/image_classification.md
index 055100d4c0b1..494c687faae9 100644
--- a/docs/source/ko/tasks/image_classification.md
+++ b/docs/source/ko/tasks/image_classification.md
@@ -321,7 +321,7 @@ food["test"].set_transform(preprocess_val)
 ...     data_collator=data_collator,
 ...     train_dataset=food["train"],
 ...     eval_dataset=food["test"],
-...     image_processor=image_processor,
+...     processor=image_processor,
 ...     compute_metrics=compute_metrics,
 ... )

@@ -417,7 +417,7 @@ TensorFlow에서 모델을 미세 조정하려면 다음 단계를 따르세요:
 >>> metric_callback = KerasMetricCallback(metric_fn=compute_metrics, eval_dataset=tf_eval_dataset)
 >>> push_to_hub_callback = PushToHubCallback(
 ...     output_dir="food_classifier",
-...     image_processor=image_processor,
+...     processor=image_processor,
 ...     save_strategy="no",
 ... )
 >>> callbacks = [metric_callback, push_to_hub_callback]
diff --git a/docs/source/ko/tasks/object_detection.md b/docs/source/ko/tasks/object_detection.md
index 1eeada9a50ee..02191af6369c 100644
--- a/docs/source/ko/tasks/object_detection.md
+++ b/docs/source/ko/tasks/object_detection.md
@@ -366,7 +366,7 @@ DatasetDict({
 ...     args=training_args,
 ...     data_collator=collate_fn,
 ...     train_dataset=cppe5["train"],
-...     image_processor=image_processor,
+...     processor=image_processor,
 ... )

 >>> trainer.train()
diff --git a/docs/source/ko/tasks/video_classification.md b/docs/source/ko/tasks/video_classification.md
index 4d13f9ac6105..e44b07809d36 100644
--- a/docs/source/ko/tasks/video_classification.md
+++ b/docs/source/ko/tasks/video_classification.md
@@ -411,7 +411,7 @@ def compute_metrics(eval_pred):
 ...     args,
 ...     train_dataset=train_dataset,
 ...     eval_dataset=val_dataset,
-...     image_processor=image_processor,
+...     processor=image_processor,
 ...     compute_metrics=compute_metrics,
 ...     data_collator=collate_fn,
 ... )
diff --git a/examples/pytorch/image-classification/run_image_classification.py b/examples/pytorch/image-classification/run_image_classification.py
index 1c952e560144..805db5d32dae 100755
--- a/examples/pytorch/image-classification/run_image_classification.py
+++ b/examples/pytorch/image-classification/run_image_classification.py
@@ -411,7 +411,7 @@ def val_transforms(example_batch):
         train_dataset=dataset["train"] if training_args.do_train else None,
         eval_dataset=dataset["validation"] if training_args.do_eval else None,
         compute_metrics=compute_metrics,
-        image_processor=image_processor,
+        processor=image_processor,
         data_collator=collate_fn,
     )

diff --git a/examples/pytorch/image-pretraining/run_mae.py b/examples/pytorch/image-pretraining/run_mae.py
index 0f098caf0237..d89dd076e55a 100644
--- a/examples/pytorch/image-pretraining/run_mae.py
+++ b/examples/pytorch/image-pretraining/run_mae.py
@@ -369,7 +369,7 @@ def preprocess_images(examples):
         args=training_args,
         train_dataset=ds["train"] if training_args.do_train else None,
         eval_dataset=ds["validation"] if training_args.do_eval else None,
-        image_processor=image_processor,
+        processor=image_processor,
         data_collator=collate_fn,
     )

diff --git a/examples/pytorch/image-pretraining/run_mim.py b/examples/pytorch/image-pretraining/run_mim.py
index e1afeece12c8..2a47f3864341 100644
--- a/examples/pytorch/image-pretraining/run_mim.py
+++ b/examples/pytorch/image-pretraining/run_mim.py
@@ -458,7 +458,7 @@ def preprocess_images(examples):
         args=training_args,
         train_dataset=ds["train"] if training_args.do_train else None,
         eval_dataset=ds["validation"] if training_args.do_eval else None,
-        image_processor=image_processor,
+        processor=image_processor,
         data_collator=collate_fn,
     )

diff --git a/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py b/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py
index 8324531ccb04..101b9bce7921 100644
--- a/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py
+++ b/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py
@@ -510,7 +510,7 @@ def preprocess_val(example_batch):
         train_dataset=dataset["train"] if training_args.do_train else None,
         eval_dataset=dataset["validation"] if training_args.do_eval else None,
         compute_metrics=compute_metrics,
-        image_processor=image_processor,
+        processor=image_processor,
         data_collator=default_data_collator,
     )

diff --git a/examples/tensorflow/image-classification/run_image_classification.py b/examples/tensorflow/image-classification/run_image_classification.py
index ab2de73a3b83..727aa6663654 100644
--- a/examples/tensorflow/image-classification/run_image_classification.py
+++ b/examples/tensorflow/image-classification/run_image_classification.py
@@ -552,7 +552,7 @@ def compute_metrics(p):
                     output_dir=training_args.output_dir,
                     hub_model_id=push_to_hub_model_id,
                     hub_token=training_args.push_to_hub_token,
-                    image_processor=image_processor,
+                    processor=image_processor,
                     **model_card_kwargs,
                 )
             )
diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index 227e92fa638d..9eb4c5992cba 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -58,6 +58,7 @@
 from .configuration_utils import PretrainedConfig
 from .data.data_collator import DataCollator, DataCollatorWithPadding, default_data_collator
 from .debug_utils import DebugOption, DebugUnderflowOverflow
+from .feature_extraction_sequence_utils import SequenceFeatureExtractor
 from .hyperparameter_search import ALL_HYPERPARAMETER_SEARCH_BACKENDS, default_hp_search_backend
 from .image_processing_utils import BaseImageProcessor
 from .integrations.deepspeed import deepspeed_init, deepspeed_load_checkpoint, is_deepspeed_available
@@ -69,6 +70,7 @@
     MODEL_MAPPING_NAMES,
 )
 from .optimization import Adafactor, get_scheduler
+from .processing_utils import ProcessorMixin
 from .pytorch_utils import ALL_LAYERNORM_LAYERS, is_torch_greater_or_equal_than_1_13
 from .tokenization_utils_base import PreTrainedTokenizerBase
 from .trainer_callback import (
@@ -285,8 +287,9 @@ class Trainer:
             `output_dir` set to a directory named *tmp_trainer* in the current directory if not provided.
         data_collator (`DataCollator`, *optional*):
             The function to use to form a batch from a list of elements of `train_dataset` or `eval_dataset`. Will
-            default to [`default_data_collator`] if no `tokenizer` is provided, an instance of
-            [`DataCollatorWithPadding`] otherwise.
+            default to [`default_data_collator`] if no `processor` is provided.
+
+            If the `processor` passed is a tokenizer, will default to an instance of [`DataCollatorWithPadding`].
         train_dataset (`torch.utils.data.Dataset` or `torch.utils.data.IterableDataset`, *optional*):
             The dataset to use for training. If it is a [`~datasets.Dataset`], columns not accepted by the
             `model.forward()` method are automatically removed.
@@ -300,10 +303,13 @@ class Trainer:
             The dataset to use for evaluation. If it is a [`~datasets.Dataset`], columns not accepted by the
             `model.forward()` method are automatically removed. If it is a dictionary, it will evaluate on each
             dataset prepending the dictionary key to the metric name.
-        tokenizer ([`PreTrainedTokenizerBase`], *optional*):
-            The tokenizer used to preprocess the data. If provided, will be used to automatically pad the inputs to the
-            maximum length when batching inputs, and it will be saved along the model to make it easier to rerun an
-            interrupted training or reuse the fine-tuned model.
+        processor ([`PreTrainedTokenizer` or `BaseImageProcessor` or `SequenceFeatureExtractor` or `ProcessorMixin`], *optional*):
+            The processor used to preprocess the data. Can be a tokenizer, an image processor, a feature extractor, or a multimodal processor.
+
+            If a tokenizer is provided, it will be used to automatically pad the inputs to the
+            maximum length when batching inputs.
+
+            The processor will be saved along with the model to make it easier to rerun an interrupted training or reuse the fine-tuned model.
         model_init (`Callable[[], PreTrainedModel]`, *optional*):
             A function that instantiates the model to be used. If provided, each call to [`~Trainer.train`] will start
             from a new instance of the model as given by this function.
@@ -360,14 +366,24 @@ def __init__(
         data_collator: Optional[DataCollator] = None,
         train_dataset: Optional[Union[Dataset, IterableDataset]] = None,
         eval_dataset: Optional[Union[Dataset, Dict[str, Dataset]]] = None,
-        tokenizer: Optional[PreTrainedTokenizerBase] = None,
+        processor: Optional[
+            Union[PreTrainedTokenizerBase, BaseImageProcessor, SequenceFeatureExtractor, ProcessorMixin]
+        ] = None,
         model_init: Optional[Callable[[], PreTrainedModel]] = None,
         compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
         callbacks: Optional[List[TrainerCallback]] = None,
         optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
         preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None,
-        image_processor: Optional["BaseImageProcessor"] = None,
+        tokenizer: Optional[PreTrainedTokenizerBase] = None,
     ):
+        if tokenizer is not None:
+            warnings.warn(
+                "The `tokenizer` argument is deprecated and will be removed in v5 of Transformers. You can use `processor` "
+                "instead to pass your tokenizer/image processor/feature extractor/multimodal processor object.",
+                FutureWarning,
+            )
+            processor = tokenizer
+
         if args is None:
             output_dir = "tmp_trainer"
             logger.info(f"No `TrainingArguments` passed, using `output_dir={output_dir}`.")
@@ -490,12 +506,15 @@ def __init__(
         ):
             self.place_model_on_device = False

-        default_collator = DataCollatorWithPadding(tokenizer) if tokenizer is not None else default_data_collator
+        default_collator = (
+            DataCollatorWithPadding(processor)
+            if processor is not None and isinstance(processor, PreTrainedTokenizerBase)
+            else default_data_collator
+        )
         self.data_collator = data_collator if data_collator is not None else default_collator
         self.train_dataset = train_dataset
         self.eval_dataset = eval_dataset
-        self.tokenizer = tokenizer
-        self.image_processor = image_processor
+        self.processor = processor

         # Bnb Quantized models doesn't support `.to` operation.
         if (
@@ -547,7 +566,7 @@ def __init__(
         default_callbacks = DEFAULT_CALLBACKS + get_reporting_integration_callbacks(self.args.report_to)
         callbacks = default_callbacks if callbacks is None else default_callbacks + callbacks
         self.callback_handler = CallbackHandler(
-            callbacks, self.model, self.tokenizer, self.image_processor, self.optimizer, self.lr_scheduler
+            callbacks, self.model, self.processor, self.optimizer, self.lr_scheduler
         )
         self.add_callback(PrinterCallback if self.args.disable_tqdm else DEFAULT_PROGRESS_CALLBACK)

@@ -668,6 +687,14 @@ def __init__(
             num_devices = xr.global_runtime_device_count()
             xs.set_global_mesh(xs.Mesh(np.array(range(num_devices)), (num_devices, 1), axis_names=("fsdp", "tensor")))

+    @property
+    def tokenizer(self):
+        warnings.warn(
+            "The 'tokenizer' attribute is deprecated and will be removed in v5 of Transformers. Use `processor` instead",
+            FutureWarning,
+        )
+        return self.processor
+
     def _activate_neftune(self, model):
         r"""
         Activates the neftune as presented in this code: https://github.com/neelsjain/NEFTune and paper:
@@ -821,7 +848,7 @@ def _get_train_sampler(self) -> Optional[torch.utils.data.Sampler]:
             )
         else:
             lengths = None
-        model_input_name = self.tokenizer.model_input_names[0] if self.tokenizer is not None else None
+        model_input_name = self.processor.model_input_names[0] if self.processor is not None else None
         return LengthGroupedSampler(
             self.args.train_batch_size * self.args.gradient_accumulation_steps,
             dataset=self.train_dataset,
@@ -3280,10 +3307,8 @@ def _save_tpu(self, output_dir: Optional[str] = None):
                 save_function=xm.save,
                 safe_serialization=self.args.save_safetensors,
             )
-        if self.tokenizer is not None and self.args.should_save:
-            self.tokenizer.save_pretrained(output_dir)
-        if self.image_processor is not None and self.args.should_save:
-            self.image_processor.save_pretrained(output_dir)
+        if self.processor is not None and self.args.should_save:
+            self.processor.save_pretrained(output_dir)

         # We moved the model from TPU -> CPU for saving the weights.
         # Now we should move it back to subsequent compute still works.
@@ -3319,10 +3344,8 @@ def _save(self, output_dir: Optional[str] = None, state_dict=None):
                 output_dir, state_dict=state_dict, safe_serialization=self.args.save_safetensors
             )

-        if self.tokenizer is not None:
-            self.tokenizer.save_pretrained(output_dir)
-        if self.image_processor is not None:
-            self.image_processor.save_pretrained(output_dir)
+        if self.processor is not None:
+            self.processor.save_pretrained(output_dir)

         # Good practice: save your training arguments together with the trained model
         torch.save(self.args, os.path.join(output_dir, TRAINING_ARGS_NAME))
@@ -4017,12 +4040,9 @@ def _push_from_checkpoint(self, checkpoint_folder):
         for modeling_file in modeling_files:
             if os.path.isfile(os.path.join(checkpoint_folder, modeling_file)):
                 shutil.copy(os.path.join(checkpoint_folder, modeling_file), os.path.join(output_dir, modeling_file))
-        # Saving the tokenizer is fast and we don't know how many files it may have spawned, so we resave it to be sure.
-        if self.tokenizer is not None:
-            self.tokenizer.save_pretrained(output_dir)
-        # Same for the image processor
-        if self.image_processor is not None:
-            self.image_processor.save_pretrained(output_dir)
+        # Saving the processor is fast and we don't know how many files it may have spawned, so we resave it to be sure.
+        if self.processor is not None:
+            self.processor.save_pretrained(output_dir)
         # Same for the training arguments
         torch.save(self.args, os.path.join(output_dir, TRAINING_ARGS_NAME))

@@ -4076,7 +4096,7 @@ def push_to_hub(
         **kwargs,
     ) -> str:
         """
-        Upload `self.model` and `self.tokenizer` or `self.image_processor` to the 🤗 model hub on the repo `self.args.hub_model_id`.
+        Upload `self.model` and `self.processor` to the 🤗 model hub on the repo `self.args.hub_model_id`.

         Parameters:
             commit_message (`str`, *optional*, defaults to `"End of training"`):
diff --git a/src/transformers/trainer_callback.py b/src/transformers/trainer_callback.py
index a9cb6eca596f..d46f7633ac7e 100644
--- a/src/transformers/trainer_callback.py
+++ b/src/transformers/trainer_callback.py
@@ -17,6 +17,7 @@
 """
 import dataclasses
 import json
+import warnings
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Union

@@ -187,10 +188,8 @@ class TrainerCallback:
             The object that is returned to the [`Trainer`] and can be used to make some decisions.
         model ([`PreTrainedModel`] or `torch.nn.Module`):
             The model being trained.
-        tokenizer ([`PreTrainedTokenizer`]):
-            The tokenizer used for encoding the data.
-        image_processor ([`BaseImageProcessor`]):
-            The image processor used for encoding the images.
+        processor ([`PreTrainedTokenizer` or `BaseImageProcessor` or `SequenceFeatureExtractor` or `ProcessorMixin`]):
+            The processor used to preprocess the data. Can be a tokenizer, an image processor, a feature extractor, or a multimodal processor.
         optimizer (`torch.optim.Optimizer`):
             The optimizer used for the training steps.
         lr_scheduler (`torch.optim.lr_scheduler.LambdaLR`):
@@ -309,13 +308,20 @@ def on_prediction_step(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs):
 class CallbackHandler(TrainerCallback):
     """Internal class that just calls the list of callbacks in order."""

-    def __init__(self, callbacks, model, tokenizer, image_processor, optimizer, lr_scheduler):
+    def __init__(self, callbacks, model, processor, optimizer, lr_scheduler, tokenizer=None):
+        if tokenizer is not None:
+            warnings.warn(
+                "The `tokenizer` argument is deprecated and will be removed in v5 of Transformers. You can use `processor` "
+                "instead to pass your tokenizer/image processor/feature extractor/multimodal processor object.",
+                FutureWarning,
+            )
+            processor = tokenizer
+
         self.callbacks = []
         for cb in callbacks:
             self.add_callback(cb)
         self.model = model
-        self.tokenizer = tokenizer
-        self.image_processor = image_processor
+        self.processor = processor
         self.optimizer = optimizer
         self.lr_scheduler = lr_scheduler
         self.train_dataloader = None
@@ -419,8 +425,7 @@ def call_event(self, event, args, state, control, **kwargs):
                 state,
                 control,
                 model=self.model,
-                tokenizer=self.tokenizer,
-                image_processor=self.image_processor,
+                processor=self.processor,
                 optimizer=self.optimizer,
                 lr_scheduler=self.lr_scheduler,
                 train_dataloader=self.train_dataloader,
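
Usage note (not part of the patch): the sketch below shows the renamed argument end to end, assuming this branch is installed. The ViT checkpoint name and `output_dir` are illustrative placeholders, not values the diff prescribes.

```python
from transformers import (
    AutoImageProcessor,
    AutoModelForImageClassification,
    Trainer,
    TrainingArguments,
)

# Illustrative checkpoint; any model whose preprocessing object the Trainer
# should save alongside the weights works the same way.
checkpoint = "google/vit-base-patch16-224-in21k"
image_processor = AutoImageProcessor.from_pretrained(checkpoint)
model = AutoModelForImageClassification.from_pretrained(checkpoint)

# New-style call: the single `processor` argument replaces both the old
# `tokenizer=` and `image_processor=` keywords, and the object is saved with
# the model on checkpointing and `push_to_hub`.
trainer = Trainer(
    model=model,
    args=TrainingArguments(output_dir="food_classifier"),
    processor=image_processor,
)

# Legacy call: still accepted during the deprecation window, but it emits a
# FutureWarning and the object is forwarded to `processor` internally.
legacy_trainer = Trainer(
    model=model,
    args=TrainingArguments(output_dir="food_classifier"),
    tokenizer=image_processor,
)
```

Collator selection is unchanged for text models: when the object passed as `processor` is a `PreTrainedTokenizerBase`, the default collator is still `DataCollatorWithPadding`; any other processor type falls back to `default_data_collator`.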