@@ -584,7 +584,7 @@ def compute_metrics(pred):
         args=training_args,
         train_dataset=vectorized_datasets["train"] if training_args.do_train else None,
         eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None,
-        tokenizer=feature_extractor,
+        processor=processor,
         data_collator=data_collator,
         compute_metrics=compute_metrics if training_args.predict_with_generate else None,
     )
@@ -709,7 +709,7 @@ def compute_metrics(pred):
         compute_metrics=compute_metrics,
         train_dataset=vectorized_datasets["train"] if training_args.do_train else None,
         eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None,
-        tokenizer=feature_extractor,
+        processor=processor,
         optimizers=optimizers,
     )

4 changes: 2 additions & 2 deletions examples/research_projects/xtreme-s/run_xtreme_s.py
@@ -844,7 +844,7 @@ def compute_classification_metric(pred):
             compute_metrics=compute_asr_metric if training_args.predict_with_generate else None,
             train_dataset=vectorized_datasets["train"] if training_args.do_train else None,
             eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None,
-            tokenizer=feature_extractor,
+            processor=processor,
         )
     else:
         trainer = Trainer(
@@ -855,7 +855,7 @@ def compute_classification_metric(pred):
             compute_metrics=compute_asr_metric if is_text_target else compute_classification_metric,
             train_dataset=vectorized_datasets["train"] if training_args.do_train else None,
             eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None,
-            tokenizer=feature_extractor,
+            processor=processor,
         )
 
     # 8. Finally, we can start training
20 changes: 19 additions & 1 deletion src/transformers/trainer.py
@@ -69,6 +69,7 @@
     MODEL_MAPPING_NAMES,
 )
 from .optimization import Adafactor, get_scheduler
+from .processing_utils import ProcessorMixin
 from .pytorch_utils import (
     ALL_LAYERNORM_LAYERS,
     is_torch_greater_or_equal_than_1_13,
@@ -318,6 +319,10 @@ class Trainer:
             The tokenizer used to preprocess the data. If provided, will be used to automatically pad the inputs to the
             maximum length when batching inputs, and it will be saved along the model to make it easier to rerun an
             interrupted training or reuse the fine-tuned model.
+        processor ([`ProcessorMixin`], *optional*):
+            The processor used to pre- and post-process the data for multimodal models. If provided, will be used to
+            automatically pad the inputs to the maximum length when batching inputs, and it will be saved along the
+            model to make it easier to rerun an interrupted training or reuse the fine-tuned model.
         model_init (`Callable[[], PreTrainedModel]`, *optional*):
             A function that instantiates the model to be used. If provided, each call to [`~Trainer.train`] will start
             from a new instance of the model as given by this function.
@@ -375,6 +380,7 @@ def __init__(
         train_dataset: Optional[Union[Dataset, IterableDataset, "datasets.Dataset"]] = None,
         eval_dataset: Optional[Union[Dataset, Dict[str, Dataset], "datasets.Dataset"]] = None,
         tokenizer: Optional[PreTrainedTokenizerBase] = None,
+        processor: Optional[ProcessorMixin] = None,
         model_init: Optional[Callable[[], PreTrainedModel]] = None,
         compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
         callbacks: Optional[List[TrainerCallback]] = None,
@@ -510,6 +516,19 @@ def __init__(
         ):
             self.place_model_on_device = False
 
+        if processor is not None and tokenizer is not None:
+            raise ValueError(
+                "You cannot pass both `processor` and `tokenizer` to the Trainer. Only pass the `processor` if defined."
+            )
+        elif processor is not None:
+            self.tokenizer = processor
+            if hasattr(processor, "feature_extractor"):
+                tokenizer = processor.feature_extractor
+            elif hasattr(processor, "tokenizer"):
+                tokenizer = processor.tokenizer
+        else:
+            self.tokenizer = tokenizer
+
         default_collator = (
             DataCollatorWithPadding(tokenizer)
             if tokenizer is not None and isinstance(tokenizer, (PreTrainedTokenizerBase, SequenceFeatureExtractor))
@@ -518,7 +537,6 @@ def __init__(
         self.data_collator = data_collator if data_collator is not None else default_collator
         self.train_dataset = train_dataset
         self.eval_dataset = eval_dataset
-        self.tokenizer = tokenizer
 
         # Bnb Quantized models doesn't support `.to` operation.
         if (
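Net effect of the trainer.py change: Trainer now accepts a multimodal processor directly, stores it as self.tokenizer for padding and saving purposes, and builds the default data collator from processor.feature_extractor (or processor.tokenizer as a fallback); passing both processor and tokenizer raises a ValueError. A minimal usage sketch under this diff — the checkpoint, model class, and output directory are illustrative assumptions, not part of the change:

from transformers import AutoModelForCTC, AutoProcessor, Trainer, TrainingArguments

# Illustrative speech setup: the processor bundles a feature extractor and a tokenizer.
model = AutoModelForCTC.from_pretrained("facebook/wav2vec2-base-960h")
processor = AutoProcessor.from_pretrained("facebook/wav2vec2-base-960h")

trainer = Trainer(
    model=model,
    args=TrainingArguments(output_dir="tmp_trainer"),
    processor=processor,  # new in this diff; replaces tokenizer=feature_extractor at call sites
)

# Per the logic above, the default data collator is fed processor.feature_extractor here,
# and Trainer(..., tokenizer=..., processor=...) would raise a ValueError.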
3 changes: 3 additions & 0 deletions src/transformers/trainer_seq2seq.py
@@ -30,6 +30,7 @@
 if TYPE_CHECKING:
     from .data.data_collator import DataCollator
     from .modeling_utils import PreTrainedModel
+    from .processing_utils import ProcessorMixin
     from .tokenization_utils_base import PreTrainedTokenizerBase
     from .trainer_callback import TrainerCallback
     from .trainer_utils import EvalPrediction, PredictionOutput
@@ -48,6 +49,7 @@ def __init__(
         train_dataset: Optional[Dataset] = None,
         eval_dataset: Optional[Union[Dataset, Dict[str, Dataset]]] = None,
         tokenizer: Optional["PreTrainedTokenizerBase"] = None,
+        processor: Optional["ProcessorMixin"] = None,
         model_init: Optional[Callable[[], "PreTrainedModel"]] = None,
         compute_metrics: Optional[Callable[["EvalPrediction"], Dict]] = None,
         callbacks: Optional[List["TrainerCallback"]] = None,
@@ -61,6 +63,7 @@ def __init__(
             train_dataset=train_dataset,
             eval_dataset=eval_dataset,
             tokenizer=tokenizer,
+            processor=processor,
             model_init=model_init,
             compute_metrics=compute_metrics,
             callbacks=callbacks,
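Seq2SeqTrainer only threads the new argument through to Trainer, so the example scripts above can hand over their processor unchanged. A hedged sketch mirroring the speech seq2seq examples — the Whisper checkpoint and generation settings are assumptions for illustration, not taken from this diff:

from transformers import (
    AutoModelForSpeechSeq2Seq,
    AutoProcessor,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
)

model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-tiny")
processor = AutoProcessor.from_pretrained("openai/whisper-tiny")  # feature extractor + tokenizer

trainer = Seq2SeqTrainer(
    model=model,
    args=Seq2SeqTrainingArguments(output_dir="tmp_seq2seq", predict_with_generate=True),
    processor=processor,  # forwarded to Trainer; no separate tokenizer= argument needed
)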