Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 66 additions & 24 deletions src/transformers/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,21 @@
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union

import numpy as np
import torch
from huggingface_hub import Repository
from packaging import version
from torch import nn
from torch.utils.data import DataLoader, Dataset, RandomSampler, SequentialSampler
from torch.utils.data.distributed import DistributedSampler
from tqdm.auto import tqdm

from . import __version__
from .configuration_utils import PretrainedConfig
from .data.data_collator import DataCollator, DataCollatorWithPadding, default_data_collator
from .debug_utils import DebugOption, DebugUnderflowOverflow
from .deepspeed import deepspeed_init, deepspeed_reinit, is_deepspeed_zero3_enabled
from .dependency_versions_check import dep_version_check

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This shouldn't be changed by make style. Are you sure you have the proper version of the formatting tools installed (pip install .[quality] in the repo)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I'm sorry. I tried to install manually some of the dependencies, but it was still failing some parts. I've done the changes to bring it back to its original state, adding your also the changes from your empty lines.


# Integrations must be imported before ML frameworks:
from .integrations import ( # isort: split
Expand All @@ -48,22 +61,6 @@
run_hp_search_sigopt,
run_hp_search_wandb,
)

import numpy as np
import torch
from packaging import version
from torch import nn
from torch.utils.data import DataLoader, Dataset, RandomSampler, SequentialSampler
from torch.utils.data.distributed import DistributedSampler

from huggingface_hub import Repository

from . import __version__
from .configuration_utils import PretrainedConfig
from .data.data_collator import DataCollator, DataCollatorWithPadding, default_data_collator
from .debug_utils import DebugOption, DebugUnderflowOverflow
from .deepspeed import deepspeed_init, deepspeed_reinit, is_deepspeed_zero3_enabled
from .dependency_versions_check import dep_version_check
from .modelcard import TrainingSummary
from .modeling_utils import PreTrainedModel, unwrap_model
from .models.auto.modeling_auto import MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES
Expand Down Expand Up @@ -181,13 +178,11 @@

from .trainer_pt_utils import smp_forward_backward, smp_forward_only, smp_gather, smp_nested_concat


Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need to remove this line.

if TYPE_CHECKING:
import optuna

logger = logging.get_logger(__name__)


Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here

# Name of the files used for checkpointing
TRAINING_ARGS_NAME = "training_args.bin"
TRAINER_STATE_NAME = "trainer_state.json"
Expand Down Expand Up @@ -2387,7 +2382,6 @@ def evaluation_loop(

# if eval is called w/o train init deepspeed here
if args.deepspeed and not self.deepspeed:

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And same here

# XXX: eval doesn't have `resume_from_checkpoint` arg but we should be able to do eval
# from the checkpoint eventually
deepspeed_engine, _, _ = deepspeed_init(
Expand Down Expand Up @@ -2433,10 +2427,13 @@ def evaluation_loop(
losses_host = None
preds_host = None
labels_host = None
inputs_host = None

# losses/preds/labels on CPU (final containers)
all_losses = None
all_preds = None
all_labels = None
all_inputs = None
# Will be useful when we have an iterable dataset so don't know its length.

observed_num_examples = 0
Expand All @@ -2452,6 +2449,7 @@ def evaluation_loop(

# Prediction step
loss, logits, labels = self.prediction_step(model, inputs, prediction_loss_only, ignore_keys=ignore_keys)
inputs_decode = inputs["input_ids"] if args.include_inputs_for_metrics else None

if is_torch_tpu_available():
xm.mark_step()
Expand All @@ -2464,6 +2462,14 @@ def evaluation_loop(
labels = self._pad_across_processes(labels)
labels = self._nested_gather(labels)
labels_host = labels if labels_host is None else nested_concat(labels_host, labels, padding_index=-100)
if inputs_decode is not None:
inputs_decode = self._pad_across_processes(inputs_decode)
inputs_decode = self._nested_gather(inputs_decode)
inputs_host = (
inputs_decode
if inputs_host is None
else nested_concat(inputs_host, inputs_decode, padding_index=-100)
)
if logits is not None:
logits = self._pad_across_processes(logits)
logits = self._nested_gather(logits)
Expand All @@ -2480,14 +2486,21 @@ def evaluation_loop(
if preds_host is not None:
logits = nested_numpify(preds_host)
all_preds = logits if all_preds is None else nested_concat(all_preds, logits, padding_index=-100)
if inputs_host is not None:
inputs_decode = nested_numpify(inputs_host)
all_inputs = (
inputs_decode
if all_inputs is None
else nested_concat(all_inputs, inputs_decode, padding_index=-100)
)
if labels_host is not None:
labels = nested_numpify(labels_host)
all_labels = (
labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100)
)

# Set back to None to begin a new accumulation
losses_host, preds_host, labels_host = None, None, None
losses_host, preds_host, inputs_host, labels_host = None, None, None, None

if args.past_index and hasattr(self, "_past"):
# Clean the state at the end of the evaluation loop
Expand All @@ -2500,6 +2513,11 @@ def evaluation_loop(
if preds_host is not None:
logits = nested_numpify(preds_host)
all_preds = logits if all_preds is None else nested_concat(all_preds, logits, padding_index=-100)
if inputs_host is not None:
inputs_decode = nested_numpify(inputs_host)
all_inputs = (
inputs_decode if all_inputs is None else nested_concat(all_inputs, inputs_decode, padding_index=-100)
)
if labels_host is not None:
labels = nested_numpify(labels_host)
all_labels = labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100)
Expand All @@ -2522,10 +2540,17 @@ def evaluation_loop(
all_preds = nested_truncate(all_preds, num_samples)
if all_labels is not None:
all_labels = nested_truncate(all_labels, num_samples)
if all_inputs is not None:
all_inputs = nested_truncate(all_inputs, num_samples)

# Metrics!
if self.compute_metrics is not None and all_preds is not None and all_labels is not None:
metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels))
if args.include_inputs_for_metrics:
metrics = self.compute_metrics(
EvalPrediction(predictions=all_preds, label_ids=all_labels, inputs=all_inputs)
)
else:
metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels))
else:
metrics = {}

Expand Down Expand Up @@ -2905,7 +2930,6 @@ def prediction_loop(

# if eval is called w/o train init deepspeed here
if args.deepspeed and not self.deepspeed:

# XXX: eval doesn't have `resume_from_checkpoint` arg but we should be able to do eval
# from the checkpoint eventually
deepspeed_engine, _, _ = deepspeed_init(self, num_training_steps=0, resume_from_checkpoint=None)
Expand Down Expand Up @@ -2936,6 +2960,7 @@ def prediction_loop(
losses_host: torch.Tensor = None
preds_host: Union[torch.Tensor, List[torch.Tensor]] = None
labels_host: Union[torch.Tensor, List[torch.Tensor]] = None
inputs_host: Union[torch.Tensor, List[torch.Tensor]] = None

world_size = max(1, args.world_size)

Expand All @@ -2948,6 +2973,7 @@ def prediction_loop(
make_multiple_of = dataloader.sampler.batch_size
preds_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=make_multiple_of)
labels_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=make_multiple_of)
inputs_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=make_multiple_of)

model.eval()

Expand All @@ -2961,13 +2987,21 @@ def prediction_loop(

for step, inputs in enumerate(dataloader):
loss, logits, labels = self.prediction_step(model, inputs, prediction_loss_only, ignore_keys=ignore_keys)
inputs_decode = inputs["input_ids"] if args.include_inputs_for_metrics else None

if loss is not None:
losses = loss.repeat(batch_size)
losses_host = losses if losses_host is None else torch.cat((losses_host, losses), dim=0)
if logits is not None:
preds_host = logits if preds_host is None else nested_concat(preds_host, logits, padding_index=-100)
if labels is not None:
labels_host = labels if labels_host is None else nested_concat(labels_host, labels, padding_index=-100)
if inputs_decode is not None:
inputs_host = (
inputs_decode
if inputs_host is None
else nested_concat(inputs_host, inputs_decode, padding_index=-100)
)
self.control = self.callback_handler.on_prediction_step(args, self.state, self.control)

# Gather all tensors and put them back on the CPU if we have done enough accumulation steps.
Expand All @@ -2976,9 +3010,10 @@ def prediction_loop(
if not prediction_loss_only:
preds_gatherer.add_arrays(self._gather_and_numpify(preds_host, "eval_preds"))
labels_gatherer.add_arrays(self._gather_and_numpify(labels_host, "eval_label_ids"))
inputs_gatherer.add_arrays(self._gather_and_numpify(inputs_host, "eval_inputs_ids"))

# Set back to None to begin a new accumulation
losses_host, preds_host, labels_host = None, None, None
losses_host, preds_host, labels_host, inputs_host = None, None, None, None

if args.past_index and hasattr(self, "_past"):
# Clean the state at the end of the evaluation loop
Expand All @@ -2989,13 +3024,20 @@ def prediction_loop(
if not prediction_loss_only:
preds_gatherer.add_arrays(self._gather_and_numpify(preds_host, "eval_preds"))
labels_gatherer.add_arrays(self._gather_and_numpify(labels_host, "eval_label_ids"))
inputs_gatherer.add_arrays(self._gather_and_numpify(inputs_host, "eval_inputs_ids"))

eval_loss = eval_losses_gatherer.finalize()
preds = preds_gatherer.finalize() if not prediction_loss_only else None
label_ids = labels_gatherer.finalize() if not prediction_loss_only else None
inputs_ids = inputs_gatherer.finalize() if not prediction_loss_only else None

if self.compute_metrics is not None and preds is not None and label_ids is not None:
metrics = self.compute_metrics(EvalPrediction(predictions=preds, label_ids=label_ids))
if args.include_inputs_for_metrics:
metrics = self.compute_metrics(
EvalPrediction(predictions=preds, label_ids=label_ids, inputs=inputs_ids)
)
else:
metrics = self.compute_metrics(EvalPrediction(predictions=preds, label_ids=label_ids))
else:
metrics = {}

Expand Down
32 changes: 29 additions & 3 deletions src/transformers/trainer_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,17 +64,43 @@ def set_seed(seed: int):
tf.random.set_seed(seed)


class EvalPrediction(NamedTuple):
class EvalPrediction:
"""
Evaluation output (always contains labels), to be used to compute metrics.

Parameters:
predictions (`np.ndarray`): Predictions of the model.
label_ids (`np.ndarray`): Targets to be matched.
inputs (`np.ndarray`, *optional*)
"""

predictions: Union[np.ndarray, Tuple[np.ndarray]]
label_ids: Union[np.ndarray, Tuple[np.ndarray]]
def __init__(
self,
predictions: Union[np.ndarray, Tuple[np.ndarray]],
label_ids: Union[np.ndarray, Tuple[np.ndarray]],
inputs: Optional[Union[np.ndarray, Tuple[np.ndarray]]] = None,
):
self.predictions = predictions
self.label_ids = label_ids
self.inputs = inputs

def __iter__(self):
if self.inputs is not None:
return iter((self.predictions, self.label_ids, self.inputs))
else:
return iter((self.predictions, self.label_ids))

def __getitem__(self, idx):
if idx < 0 or idx > 2:
raise IndexError("tuple index out of range")
if idx == 2 and self.inputs is None:
raise IndexError("tuple index out of range")
if idx == 0:
return self.predictions
elif idx == 1:
return self.label_ids
elif idx == 2:
return self.inputs


class EvalLoopOutput(NamedTuple):
Expand Down
6 changes: 6 additions & 0 deletions src/transformers/training_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,9 @@ class TrainingArguments:
`huggingface-cli login`.
gradient_checkpointing (`bool`, *optional*, defaults to `False`):
If True, use gradient checkpointing to save memory at the expense of slower backward pass.
include_inputs_for_metrics (`bool`, *optional*, defaults to `False`):
Whether or not the inputs will be passed to the `compute_metrics` function. This is intended for
metrics that need inputs, predictions and references for scoring calculation in Metric class.
"""

output_dir: str = field(
Expand Down Expand Up @@ -740,6 +743,9 @@ class TrainingArguments:
"help": "If True, use gradient checkpointing to save memory at the expense of slower backward pass."
},
)
include_inputs_for_metrics: bool = field(
default=False, metadata={"help": "Whether or not the inputs will be passed to the `compute_metrics` function."}
)
# Deprecated arguments
fp16_backend: str = field(
default="auto",
Expand Down