-
Notifications
You must be signed in to change notification settings - Fork 33.6k
Add inputs vector to calculate metric method #16461
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
395bd38
c9b0686
3ba1d57
e1fb65e
a4e09da
db877dc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -30,8 +30,21 @@ | |
| from pathlib import Path | ||
| from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union | ||
|
|
||
| import numpy as np | ||
| import torch | ||
| from huggingface_hub import Repository | ||
| from packaging import version | ||
| from torch import nn | ||
| from torch.utils.data import DataLoader, Dataset, RandomSampler, SequentialSampler | ||
| from torch.utils.data.distributed import DistributedSampler | ||
| from tqdm.auto import tqdm | ||
|
|
||
| from . import __version__ | ||
| from .configuration_utils import PretrainedConfig | ||
| from .data.data_collator import DataCollator, DataCollatorWithPadding, default_data_collator | ||
| from .debug_utils import DebugOption, DebugUnderflowOverflow | ||
| from .deepspeed import deepspeed_init, deepspeed_reinit, is_deepspeed_zero3_enabled | ||
| from .dependency_versions_check import dep_version_check | ||
|
|
||
| # Integrations must be imported before ML frameworks: | ||
| from .integrations import ( # isort: split | ||
|
|
@@ -48,22 +61,6 @@ | |
| run_hp_search_sigopt, | ||
| run_hp_search_wandb, | ||
| ) | ||
|
|
||
| import numpy as np | ||
| import torch | ||
| from packaging import version | ||
| from torch import nn | ||
| from torch.utils.data import DataLoader, Dataset, RandomSampler, SequentialSampler | ||
| from torch.utils.data.distributed import DistributedSampler | ||
|
|
||
| from huggingface_hub import Repository | ||
|
|
||
| from . import __version__ | ||
| from .configuration_utils import PretrainedConfig | ||
| from .data.data_collator import DataCollator, DataCollatorWithPadding, default_data_collator | ||
| from .debug_utils import DebugOption, DebugUnderflowOverflow | ||
| from .deepspeed import deepspeed_init, deepspeed_reinit, is_deepspeed_zero3_enabled | ||
| from .dependency_versions_check import dep_version_check | ||
| from .modelcard import TrainingSummary | ||
| from .modeling_utils import PreTrainedModel, unwrap_model | ||
| from .models.auto.modeling_auto import MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES | ||
|
|
@@ -181,13 +178,11 @@ | |
|
|
||
| from .trainer_pt_utils import smp_forward_backward, smp_forward_only, smp_gather, smp_nested_concat | ||
|
|
||
|
|
||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No need to remove this line. |
||
| if TYPE_CHECKING: | ||
| import optuna | ||
|
|
||
| logger = logging.get_logger(__name__) | ||
|
|
||
|
|
||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same here |
||
| # Name of the files used for checkpointing | ||
| TRAINING_ARGS_NAME = "training_args.bin" | ||
| TRAINER_STATE_NAME = "trainer_state.json" | ||
|
|
@@ -2387,7 +2382,6 @@ def evaluation_loop( | |
|
|
||
| # if eval is called w/o train init deepspeed here | ||
| if args.deepspeed and not self.deepspeed: | ||
|
|
||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. And same here |
||
| # XXX: eval doesn't have `resume_from_checkpoint` arg but we should be able to do eval | ||
| # from the checkpoint eventually | ||
| deepspeed_engine, _, _ = deepspeed_init( | ||
|
|
@@ -2433,10 +2427,13 @@ def evaluation_loop( | |
| losses_host = None | ||
| preds_host = None | ||
| labels_host = None | ||
| inputs_host = None | ||
|
|
||
| # losses/preds/labels on CPU (final containers) | ||
| all_losses = None | ||
| all_preds = None | ||
| all_labels = None | ||
| all_inputs = None | ||
| # Will be useful when we have an iterable dataset so don't know its length. | ||
|
|
||
| observed_num_examples = 0 | ||
|
|
@@ -2452,6 +2449,7 @@ def evaluation_loop( | |
|
|
||
| # Prediction step | ||
| loss, logits, labels = self.prediction_step(model, inputs, prediction_loss_only, ignore_keys=ignore_keys) | ||
| inputs_decode = inputs["input_ids"] if args.include_inputs_for_metrics else None | ||
|
|
||
| if is_torch_tpu_available(): | ||
| xm.mark_step() | ||
|
|
@@ -2464,6 +2462,14 @@ def evaluation_loop( | |
| labels = self._pad_across_processes(labels) | ||
| labels = self._nested_gather(labels) | ||
| labels_host = labels if labels_host is None else nested_concat(labels_host, labels, padding_index=-100) | ||
| if inputs_decode is not None: | ||
| inputs_decode = self._pad_across_processes(inputs_decode) | ||
| inputs_decode = self._nested_gather(inputs_decode) | ||
| inputs_host = ( | ||
| inputs_decode | ||
| if inputs_host is None | ||
| else nested_concat(inputs_host, inputs_decode, padding_index=-100) | ||
| ) | ||
| if logits is not None: | ||
| logits = self._pad_across_processes(logits) | ||
| logits = self._nested_gather(logits) | ||
|
|
@@ -2480,14 +2486,21 @@ def evaluation_loop( | |
| if preds_host is not None: | ||
| logits = nested_numpify(preds_host) | ||
| all_preds = logits if all_preds is None else nested_concat(all_preds, logits, padding_index=-100) | ||
| if inputs_host is not None: | ||
| inputs_decode = nested_numpify(inputs_host) | ||
| all_inputs = ( | ||
| inputs_decode | ||
| if all_inputs is None | ||
| else nested_concat(all_inputs, inputs_decode, padding_index=-100) | ||
| ) | ||
| if labels_host is not None: | ||
| labels = nested_numpify(labels_host) | ||
| all_labels = ( | ||
| labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100) | ||
| ) | ||
|
|
||
| # Set back to None to begin a new accumulation | ||
| losses_host, preds_host, labels_host = None, None, None | ||
| losses_host, preds_host, inputs_host, labels_host = None, None, None, None | ||
|
|
||
| if args.past_index and hasattr(self, "_past"): | ||
| # Clean the state at the end of the evaluation loop | ||
|
|
@@ -2500,6 +2513,11 @@ def evaluation_loop( | |
| if preds_host is not None: | ||
| logits = nested_numpify(preds_host) | ||
| all_preds = logits if all_preds is None else nested_concat(all_preds, logits, padding_index=-100) | ||
| if inputs_host is not None: | ||
| inputs_decode = nested_numpify(inputs_host) | ||
| all_inputs = ( | ||
| inputs_decode if all_inputs is None else nested_concat(all_inputs, inputs_decode, padding_index=-100) | ||
| ) | ||
| if labels_host is not None: | ||
| labels = nested_numpify(labels_host) | ||
| all_labels = labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100) | ||
|
|
@@ -2522,10 +2540,17 @@ def evaluation_loop( | |
| all_preds = nested_truncate(all_preds, num_samples) | ||
| if all_labels is not None: | ||
| all_labels = nested_truncate(all_labels, num_samples) | ||
| if all_inputs is not None: | ||
| all_inputs = nested_truncate(all_inputs, num_samples) | ||
|
|
||
| # Metrics! | ||
| if self.compute_metrics is not None and all_preds is not None and all_labels is not None: | ||
| metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels)) | ||
| if args.include_inputs_for_metrics: | ||
| metrics = self.compute_metrics( | ||
| EvalPrediction(predictions=all_preds, label_ids=all_labels, inputs=all_inputs) | ||
| ) | ||
| else: | ||
| metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels)) | ||
| else: | ||
| metrics = {} | ||
|
|
||
|
|
@@ -2905,7 +2930,6 @@ def prediction_loop( | |
|
|
||
| # if eval is called w/o train init deepspeed here | ||
| if args.deepspeed and not self.deepspeed: | ||
|
|
||
| # XXX: eval doesn't have `resume_from_checkpoint` arg but we should be able to do eval | ||
| # from the checkpoint eventually | ||
| deepspeed_engine, _, _ = deepspeed_init(self, num_training_steps=0, resume_from_checkpoint=None) | ||
|
|
@@ -2936,6 +2960,7 @@ def prediction_loop( | |
| losses_host: torch.Tensor = None | ||
| preds_host: Union[torch.Tensor, List[torch.Tensor]] = None | ||
| labels_host: Union[torch.Tensor, List[torch.Tensor]] = None | ||
| inputs_host: Union[torch.Tensor, List[torch.Tensor]] = None | ||
|
|
||
| world_size = max(1, args.world_size) | ||
|
|
||
|
|
@@ -2948,6 +2973,7 @@ def prediction_loop( | |
| make_multiple_of = dataloader.sampler.batch_size | ||
| preds_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=make_multiple_of) | ||
| labels_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=make_multiple_of) | ||
| inputs_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=make_multiple_of) | ||
|
|
||
| model.eval() | ||
|
|
||
|
|
@@ -2961,13 +2987,21 @@ def prediction_loop( | |
|
|
||
| for step, inputs in enumerate(dataloader): | ||
| loss, logits, labels = self.prediction_step(model, inputs, prediction_loss_only, ignore_keys=ignore_keys) | ||
| inputs_decode = inputs["input_ids"] if args.include_inputs_for_metrics else None | ||
|
|
||
| if loss is not None: | ||
| losses = loss.repeat(batch_size) | ||
| losses_host = losses if losses_host is None else torch.cat((losses_host, losses), dim=0) | ||
| if logits is not None: | ||
| preds_host = logits if preds_host is None else nested_concat(preds_host, logits, padding_index=-100) | ||
| if labels is not None: | ||
| labels_host = labels if labels_host is None else nested_concat(labels_host, labels, padding_index=-100) | ||
| if inputs_decode is not None: | ||
| inputs_host = ( | ||
| inputs_decode | ||
| if inputs_host is None | ||
| else nested_concat(inputs_host, inputs_decode, padding_index=-100) | ||
| ) | ||
| self.control = self.callback_handler.on_prediction_step(args, self.state, self.control) | ||
|
|
||
| # Gather all tensors and put them back on the CPU if we have done enough accumulation steps. | ||
|
|
@@ -2976,9 +3010,10 @@ def prediction_loop( | |
| if not prediction_loss_only: | ||
| preds_gatherer.add_arrays(self._gather_and_numpify(preds_host, "eval_preds")) | ||
| labels_gatherer.add_arrays(self._gather_and_numpify(labels_host, "eval_label_ids")) | ||
| inputs_gatherer.add_arrays(self._gather_and_numpify(inputs_host, "eval_inputs_ids")) | ||
|
|
||
| # Set back to None to begin a new accumulation | ||
| losses_host, preds_host, labels_host = None, None, None | ||
| losses_host, preds_host, labels_host, inputs_host = None, None, None, None | ||
|
|
||
| if args.past_index and hasattr(self, "_past"): | ||
| # Clean the state at the end of the evaluation loop | ||
|
|
@@ -2989,13 +3024,20 @@ def prediction_loop( | |
| if not prediction_loss_only: | ||
| preds_gatherer.add_arrays(self._gather_and_numpify(preds_host, "eval_preds")) | ||
| labels_gatherer.add_arrays(self._gather_and_numpify(labels_host, "eval_label_ids")) | ||
| inputs_gatherer.add_arrays(self._gather_and_numpify(inputs_host, "eval_inputs_ids")) | ||
|
|
||
| eval_loss = eval_losses_gatherer.finalize() | ||
| preds = preds_gatherer.finalize() if not prediction_loss_only else None | ||
| label_ids = labels_gatherer.finalize() if not prediction_loss_only else None | ||
| inputs_ids = inputs_gatherer.finalize() if not prediction_loss_only else None | ||
|
|
||
| if self.compute_metrics is not None and preds is not None and label_ids is not None: | ||
| metrics = self.compute_metrics(EvalPrediction(predictions=preds, label_ids=label_ids)) | ||
| if args.include_inputs_for_metrics: | ||
| metrics = self.compute_metrics( | ||
| EvalPrediction(predictions=preds, label_ids=label_ids, inputs=inputs_ids) | ||
| ) | ||
| else: | ||
| metrics = self.compute_metrics(EvalPrediction(predictions=preds, label_ids=label_ids)) | ||
| else: | ||
| metrics = {} | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This shouldn't be changed by `make style`. Are you sure you have the proper version of the formatting tools installed (`pip install .[quality]` in the repo)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I'm sorry. I tried to install some of the dependencies manually, but some parts were still failing. I've made the changes to bring it back to its original state, and also applied your suggestions about the empty lines.