From 99e2f8ace843daef1a621926a31774de4474ef03 Mon Sep 17 00:00:00 2001 From: Miroslav Goncharenko Date: Sun, 1 Dec 2024 20:28:29 +0100 Subject: [PATCH] trainer: fixed spelling --- optimum/habana/transformers/trainer.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/optimum/habana/transformers/trainer.py b/optimum/habana/transformers/trainer.py index 99c38f0c2e..82b196c6c5 100644 --- a/optimum/habana/transformers/trainer.py +++ b/optimum/habana/transformers/trainer.py @@ -575,7 +575,7 @@ def _inner_training_loop( (self.model_wrapped,) = release_memory(self.model_wrapped) self.model_wrapped = self.model - # Check for DeepSpeed *after* the intial pass and modify the config + # Check for DeepSpeed *after* the initial pass and modify the config if self.is_deepspeed_enabled: # Temporarily unset `self.args.train_batch_size` original_bs = self.args.per_device_train_batch_size @@ -686,14 +686,14 @@ def _inner_training_loop( # HACK because outputs should always be tuples def hpu_deepspeed_checkpointing(function, *checkpoint_args, use_reentrant: Optional[bool] = None): - """DeepSpeed acitvation checkpointing.""" + """DeepSpeed activation checkpointing.""" if use_reentrant is None: use_reentrant = True if use_reentrant: all_outputs = [] CheckpointFunction.apply(function, all_outputs, *checkpoint_args) else: - logger.info("DeepSpeed acitvation checkpointing=non_reentrant_checkpoint") + logger.info("DeepSpeed activation checkpointing=non_reentrant_checkpoint") all_outputs = non_reentrant_checkpoint(function, *checkpoint_args) # Always return a tuple @@ -863,7 +863,7 @@ def hpu_deepspeed_checkpointing(function, *checkpoint_args, use_reentrant: Optio # tr_loss is a tensor to avoid synchronization of TPUs through .item() tr_loss = torch.tensor(0.0).to(args.device) - # _total_loss_scalar is updated everytime .item() has to be called on tr_loss and stores the sum of all losses + # _total_loss_scalar is updated every time .item() has to be called on tr_loss and stores the sum of all losses self._total_loss_scalar = 0.0 self._globalstep_last_logged = self.state.global_step self._zero_model_grad(model) @@ -1433,7 +1433,7 @@ def _save_optimizer_and_scheduler(self, output_dir): ) elif self.args.should_save: # deepspeed.save_checkpoint above saves model/optim/sched - # This block is exectuted by the main process only + # This block is executed by the main process only optim_dict = self.optimizer.state_dict() if self.args.use_habana: # Move the state dict from HPU to CPU before saving @@ -1599,7 +1599,7 @@ def training_step(self, model: torch.nn.Module, inputs: Dict[str, Union[torch.Te del inputs kwargs = {} - # For LOMO optimizers you need to explicitly use the learnign rate + # For LOMO optimizers you need to explicitly use the learning rate if self.args.optim in [OptimizerNames.LOMO, OptimizerNames.ADALOMO]: kwargs["learning_rate"] = self._get_learning_rate() @@ -1725,7 +1725,7 @@ def evaluate( From https://github.com/huggingface/transformers/blob/v4.38.2/src/transformers/trainer.py#L3162 with the following modification 1. use throughput_warmup_steps in evaluation throughput calculation """ - # handle multipe eval datasets + # handle multiple eval datasets override = eval_dataset is not None eval_dataset = eval_dataset if override else self.eval_dataset if isinstance(eval_dataset, dict):