From a1578e5ef91d8b22a669d9adb4a2414aa4aee72c Mon Sep 17 00:00:00 2001 From: Jimin Ha Date: Tue, 12 Nov 2024 16:44:47 -0800 Subject: [PATCH 1/2] Add compile time/warmup time calcuation for eval/predict --- optimum/habana/transformers/trainer.py | 51 ++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/optimum/habana/transformers/trainer.py b/optimum/habana/transformers/trainer.py index 27eab623bb..3ed0eaf077 100644 --- a/optimum/habana/transformers/trainer.py +++ b/optimum/habana/transformers/trainer.py @@ -72,6 +72,7 @@ HPSearchBackend, HubStrategy, IntervalStrategy, + PredictionOutput, TrainOutput, denumpify_detensorize, enable_full_determinism, @@ -1781,6 +1782,51 @@ def evaluate( return output.metrics + def predict( + self, test_dataset: Dataset, ignore_keys: Optional[List[str]] = None, metric_key_prefix: str = "test" + ) -> PredictionOutput: + """ + From https://github.com/huggingface/transformers/blob/v4.45.2/src/transformers/trainer.py#L3904 with the following modification + 1. comment out TPU related + 2. use throughput_warmup_steps in evaluation throughput calculation + """ + # memory metrics - must set up as early as possible + self._memory_tracker.start() + + test_dataloader = self.get_test_dataloader(test_dataset) + start_time = time.time() + self.start_time_after_warmup = None + + eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop + output = eval_loop( + test_dataloader, description="Prediction", ignore_keys=ignore_keys, metric_key_prefix=metric_key_prefix + ) + total_batch_size = self.args.eval_batch_size * self.args.world_size + if f"{metric_key_prefix}_jit_compilation_time" in output.metrics: + start_time += output.metrics[f"{metric_key_prefix}_jit_compilation_time"] + if f"{metric_key_prefix}_model_preparation_time" in output.metrics: + start_time += output.metrics[f"{metric_key_prefix}_model_preparation_time"] + + num_samples = output.num_samples - self.args.throughput_warmup_steps * total_batch_size + num_steps = math.ceil(output.num_samples / total_batch_size) - self.args.throughput_warmup_steps + + logger.info(f"num_samples : {num_samples}, num_steps: {num_steps}") + + output.metrics.update( + speed_metrics( + metric_key_prefix, + start_time, + num_samples=num_samples, + num_steps=num_steps, + start_time_after_warmup=self.start_time_after_warmup, + ) + ) + + self.control = self.callback_handler.on_predict(self.args, self.state, self.control, output.metrics) + self._memory_tracker.stop_and_update_metrics(output.metrics) + + return PredictionOutput(predictions=output.predictions, label_ids=output.label_ids, metrics=output.metrics) + def evaluation_loop( self, dataloader: DataLoader, @@ -1873,6 +1919,7 @@ def evaluation_loop( observed_num_examples = 0 # Main evaluation loop + start_time_eval = time.time() for step, inputs in enumerate(dataloader): if ( self.args.throughput_warmup_steps > 0 @@ -1880,6 +1927,8 @@ def evaluation_loop( and step == self.args.throughput_warmup_steps ): self.start_time_after_warmup = time.time() + self.compilation_time = self.start_time_after_warmup - start_time_eval + # Update the observed num examples observed_batch_size = find_batch_size(inputs) if observed_batch_size is not None: @@ -2022,6 +2071,8 @@ def evaluation_loop( metrics[f"{metric_key_prefix}_loss"] = all_losses.mean().item() if hasattr(self, "model_preparation_time"): metrics[f"{metric_key_prefix}_model_preparation_time"] = self.model_preparation_time + if hasattr(self, "compilation_time"): + metrics[f"{metric_key_prefix}_graph_compliation_duration"] = self.compilation_time # Prefix all keys with metric_key_prefix + '_' for key in list(metrics.keys()): From 7b3cc2a88151532575c172f52916d28e8248badd Mon Sep 17 00:00:00 2001 From: Jimin Ha Date: Thu, 14 Nov 2024 21:02:29 +0000 Subject: [PATCH 2/2] Fix comment --- optimum/habana/transformers/trainer.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/optimum/habana/transformers/trainer.py b/optimum/habana/transformers/trainer.py index 3ed0eaf077..f4b231285e 100644 --- a/optimum/habana/transformers/trainer.py +++ b/optimum/habana/transformers/trainer.py @@ -1720,8 +1720,7 @@ def evaluate( ) -> Dict[str, float]: """ From https://github.com/huggingface/transformers/blob/v4.38.2/src/transformers/trainer.py#L3162 with the following modification - 1. comment out TPU related - 2. use throughput_warmup_steps in evaluation throughput calculation + 1. use throughput_warmup_steps in evaluation throughput calculation """ # handle multipe eval datasets override = eval_dataset is not None