huggingface · regisss · Jan 3, 2024 · Dec 21, 2023 · Dec 27, 2023 · Dec 27, 2023
@@ -227,7 +227,8 @@ python run_summarization.py \
     --gaudi_config_name Habana/t5 \
     --ignore_pad_token_for_loss False \
     --pad_to_max_length \
-    --bf16
+    --bf16 \
+    --bf16_full_eval
 ```
 
 You can run inference with BART on the CNN-DailyMail dataset on 1 Gaudi card with the following command:

@@ -428,6 +428,11 @@ def train(
 
         self.is_in_train = True
 
+        # `args.do_train` is not a reliable signal, since `.train()` may be called without it being set.
+        # Workaround: move the model to the device here when full fp16/bf16 eval is requested and `do_train` is unset.
+        if (args.fp16_full_eval or args.bf16_full_eval) and not args.do_train:
+            self._move_model_to_device(self.model, args.device)
+
         if "model_path" in kwargs:
             resume_from_checkpoint = kwargs.pop("model_path")
             warnings.warn(
@@ -1510,6 +1515,14 @@ def evaluation_loop(
                 )
                 self.already_wrapped_for_hpu_graphs = True
 
+        # If full fp16 or bf16 eval is requested and this ``evaluation`` or ``predict`` call does not happen
+        # while ``train`` is running, cast the model to the requested dtype and move it to the device first.
+        if not self.is_in_train:
+            if args.fp16_full_eval:
+                model = model.to(dtype=torch.float16, device=args.device)
+            elif args.bf16_full_eval:
+                model = model.to(dtype=torch.bfloat16, device=args.device)
+
         batch_size = self.args.eval_batch_size
 
         logger.info(f"***** Running {description} *****")
@@ -1903,6 +1916,14 @@ def prediction_loop(
                 )
                 self.already_wrapped_for_hpu_graphs = True
 
+        # If full fp16 or bf16 eval is requested and this ``evaluation`` or ``predict`` call does not happen
+        # while ``train`` is running, cast the model to the requested dtype and move it to the device first.
+        if not self.is_in_train:
+            if args.fp16_full_eval:
+                model = model.to(dtype=torch.float16, device=args.device)
+            elif args.bf16_full_eval:
+                model = model.to(dtype=torch.bfloat16, device=args.device)
+
         batch_size = dataloader.batch_size
         num_examples = self.num_examples(dataloader)
         logger.info(f"***** Running {description} *****")

@@ -54,7 +54,6 @@
 
 # List of arguments that are not supported by optimum-habana
 UNSUPPORTED_ARGUMENTS = [
-    "bf16_full_eval",
     "fp16",
     "fp16_backend",
     "fp16_full_eval",
@@ -314,8 +313,6 @@ def __post_init__(self):
             raise ValueError("must be using hpu graphs to set max_hpu_graphs.")
 
         # Raise errors for arguments that are not supported by optimum-habana
-        if self.bf16_full_eval:
-            raise ValueError("--bf16_full_eval is not supported by optimum-habana.")
         if self.fp16 or self.fp16_full_eval:
             raise ValueError(
                 "--fp16, --fp16_backend, --fp16_full_eval and --fp16_opt_level are not"

@@ -15,15 +15,15 @@
     MODELS_TO_TEST = {
         "bf16": [
             ("facebook/bart-large-cnn", "Habana/bart", 4.691, 26.0688, 2, 1),
-            ("t5-3b", "Habana/t5", 2.28, 21.56, 2, 1),
+            ("t5-3b", "Habana/t5", 2.88, 21.56, 2, 1),
         ],
     }
 else:
     # Gaudi1 CI baselines
     MODELS_TO_TEST = {
         "bf16": [
             ("facebook/bart-large-cnn", "Habana/bart", 2.588, 26.0688, 2, 1),
-            ("t5-3b", "Habana/t5", 0.585, 21.72, 2, 1),
+            ("t5-3b", "Habana/t5", 0.98, 21.56, 2, 1),
         ],
     }
 
@@ -76,6 +76,8 @@ def _test_text_summarization(
 
     if not deepspeed:
         command.append("--bf16")
+        if model_name == "t5-3b":
+            command.append("--bf16_full_eval")
 
     with TemporaryDirectory() as tmp_dir:
         command.append(f"--output_dir {tmp_dir}")