From fd81746dbec5f17c8285a0fdc72ca4b4c025cc33 Mon Sep 17 00:00:00 2001
From: Jeffrey Huynh <jthuynh@amazon.com>
Date: Wed, 22 Mar 2023 04:32:17 +0000
Subject: [PATCH] Fix --bf16 option support for Neuron after PR #22300

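This PR fixes the "RuntimeError: No CUDA GPUs are available" error
raised when running with the --bf16 option on Neuron.

When `half_precision_backend` is "auto" and a NeuronCore is available,
the Trainer now selects the "cpu_amp" backend for bf16 before falling
through to the CPU/CUDA checks, and raises a ValueError for fp16, which
is not yet supported on Neuron.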

Related PRs:
https://github.com/huggingface/transformers/pull/20684
https://github.com/huggingface/transformers/pull/22300
---
 src/transformers/trainer.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index 1b3dc8055e4d..a5fe1c8c18a3 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -585,7 +585,12 @@ def __init__(
 
         if args.fp16 or args.bf16:
             if args.half_precision_backend == "auto":
-                if args.device == torch.device("cpu"):
+                if is_torch_neuroncore_available():
+                    if args.fp16:
+                        raise ValueError("Tried to use `fp16` but this option is not yet supported on Neuron.")
+                    else:
+                        args.half_precision_backend = "cpu_amp"
+                elif args.device == torch.device("cpu"):
                     if args.fp16:
                         raise ValueError("Tried to use `fp16` but it is not supported on cpu")
                     elif _is_native_cpu_amp_available: