From fd81746dbec5f17c8285a0fdc72ca4b4c025cc33 Mon Sep 17 00:00:00 2001
From: Jeffrey Huynh
Date: Wed, 22 Mar 2023 04:32:17 +0000
Subject: [PATCH] Fix --bf16 option support for Neuron after PR #22300

This PR fixes the "RuntimeError: No CUDA GPUs are available" when running with --bf16 option on Neuron.

Related PRs:
https://github.com/huggingface/transformers/pull/20684
https://github.com/huggingface/transformers/pull/22300
---
 src/transformers/trainer.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index 1b3dc8055e4d..a5fe1c8c18a3 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -585,7 +585,12 @@ def __init__(
 
         if args.fp16 or args.bf16:
             if args.half_precision_backend == "auto":
-                if args.device == torch.device("cpu"):
+                if is_torch_neuroncore_available():
+                    if args.fp16:
+                        raise ValueError("Tried to use `fp16` but this option is not yet supported on Neuron.")
+                    else:
+                        args.half_precision_backend = "cpu_amp"
+                elif args.device == torch.device("cpu"):
                     if args.fp16:
                         raise ValueError("Tried to use `fp16` but it is not supported on cpu")
                     elif _is_native_cpu_amp_available: