From 852f7bdaaad10034a24b68088445c430b6a69914 Mon Sep 17 00:00:00 2001 From: Harish Subramony Date: Wed, 4 Dec 2024 15:06:52 -0800 Subject: [PATCH 1/4] add sdp_on_bf16 for tests, text_gen --- examples/text-generation/run_generation.py | 6 ++++++ tests/test_examples.py | 9 +++++++++ tests/test_fsdp_examples.py | 2 ++ tests/test_text_generation_example.py | 4 ++++ 4 files changed, 21 insertions(+) diff --git a/examples/text-generation/run_generation.py b/examples/text-generation/run_generation.py index 4b2ab96842..a057261b32 100755 --- a/examples/text-generation/run_generation.py +++ b/examples/text-generation/run_generation.py @@ -320,6 +320,9 @@ def setup_parser(parser): action="store_true", help="Run the inference with dataset for specified --n_iterations(default:5)", ) + parser.add_argument( + "--sdp_on_bf16", action="store_true", help="Allow pyTorch to use reduced precision in the SDPA math backend" + ) quant_parser_group = parser.add_mutually_exclusive_group() quant_parser_group.add_argument( @@ -389,6 +392,9 @@ def main(): import habana_frameworks.torch.hpu as torch_hpu + if args.sdp_on_bf16: + torch._C._set_math_sdp_allow_fp16_bf16_reduction(True) + if args.dataset_name is None: # Benchmark over the prompts below if args.prompt: diff --git a/tests/test_examples.py b/tests/test_examples.py index b6f07b0512..e9d8e57852 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -520,6 +520,15 @@ def test(self): env_variables["PT_HPU_LAZY_MODE"] = "0" env_variables["PT_ENABLE_INT64_SUPPORT"] = "1" + if self.EXAMPLE_NAME == "run_glue": + if model_name == "bert-large-uncased-whole-word-masking": + extra_command_line_arguments.append("--sdp_on_bf16") + + if self.EXAMPLE_NAME == "run_qa": + if model_name == "bert-large-uncased-whole-word-masking" or \ + model_name == "albert-large-v2": + extra_command_line_arguments.append("--sdp_on_bf16") + with TemporaryDirectory() as tmp_dir: cmd_line = self._create_command_line( multi_card, diff --git a/tests/test_fsdp_examples.py b/tests/test_fsdp_examples.py index 6335f28ebf..a0197834ef 100644 --- a/tests/test_fsdp_examples.py +++ b/tests/test_fsdp_examples.py @@ -97,6 +97,7 @@ def _test_fsdp( f"--gaudi_config_name {gaudi_config}", "--throughput_warmup_steps 100", "--do_eval", + "--sdp_on_bf16", ] else: command += [ @@ -126,6 +127,7 @@ def _test_fsdp( "--use_flash_attention True", "--flash_attention_causal_mask True", f"--token {token.value}", + "--sdp_on_bf16", ] with TemporaryDirectory() as tmp_dir: diff --git a/tests/test_text_generation_example.py b/tests/test_text_generation_example.py index 1fcadba9b0..0fddd2a746 100644 --- a/tests/test_text_generation_example.py +++ b/tests/test_text_generation_example.py @@ -221,6 +221,10 @@ def _test_text_generation( if "gemma" in model_name.lower(): command += ["--use_flash_attention"] + command += ["--sdp_on_bf16"] + + if "decilm" in model_name.lower(): + command += ["--sdp_on_bf16"] if (reuse_cache or torch_compile) and not parallel_strategy == "tp" and not is_starcoder_first_gen_model: command += ["--reuse_cache"] From 957195902aeceafc678c5071fdf5860f5bb6419c Mon Sep 17 00:00:00 2001 From: Harish Subramony Date: Thu, 5 Dec 2024 20:00:18 -0800 Subject: [PATCH 2/4] add sdp_on_bf16 to run_pipeline, more tests --- examples/image-to-text/run_pipeline.py | 6 ++++++ tests/test_encoder_decoder.py | 3 +++ tests/test_examples.py | 11 +++++++++++ tests/test_image_to_text_example.py | 5 +++++ tests/test_text_generation_example.py | 3 +++ 5 files changed, 28 insertions(+) diff --git a/examples/image-to-text/run_pipeline.py b/examples/image-to-text/run_pipeline.py index ae76fdbe55..6ab1314eb9 100644 --- a/examples/image-to-text/run_pipeline.py +++ b/examples/image-to-text/run_pipeline.py @@ -174,9 +174,15 @@ def main(): action="store_true", help="Whether to use the key/value cache for decoding. It should speed up generation.", ) + parser.add_argument( + "--sdp_on_bf16", action="store_true", help="Allow pyTorch to use reduced precision in the SDPA math backend" + ) args = parser.parse_args() + if args.sdp_on_bf16: + torch._C._set_math_sdp_allow_fp16_bf16_reduction(True) + # set args.quant_config with env variable if it is set args.quant_config = os.getenv("QUANT_CONFIG", "") diff --git a/tests/test_encoder_decoder.py b/tests/test_encoder_decoder.py index 27dd1b75c2..20d808b69f 100644 --- a/tests/test_encoder_decoder.py +++ b/tests/test_encoder_decoder.py @@ -206,6 +206,9 @@ def _test_text_translation( if "opus-mt-zh-en" in model_name: command_args.append("--max_source_length 512") + if "Babelscape/mrebel-large" in model_name or "nllb-200-distilled-600M" in model_name: + command_args.append("--sdp_on_bf16") + command = self._build_command( task=task, deepspeed=deepspeed, diff --git a/tests/test_examples.py b/tests/test_examples.py index e9d8e57852..37d1fb8d8a 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -529,6 +529,17 @@ def test(self): model_name == "albert-large-v2": extra_command_line_arguments.append("--sdp_on_bf16") + if self.EXAMPLE_NAME == "run_bridgetower": + if model_name == "BridgeTower/bridgetower-large-itm-mlm-itc": + extra_command_line_arguments.append("--sdp_on_bf16") + + if self.EXAMPLE_NAME == "run_speech_recognition_seq2seq": + if model_name == "openai/whisper-small": + extra_command_line_arguments.append("--sdp_on_bf16") + + if self.EXAMPLE_NAME == "run_clip": + extra_command_line_arguments.append("--sdp_on_bf16") + with TemporaryDirectory() as tmp_dir: cmd_line = self._create_command_line( multi_card, diff --git a/tests/test_image_to_text_example.py b/tests/test_image_to_text_example.py index e8aa562907..8e78707219 100644 --- a/tests/test_image_to_text_example.py +++ b/tests/test_image_to_text_example.py @@ -67,6 +67,11 @@ def _test_image_to_text( "--use_hpu_graphs", ] + if "meta-llama/Llama-3.2-11B-Vision-Instruct" in model_name or "tiiuae/falcon-11B-vlm" in model_name: + command += [ + "--sdp_on_bf16", + ] + command.append("--bf16") with TemporaryDirectory() as tmp_dir: diff --git a/tests/test_text_generation_example.py b/tests/test_text_generation_example.py index 0fddd2a746..df6c604b69 100644 --- a/tests/test_text_generation_example.py +++ b/tests/test_text_generation_example.py @@ -226,6 +226,9 @@ def _test_text_generation( if "decilm" in model_name.lower(): command += ["--sdp_on_bf16"] + if "mamba-130m-hf" in model_name.lower(): + command += ["--sdp_on_bf16"] + if (reuse_cache or torch_compile) and not parallel_strategy == "tp" and not is_starcoder_first_gen_model: command += ["--reuse_cache"] From c59935d74599d56439a01cd2000aeff43ec4b835 Mon Sep 17 00:00:00 2001 From: Harish Subramony Date: Fri, 6 Dec 2024 10:13:29 -0800 Subject: [PATCH 3/4] code cleanup --- tests/test_fsdp_examples.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_fsdp_examples.py b/tests/test_fsdp_examples.py index a0197834ef..3e04d95f14 100644 --- a/tests/test_fsdp_examples.py +++ b/tests/test_fsdp_examples.py @@ -97,7 +97,6 @@ def _test_fsdp( f"--gaudi_config_name {gaudi_config}", "--throughput_warmup_steps 100", "--do_eval", - "--sdp_on_bf16", ] else: command += [ @@ -127,9 +126,10 @@ def _test_fsdp( "--use_flash_attention True", "--flash_attention_causal_mask True", f"--token {token.value}", - "--sdp_on_bf16", ] + command.append("--sdp_on_bf16") + with TemporaryDirectory() as tmp_dir: command.append(f"--output_dir {tmp_dir}") print(f"\n\nCommand to test: {' '.join(command)}\n") From ae719ad57fc44dafae69fc0cb20a257f159f8a40 Mon Sep 17 00:00:00 2001 From: Harish Subramony Date: Fri, 6 Dec 2024 11:54:45 -0800 Subject: [PATCH 4/4] review cleanup --- examples/image-to-text/run_pipeline.py | 6 ------ tests/test_fsdp_examples.py | 3 +-- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/examples/image-to-text/run_pipeline.py b/examples/image-to-text/run_pipeline.py index 6ab1314eb9..ae76fdbe55 100644 --- a/examples/image-to-text/run_pipeline.py +++ b/examples/image-to-text/run_pipeline.py @@ -174,15 +174,9 @@ def main(): action="store_true", help="Whether to use the key/value cache for decoding. It should speed up generation.", ) - parser.add_argument( - "--sdp_on_bf16", action="store_true", help="Allow pyTorch to use reduced precision in the SDPA math backend" - ) args = parser.parse_args() - if args.sdp_on_bf16: - torch._C._set_math_sdp_allow_fp16_bf16_reduction(True) - # set args.quant_config with env variable if it is set args.quant_config = os.getenv("QUANT_CONFIG", "") diff --git a/tests/test_fsdp_examples.py b/tests/test_fsdp_examples.py index 3e04d95f14..180a2bb3f9 100644 --- a/tests/test_fsdp_examples.py +++ b/tests/test_fsdp_examples.py @@ -97,6 +97,7 @@ def _test_fsdp( f"--gaudi_config_name {gaudi_config}", "--throughput_warmup_steps 100", "--do_eval", + "--sdp_on_bf16", ] else: command += [ @@ -128,8 +129,6 @@ def _test_fsdp( f"--token {token.value}", ] - command.append("--sdp_on_bf16") - with TemporaryDirectory() as tmp_dir: command.append(f"--output_dir {tmp_dir}") print(f"\n\nCommand to test: {' '.join(command)}\n")