From 08e040b6991ab2e3d833d5ca020e94b7ef524a7d Mon Sep 17 00:00:00 2001 From: Yeonsil Yoon Date: Wed, 5 Jun 2024 16:47:29 +0000 Subject: [PATCH] Update text-generation CI configuration for falcon and Mixtral --- tests/test_text_generation_example.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_text_generation_example.py b/tests/test_text_generation_example.py index 2fde9f1bdc..0dba10c786 100644 --- a/tests/test_text_generation_example.py +++ b/tests/test_text_generation_example.py @@ -52,7 +52,7 @@ ("mistralai/Mistral-7B-Instruct-v0.2", 1, 120, True, 128, 2048, 5394.675714459493), ("mistralai/Mistral-7B-Instruct-v0.2", 1, 120, True, 2048, 128, 919.8470890081497), ("mistralai/Mistral-7B-Instruct-v0.2", 1, 44, True, 2048, 2048, 2471.950758729518), - ("mistralai/Mixtral-8x7B-v0.1", 1, 1, False, 128, 128, 39.26845661768185), + ("mistralai/Mixtral-8x7B-v0.1", 1, 1, True, 128, 128, 39.26845661768185), ("microsoft/phi-2", 1, 1, True, 128, 128, 254.08932787178165), ], "deepspeed": [ @@ -133,7 +133,7 @@ def _test_text_generation( if "llama" in model_name.lower(): command += ["--trim_logits", "--attn_softmax_bf16"] - if "falcon-180b" in model_name.lower(): + if "falcon" in model_name.lower(): command += ["--use_flash_attention", "--flash_attention_causal_mask"] if reuse_cache or torch_compile: