From 08e040b6991ab2e3d833d5ca020e94b7ef524a7d Mon Sep 17 00:00:00 2001
From: Yeonsil Yoon <yyoon@habana.ai>
Date: Wed, 5 Jun 2024 16:47:29 +0000
Subject: [PATCH] Update text-generation CI configuration for Falcon and
 Mixtral

---
 tests/test_text_generation_example.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_text_generation_example.py b/tests/test_text_generation_example.py
index 2fde9f1bdc..0dba10c786 100644
--- a/tests/test_text_generation_example.py
+++ b/tests/test_text_generation_example.py
@@ -52,7 +52,7 @@
             ("mistralai/Mistral-7B-Instruct-v0.2", 1, 120, True, 128, 2048, 5394.675714459493),
             ("mistralai/Mistral-7B-Instruct-v0.2", 1, 120, True, 2048, 128, 919.8470890081497),
             ("mistralai/Mistral-7B-Instruct-v0.2", 1, 44, True, 2048, 2048, 2471.950758729518),
-            ("mistralai/Mixtral-8x7B-v0.1", 1, 1, False, 128, 128, 39.26845661768185),
+            ("mistralai/Mixtral-8x7B-v0.1", 1, 1, True, 128, 128, 39.26845661768185),
             ("microsoft/phi-2", 1, 1, True, 128, 128, 254.08932787178165),
         ],
         "deepspeed": [
@@ -133,7 +133,7 @@ def _test_text_generation(
     if "llama" in model_name.lower():
         command += ["--trim_logits", "--attn_softmax_bf16"]
 
-    if "falcon-180b" in model_name.lower():
+    if "falcon" in model_name.lower():
         command += ["--use_flash_attention", "--flash_attention_causal_mask"]
 
     if reuse_cache or torch_compile: