From 94a706fba5673cbce185c8216b9930e31597570a Mon Sep 17 00:00:00 2001 From: Iman Gohari Date: Tue, 16 Sep 2025 17:35:17 +0000 Subject: [PATCH] fea(ci): Updated the fixtures based on changes in PR#2246 --- .../tests/test_text_generation_example.json | 200 +++++++++++++----- 1 file changed, 149 insertions(+), 51 deletions(-) diff --git a/tests/baselines/fixture/tests/test_text_generation_example.json b/tests/baselines/fixture/tests/test_text_generation_example.json index 67a0343d97..3f2b24c553 100644 --- a/tests/baselines/fixture/tests/test_text_generation_example.json +++ b/tests/baselines/fixture/tests/test_text_generation_example.json @@ -31,7 +31,7 @@ "throughput": 115 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[EleutherAI/gpt-j-6b-1-False-False]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[EleutherAI/gpt-j-6b-1-False-False-False]": { "gaudi2": { "throughput": 143.64228300147943 }, @@ -52,7 +52,7 @@ "throughput": 257.2476416844122 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[EleutherAI/gpt-neox-20b-1-False-False]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[EleutherAI/gpt-neox-20b-1-False-False-False]": { "gaudi2": { "throughput": 50.67672679310354 }, @@ -73,7 +73,7 @@ "throughput": 490.8621617893209 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[Qwen/Qwen1.5-MoE-A2.7B-1-True-False]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[Qwen/Qwen1.5-MoE-A2.7B-1-True-False-False]": { "gaudi2": { "throughput": 136.97314902048282 }, @@ -81,7 +81,7 @@ "throughput": 153.70259987623064 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[Qwen/Qwen2-7B-256-False-True]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[Qwen/Qwen2-7B-256-False-True-False]": { "gaudi2": { "output": "DeepSpeed is a machine learning framework that provides a unified interface for training deep learning models. It is designed to be easy to use and to provide high performance. DeepSpeed is built on top of PyTorch and TensorFlow, and it supports both CPU and GPU training. It is also compatible with a variety of deep learning models, including ResNet, BERT, and GPT-2.\nDeepSpeed is a machine learning framework that provides a unified interface for training deep learning models. It is designed to be easy to use and to", "throughput": 8906.268774745313 @@ -91,7 +91,7 @@ "throughput": 14968.571446984299 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[Qwen/Qwen2.5-7B-4-False-False]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[Qwen/Qwen2.5-7B-4-False-False-False]": { "gaudi2": { "throughput": 519.2288445392528 }, @@ -99,7 +99,7 @@ "throughput": 609.0303562033657 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[Qwen/Qwen3-8B-1-False-False]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[Qwen/Qwen3-8B-1-False-False-False]": { "gaudi2": { "throughput": 101.78595453711921 }, @@ -107,7 +107,7 @@ "throughput": 116.34827870142529 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[Qwen/Qwen3-30B-A3B-1-False-False]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[Qwen/Qwen3-30B-A3B-1-False-False-False]": { "gaudi2": { "throughput": 23.27712445319976 }, @@ -115,7 +115,7 @@ "throughput": 33.75449762678064 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[Salesforce/codegen2-1B-1-False-False]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[Salesforce/codegen2-1B-1-False-False-False]": { "gaudi1": { "throughput": 155.32071248826423 }, @@ -126,7 +126,7 @@ "throughput": 405.96090453183643 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[THUDM/chatglm2-6b-1-True-False]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[THUDM/chatglm2-6b-1-True-False-False]": { "gaudi2": { "throughput": 150 }, @@ -134,7 +134,7 @@ "throughput": 169.28444068272802 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[THUDM/chatglm3-6b-1-True-False]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[THUDM/chatglm3-6b-1-True-False-False]": { "gaudi2": { "throughput": 150 }, @@ -147,7 +147,7 @@ "throughput": 34.53559807384106 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[adept/persimmon-8b-base-4-False-False]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[adept/persimmon-8b-base-4-False-False-False]": { "gaudi2": { "throughput": 366.73968820698406 }, @@ -155,7 +155,7 @@ "throughput": 359.5154721132213 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[baichuan-inc/Baichuan2-13B-Chat-1-False-False]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[baichuan-inc/Baichuan2-13B-Chat-1-False-False-False]": { "gaudi2": { "throughput": 66 }, @@ -163,7 +163,7 @@ "throughput": 83.1114363254922 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[baichuan-inc/Baichuan2-7B-Chat-1-True-False]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[baichuan-inc/Baichuan2-7B-Chat-1-True-False-False]": { "gaudi2": { "throughput": 108 }, @@ -176,7 +176,7 @@ "throughput": 15.945023767901013 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[bigcode/starcoder-256-True-True]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[bigcode/starcoder-256-True-True-False]": { "gaudi2": { "output": "def print_hello_world():\n print(\"Hello World\")\n\ndef print_hello_world_twice():\n print_hello_world()\n print_hello_world()\n\ndef print_hello_world_thrice():\n print_hello_world()\n print_hello_world()\n print_hello_world()\n\ndef print_hello_world_four_times():\n print_hello_world()\n print_hello_world()\n print_hello_world()\n ", "throughput": 6846.575763562658 @@ -191,7 +191,7 @@ "throughput": 82.09655684566117 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[bigcode/starcoder2-3b-1-False-True]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[bigcode/starcoder2-3b-1-False-True-False]": { "gaudi2": { "output": "def print_hello_world():\n print(\"Hello World\")\n\ndef print_hello_world_with_name(name):\n print(\"Hello World, \" + name)\n\ndef print_hello_world_with_name_and_age(name, age):\n print(\"Hello World, \" + name + \", \" + str(age))\n\ndef print_hello_world_with_name_and_age_and_gender(name, age, gender):\n print(\"Hello", "throughput": 261.07213776344133 @@ -201,7 +201,7 @@ "throughput": 279.92066126452653 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[bigscience/bloomz-7b1-1-False-False]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[bigscience/bloomz-7b1-1-False-False-False]": { "gaudi1": { "throughput": 41.7555095197846 }, @@ -212,7 +212,7 @@ "throughput": 155.29323724597498 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[codellama/CodeLlama-34b-hf-1-True-False]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[codellama/CodeLlama-34b-hf-1-True-False-False]": { "gaudi2": { "throughput": 32.644 }, @@ -220,7 +220,15 @@ "throughput": 42.94755856794396 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[deepseek-ai/DeepSeek-V2-Lite-1-False-False]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[codellama/CodeLlama-34b-hf-1-True-False-True]": { + "gaudi2": { + "throughput": 32.644 + }, + "gaudi3": { + "throughput": 42.94755856794396 + } + }, + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[deepseek-ai/DeepSeek-V2-Lite-1-False-False-False]": { "gaudi2": { "throughput": 35 }, @@ -236,7 +244,7 @@ "throughput": 357.46365062825083 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[google/gemma-2-27b-1-False-True]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[google/gemma-2-27b-1-False-True-False]": { "gaudi2": { "output": "DeepSpeed is a machine learning framework that enables you to train models with trillions of parameters and beyond, using model parallelism to partition large models over multiple GPUs.\n\nThe following is a brief introduction to the DeepSpeed model parallel training.\n\n

1. Introduction

\n\nThe DeepSpeed model parallel training is a simple and effective way to train large models. It is a framework that enables you to train models with trillions of parameters and beyond.\n\nDeepSpeed is a distributed deep learning optimization toolkit that makes it easy and efficient", "throughput": 36.578709544111 @@ -246,7 +254,7 @@ "throughput": 46.04685368495098 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[google/gemma-2-9b-1-False-True]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[google/gemma-2-9b-1-False-True-False]": { "gaudi2": { "output": "DeepSpeed is a machine learning framework that enables training of large-scale deep learning models on a single GPU or across multiple GPUs. It is designed to be easy to use and highly scalable, making it a popular choice for training large-scale models such as GPT-3 and BERT.\n\nDeepSpeed is built on top of PyTorch, a popular deep learning framework, and provides a set of tools and libraries that make it easy to train large-scale models. It includes features such as zero-shot learning, which allows models to", "throughput": 92.302359446567 @@ -261,7 +269,7 @@ "throughput": 28.84284625836978 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[google/gemma-7b-1-False-True]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[google/gemma-7b-1-False-True-False]": { "gaudi2": { "output": "DeepSpeed is a machine learning framework that enables training of large-scale models on commodity hardware. It is designed to be a drop-in replacement for PyTorch, and it is compatible with the existing PyTorch ecosystem. DeepSpeed is designed to be easy to use, and it provides a number of features that make it easy to train large-scale models. DeepSpeed is designed to be scalable, and it can be used to train models on a single machine or on a cluster of machines. DeepSpeed is designed to be efficient,", "throughput": 109.70751574382221 @@ -271,7 +279,7 @@ "throughput": 135.97272017864475 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[gpt2-xl-1-False-False]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[gpt2-xl-1-False-False-False]": { "gaudi1": { "throughput": 142.11481820425706 }, @@ -287,7 +295,17 @@ "throughput": 44.39616259946937 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[meta-llama/Llama-2-7b-hf-1-True-True]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[meta-llama/Llama-2-7b-hf-1-True-True-False]": { + "gaudi2": { + "output": "DeepSpeed is a machine learning framework for deep learning. It is designed to be fast and efficient, while also being easy to use. DeepSpeed is based on the TensorFlow framework, and it uses the TensorFlow library to perform computations.\nDeepSpeed is a deep learning framework that is designed to be fast and efficient. It is based on the TensorFlow library and uses the TensorFlow library to perform computations. DeepSpeed is designed to be easy to use and to provide a high level of flex", + "throughput": 141.25776956002076 + }, + "gaudi3": { + "output": "DeepSpeed is a machine learning framework for deep learning. It is designed to be fast and efficient, while also being easy to use. DeepSpeed is based on the TensorFlow framework, and it uses the TensorFlow library to perform computations.\nDeepSpeed is a deep learning framework that is designed to be fast and efficient. It is based on the TensorFlow library and uses the TensorFlow library to perform computations. DeepSpeed is designed to be easy to use and to provide a high level of flex", + "throughput": 173.7868608608374 + } + }, + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[meta-llama/Llama-2-7b-hf-1-True-True-True]": { "gaudi2": { "output": "DeepSpeed is a machine learning framework for deep learning. It is designed to be fast and efficient, while also being easy to use. DeepSpeed is based on the TensorFlow framework, and it uses the TensorFlow library to perform computations.\nDeepSpeed is a deep learning framework that is designed to be fast and efficient. It is based on the TensorFlow library and uses the TensorFlow library to perform computations. DeepSpeed is designed to be easy to use and to provide a high level of flex", "throughput": 141.25776956002076 @@ -297,7 +315,7 @@ "throughput": 173.7868608608374 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[meta-llama/Llama-2-7b-hf-512-False-False]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[meta-llama/Llama-2-7b-hf-512-False-False-False]": { "gaudi2": { "throughput": 8711 }, @@ -305,7 +323,15 @@ "throughput": 15150.480373545233 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[meta-llama/Llama-2-7b-hf-512-True-False]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[meta-llama/Llama-2-7b-hf-512-False-False-True]": { + "gaudi2": { + "throughput": 8711 + }, + "gaudi3": { + "throughput": 15150.480373545233 + } + }, + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[meta-llama/Llama-2-7b-hf-512-True-False-False]": { "gaudi2": { "throughput": 12808 }, @@ -313,7 +339,15 @@ "throughput": 23362.95410956595 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[meta-llama/Meta-Llama-3-8B-1-True-False]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[meta-llama/Llama-2-7b-hf-512-True-False-True]": { + "gaudi2": { + "throughput": 12808 + }, + "gaudi3": { + "throughput": 23362.95410956595 + } + }, + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[meta-llama/Meta-Llama-3-8B-1-True-False-False]": { "gaudi2": { "throughput": 129 }, @@ -321,7 +355,15 @@ "throughput": 162.03504027530752 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[microsoft/phi-2-1-False-False]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[meta-llama/Meta-Llama-3-8B-1-True-False-True]": { + "gaudi2": { + "throughput": 129 + }, + "gaudi3": { + "throughput": 162.03504027530752 + } + }, + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[microsoft/phi-2-1-False-False-False]": { "gaudi1": { "throughput": 92.53083167241344 }, @@ -337,7 +379,7 @@ "throughput": 41.21906841459711 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[mistralai/Mistral-7B-v0.1-1-True-True]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[mistralai/Mistral-7B-v0.1-1-True-True-False]": { "gaudi2": { "output": "DeepSpeed is a machine learning framework that accelerates training of large models on a single machine or distributed systems. It is designed to be compatible with PyTorch and TensorFlow, and can be used to train models on a single machine or on a distributed system.\n\nDeepSpeed is a machine learning framework that accelerates training of large models on a single machine or distributed systems. It is designed to be compatible with PyTorch and TensorFlow, and can be used to train models on a single machine or on a distributed system", "throughput": 134.94827207337997 @@ -347,7 +389,7 @@ "throughput": 160.48685620965531 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[mistralai/Mixtral-8x7B-v0.1-1-False-True]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[mistralai/Mixtral-8x7B-v0.1-1-False-True-False]": { "gaudi2": { "output": "DeepSpeed is a machine learning framework that enables training of large models on a single machine with multiple GPUs. It is designed to be easy to use and efficient, and it supports a wide range of models and tasks.\n\nDeepSpeed is a deep learning framework that enables training of large models on a single machine with multiple GPUs. It is designed to be easy to use and efficient, and it supports a wide range of models and tasks.\n\nDeepSpeed is a deep learning framework that enables training of large models on a", "throughput": 71.29570003665306 @@ -357,7 +399,7 @@ "throughput": 81.6817273229847 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[mosaicml/mpt-30b-1-False-False]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[mosaicml/mpt-30b-1-False-False-False]": { "gaudi2": { "throughput": 36.06464336116623 }, @@ -370,7 +412,7 @@ "throughput": 45.45168927038262 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[openbmb/MiniCPM3-4B-1-False-False]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[openbmb/MiniCPM3-4B-1-False-False-False]": { "gaudi2": { "throughput": 65.116 }, @@ -378,7 +420,7 @@ "throughput": 67.06139602530865 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[stabilityai/stablelm-2-12b-1-False-False]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[stabilityai/stablelm-2-12b-1-False-False-False]": { "gaudi1": { "throughput": 26.80858949645992 }, @@ -389,7 +431,7 @@ "throughput": 74.8904496532218 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[state-spaces/mamba-130m-hf-1536-False-False]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[state-spaces/mamba-130m-hf-1536-False-False-False]": { "gaudi2": { "throughput": 3100.9825044466907 }, @@ -402,7 +444,7 @@ "throughput": 794.542 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[tiiuae/falcon-40b-1-True-False]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[tiiuae/falcon-40b-1-True-False-False]": { "gaudi2": { "throughput": 25.202450111088346 }, @@ -415,7 +457,7 @@ "throughput": 44.82870145718665 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[tiiuae/falcon-mamba-7b-1-False-False]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[tiiuae/falcon-mamba-7b-1-False-False-False]": { "gaudi2": { "throughput": 47.1464839567739 }, @@ -503,7 +545,15 @@ "throughput": 5057.520303949097 } }, - "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-70b-hf-4-207-False-2048-128]": { + "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-70b-hf-4-207-False-2048-128-False]": { + "gaudi2": { + "throughput": 568.5 + }, + "gaudi3": { + "throughput": 918.3333993444961 + } + }, + "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-70b-hf-4-207-False-2048-128-True]": { "gaudi2": { "throughput": 568.5 }, @@ -511,7 +561,7 @@ "throughput": 918.3333993444961 } }, - "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-70b-hf-4-3042-False-128-128]": { + "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-70b-hf-4-3042-False-128-128-False]": { "gaudi2": { "throughput": 5374.6 }, @@ -519,7 +569,15 @@ "throughput": 9105.741034094377 } }, - "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-70b-hf-4-750-False-128-2048]": { + "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-70b-hf-4-3042-False-128-128-True]": { + "gaudi2": { + "throughput": 5374.6 + }, + "gaudi3": { + "throughput": 9105.741034094377 + } + }, + "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-70b-hf-4-750-False-128-2048-True]": { "gaudi2": { "throughput": 7422.4 }, @@ -527,7 +585,15 @@ "throughput": 12966.32808044709 } }, - "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-70b-hf-8-172-False-2048-2048]": { + "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-70b-hf-8-172-False-2048-2048-False]": { + "gaudi2": { + "throughput": 4656.2 + }, + "gaudi3": { + "throughput": 6059.088893259565 + } + }, + "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-70b-hf-8-172-False-2048-2048-True]": { "gaudi2": { "throughput": 4656.2 }, @@ -535,7 +601,15 @@ "throughput": 6059.088893259565 } }, - "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-7b-hf-1-1230-False-128-128]": { + "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-7b-hf-1-1230-False-128-128-False]": { + "gaudi2": { + "throughput": 13152.7 + }, + "gaudi3": { + "throughput": 19132.3193582529 + } + }, + "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-7b-hf-1-1230-False-128-128-True]": { "gaudi2": { "throughput": 13152.7 }, @@ -543,7 +617,7 @@ "throughput": 19132.3193582529 } }, - "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-7b-hf-1-163-False-128-2048]": { + "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-7b-hf-1-163-False-128-2048-False]": { "gaudi2": { "throughput": 4774.7 }, @@ -551,7 +625,15 @@ "throughput": 7240.988993899055 } }, - "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-7b-hf-1-81-False-2048-2048]": { + "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-7b-hf-1-163-False-128-2048-True]": { + "gaudi2": { + "throughput": 4774.7 + }, + "gaudi3": { + "throughput": 7240.988993899055 + } + }, + "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-7b-hf-1-81-False-2048-2048-False]": { "gaudi2": { "throughput": 1942.9 }, @@ -559,7 +641,23 @@ "throughput": 2868.2782272085133 } }, - "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-7b-hf-1-94-False-2048-128]": { + "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-7b-hf-1-81-False-2048-2048-True]": { + "gaudi2": { + "throughput": 1942.9 + }, + "gaudi3": { + "throughput": 2868.2782272085133 + } + }, + "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-7b-hf-1-94-False-2048-128-False]": { + "gaudi2": { + "throughput": 1293.3 + }, + "gaudi3": { + "throughput": 1852.6696711170073 + } + }, + "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-7b-hf-1-94-False-2048-128-True]": { "gaudi2": { "throughput": 1293.3 }, @@ -567,7 +665,7 @@ "throughput": 1852.6696711170073 } }, - "tests/test_text_generation_example.py::test_text_generation_fp8[microsoft/phi-2-1-1-True-128-128]": { + "tests/test_text_generation_example.py::test_text_generation_fp8[microsoft/phi-2-1-1-True-128-128-False]": { "gaudi2": { "throughput": 254.08932787178165 }, @@ -591,7 +689,7 @@ "throughput": 1681.4401450088983 } }, - "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mistral-7B-Instruct-v0.2-1-44-True-2048-2048]": { + "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mistral-7B-Instruct-v0.2-1-44-True-2048-2048-False]": { "gaudi2": { "throughput": 3393.149396451692 }, @@ -599,7 +697,7 @@ "throughput": 4877.759076826148 } }, - "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mistral-7B-Instruct-v0.2-1-896-True-128-128]": { + "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mistral-7B-Instruct-v0.2-1-896-True-128-128-False]": { "gaudi2": { "throughput": 17068.965283763682 }, @@ -607,7 +705,7 @@ "throughput": 25100.757003294264 } }, - "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mixtral-8x7B-v0.1-1-1-True-128-128]": { + "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mixtral-8x7B-v0.1-1-1-True-128-128-False]": { "gaudi2": { "throughput": 40.94 }, @@ -615,7 +713,7 @@ "throughput": 114.8447433058542 } }, - "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mixtral-8x7B-v0.1-2-48-True-2048-2048]": { + "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mixtral-8x7B-v0.1-2-48-True-2048-2048-False]": { "gaudi2": { "throughput": 1147.5 }, @@ -623,7 +721,7 @@ "throughput": 2632.4017718271375 } }, - "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mixtral-8x7B-v0.1-2-768-True-128-128]": { + "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mixtral-8x7B-v0.1-2-768-True-128-128-False]": { "gaudi2": { "throughput": 3428.65 }, @@ -647,7 +745,7 @@ "throughput": 379.03 } }, - "tests/test_text_generation_example.py::test_text_generation_fp8[tiiuae/falcon-180B-4-950-True-128-128]": { + "tests/test_text_generation_example.py::test_text_generation_fp8[tiiuae/falcon-180B-4-950-True-128-128-False]": { "gaudi2": { "throughput": 2506.68 }, @@ -679,7 +777,7 @@ "throughput": 182.2741046353745 } }, - "tests/test_text_generation_example.py::test_text_generation_bf16_1x[moonshotai/Moonlight-16B-A3B-1-False-False]": { + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[moonshotai/Moonlight-16B-A3B-1-False-False-False]": { "gaudi2": { "output": "Baseline to set", "throughput": 0