diff --git a/tests/integration/defs/accuracy/references/cnn_dailymail.yaml b/tests/integration/defs/accuracy/references/cnn_dailymail.yaml
index ddd6589a439..5961f7bc9dd 100644
--- a/tests/integration/defs/accuracy/references/cnn_dailymail.yaml
+++ b/tests/integration/defs/accuracy/references/cnn_dailymail.yaml
@@ -188,6 +188,8 @@ mistralai/Mistral-7B-Instruct-v0.3:
     accuracy: 31.457
   - quant_algo: W4A8_AWQ
     accuracy: 31.201
+mistralai/Mistral-Small-3.1-24B-Instruct-2503:
+  - accuracy: 29.20
 mistralai/Mistral-Nemo-Base-2407:
   - quant_algo: FP8
     kv_cache_quant_algo: FP8
diff --git a/tests/integration/defs/accuracy/references/gsm8k.yaml b/tests/integration/defs/accuracy/references/gsm8k.yaml
index 850f27389b8..1f10c358962 100644
--- a/tests/integration/defs/accuracy/references/gsm8k.yaml
+++ b/tests/integration/defs/accuracy/references/gsm8k.yaml
@@ -122,5 +122,7 @@ mistralai/Ministral-8B-Instruct-2410:
   - quant_algo: FP8
     kv_cache_quant_algo: FP8
     accuracy: 78.35
+mistralai/Mistral-Small-3.1-24B-Instruct-2503:
+  - accuracy: 89.23
 microsoft/Phi-4-multimodal-instruct:
   - accuracy: 81.19
diff --git a/tests/integration/defs/accuracy/references/mmlu.yaml b/tests/integration/defs/accuracy/references/mmlu.yaml
index 86a07220237..90d72bbdbb5 100644
--- a/tests/integration/defs/accuracy/references/mmlu.yaml
+++ b/tests/integration/defs/accuracy/references/mmlu.yaml
@@ -95,6 +95,8 @@ mistralai/Mixtral-8x7B-Instruct-v0.1:
 mistralai/Mixtral-8x22B-v0.1:
   - quant_algo: FP8
     accuracy: 77.63
+mistralai/Mistral-Small-3.1-24B-Instruct-2503:
+  - accuracy: 81.7
 google/gemma-2-9b-it:
   - accuracy: 73.05
 google/gemma-3-27b-it:
diff --git a/tests/integration/defs/accuracy/test_llm_api_pytorch.py b/tests/integration/defs/accuracy/test_llm_api_pytorch.py
index 4af27e1d587..1ad88fa106b 100644
--- a/tests/integration/defs/accuracy/test_llm_api_pytorch.py
+++ b/tests/integration/defs/accuracy/test_llm_api_pytorch.py
@@ -512,6 +512,20 @@ def test_auto_dtype(self):
         task.evaluate(llm)
 
 
+class TestMistralSmall24B(LlmapiAccuracyTestHarness):
+    MODEL_NAME = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
+    MODEL_PATH = f"{llm_models_root()}/Mistral-Small-3.1-24B-Instruct-2503"
+
+    def test_auto_dtype(self):
+        with LLM(self.MODEL_PATH) as llm:
+            task = CnnDailymail(self.MODEL_NAME)
+            task.evaluate(llm)
+            task = MMLU(self.MODEL_NAME)
+            task.evaluate(llm)
+            task = GSM8K(self.MODEL_NAME)
+            task.evaluate(llm)
+
+
 class TestMinistral8BInstruct(LlmapiAccuracyTestHarness):
     MODEL_NAME = "mistralai/Ministral-8B-Instruct-2410"
     MODEL_PATH = f"{llm_models_root()}/Ministral-8B-Instruct-2410"
diff --git a/tests/integration/test_lists/test-db/l0_h100.yml b/tests/integration/test_lists/test-db/l0_h100.yml
index 962b87abf72..56eb1cf50c1 100644
--- a/tests/integration/test_lists/test-db/l0_h100.yml
+++ b/tests/integration/test_lists/test-db/l0_h100.yml
@@ -192,6 +192,7 @@ l0_h100:
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp=vanilla-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_no_kv_cache_reuse[quant_dtype=none-mtp_nextn=2-fp8kv=False-attention_dp=True-cuda_graph=True-overlap_scheduler=True]
   - accuracy/test_llm_api_pytorch.py::TestGemma3_27BInstruct::test_auto_dtype
+  - accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype
   - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_fp8_block_scales[latency]
   - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding[llguidance]
   - test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image-True]