diff --git a/tests/integration/defs/accuracy/references/mmlu.yaml b/tests/integration/defs/accuracy/references/mmlu.yaml index 122913c74c6..bf70fa6d24c 100644 --- a/tests/integration/defs/accuracy/references/mmlu.yaml +++ b/tests/integration/defs/accuracy/references/mmlu.yaml @@ -203,6 +203,7 @@ Qwen3/Qwen3-8B: - spec_dec_algo: Eagle accuracy: 76.12 Qwen3/Qwen3-30B-A3B: + - accuracy: 79.53 - quant_algo: FP8_BLOCK_SCALES accuracy: 79.53 - quant_algo: FP8 diff --git a/tests/integration/defs/accuracy/test_disaggregated_serving.py b/tests/integration/defs/accuracy/test_disaggregated_serving.py index 86595362095..88b97ae84d5 100644 --- a/tests/integration/defs/accuracy/test_disaggregated_serving.py +++ b/tests/integration/defs/accuracy/test_disaggregated_serving.py @@ -915,4 +915,6 @@ def test_mixed_ctx_gen_model(self, ctx_pp, gen_tp): gen_tp=gen_tp, test_sets=[GSM8K, MMLU], ctx_model=ctx_model, - gen_model=gen_model) + gen_model=gen_model, + ctx_instances=1, + gen_instances=1)