Add GSM8K eval configs under tests/evals/gsm8k/configs/hybrid/ for
Qwen/Qwen3-Next-80B-A3B-Instruct-FP8 at tensor-parallel size 4:

  * Qwen3-Next-FP8-TP4.yaml           - no speculative config
  * Qwen3-Next-FP8-TP4-MTP.yaml       - qwen3_next_mtp, 2 speculative tokens
  * Qwen3-Next-FP8-TP4-MTP-Align.yaml - same as MTP plus --enable-prefix-caching
  * models-h100.txt                   - lists the three config files above

All three configs share accuracy_threshold 0.85, num_questions 1319,
num_fewshot 5 and --max-model-len 4096.
NOTE(review): continuation lines of the folded `server_args` scalar are
indented two spaces here; confirm against the original files.

diff --git a/tests/evals/gsm8k/configs/hybrid/Qwen3-Next-FP8-TP4-MTP-Align.yaml b/tests/evals/gsm8k/configs/hybrid/Qwen3-Next-FP8-TP4-MTP-Align.yaml
new file mode 100644
index 000000000000..4373f1bbfa14
--- /dev/null
+++ b/tests/evals/gsm8k/configs/hybrid/Qwen3-Next-FP8-TP4-MTP-Align.yaml
@@ -0,0 +1,9 @@
+model_name: "Qwen/Qwen3-Next-80B-A3B-Instruct-FP8"
+accuracy_threshold: 0.85
+num_questions: 1319
+num_fewshot: 5
+server_args: >-
+  --max-model-len 4096
+  --tensor-parallel-size 4
+  --speculative-config '{"method":"qwen3_next_mtp","num_speculative_tokens":2}'
+  --enable-prefix-caching
diff --git a/tests/evals/gsm8k/configs/hybrid/Qwen3-Next-FP8-TP4-MTP.yaml b/tests/evals/gsm8k/configs/hybrid/Qwen3-Next-FP8-TP4-MTP.yaml
new file mode 100644
index 000000000000..8736b929e5ad
--- /dev/null
+++ b/tests/evals/gsm8k/configs/hybrid/Qwen3-Next-FP8-TP4-MTP.yaml
@@ -0,0 +1,8 @@
+model_name: "Qwen/Qwen3-Next-80B-A3B-Instruct-FP8"
+accuracy_threshold: 0.85
+num_questions: 1319
+num_fewshot: 5
+server_args: >-
+  --max-model-len 4096
+  --tensor-parallel-size 4
+  --speculative-config '{"method":"qwen3_next_mtp","num_speculative_tokens":2}'
diff --git a/tests/evals/gsm8k/configs/hybrid/Qwen3-Next-FP8-TP4.yaml b/tests/evals/gsm8k/configs/hybrid/Qwen3-Next-FP8-TP4.yaml
new file mode 100644
index 000000000000..c9dfc64131d0
--- /dev/null
+++ b/tests/evals/gsm8k/configs/hybrid/Qwen3-Next-FP8-TP4.yaml
@@ -0,0 +1,7 @@
+model_name: "Qwen/Qwen3-Next-80B-A3B-Instruct-FP8"
+accuracy_threshold: 0.85
+num_questions: 1319
+num_fewshot: 5
+server_args: >-
+  --max-model-len 4096
+  --tensor-parallel-size 4
diff --git a/tests/evals/gsm8k/configs/hybrid/models-h100.txt b/tests/evals/gsm8k/configs/hybrid/models-h100.txt
new file mode 100644
index 000000000000..a84e6b5fc105
--- /dev/null
+++ b/tests/evals/gsm8k/configs/hybrid/models-h100.txt
@@ -0,0 +1,3 @@
+Qwen3-Next-FP8-TP4.yaml
+Qwen3-Next-FP8-TP4-MTP.yaml
+Qwen3-Next-FP8-TP4-MTP-Align.yaml