diff --git a/tests/full_tests/ci_e2e_discoverable_tests.sh b/tests/full_tests/ci_e2e_discoverable_tests.sh index c4fcc8650a..9f5e5ed0ab 100755 --- a/tests/full_tests/ci_e2e_discoverable_tests.sh +++ b/tests/full_tests/ci_e2e_discoverable_tests.sh @@ -415,6 +415,7 @@ run_longbench_qwen3_30b_fp8_static_fp8_fsdpa_slicing_compile_test() { run_gsm8k_qwen35_35b_a3b_test() { echo "➡️ Testing GSM8K on Qwen3.5-35B-A3B..." VLLM_SKIP_WARMUP=True ENABLE_APC=False VLLM_FUSED_BLOCK_SOFTMAX_ADJUSTMENT=False VLLM_GRAPH_RESERVED_MEM=0.8 \ + VLLM_PROMPT_BS_BUCKET_MAX=32 \ pytest -v -s "${VLLM_GAUDI_PREFIX}/tests/models/language/generation/test_common.py" --model_card_path "${VLLM_GAUDI_PREFIX}/tests/full_tests/model_cards/qwen3.5-35b-a3b.yaml" echo "✅ Test with Qwen3.5-35B-A3B passed." } diff --git a/tests/full_tests/model_cards/qwen3.5-35b-a3b.yaml b/tests/full_tests/model_cards/qwen3.5-35b-a3b.yaml index 2f92da9722..fdf42e5366 100644 --- a/tests/full_tests/model_cards/qwen3.5-35b-a3b.yaml +++ b/tests/full_tests/model_cards/qwen3.5-35b-a3b.yaml @@ -15,4 +15,4 @@ model_card: metrics: name: exact_match,strict-match - value: 0.75 + value: 0.9