vllm-project · skavulya · May 15, 2026 · May 15, 2026 · May 18, 2026 · May 18, 2026
@@ -164,6 +164,7 @@ jobs:
     needs: [prepare-release-branch, setup_and_build, discover_runner]
     # --- UPDATED: Run on the specific node ---
     runs-on: ${{ needs.discover_runner.outputs.runner_name }}
+    environment: approved-workflow
     steps:
       - name: Run pytest in tests/unit_tests
         run: |
@@ -216,6 +217,7 @@ jobs:
     needs: [prepare-release-branch, setup_and_build, discover_tests, discover_runner]
     # --- UPDATED: Run on the specific node ---
     runs-on: ${{ needs.discover_runner.outputs.runner_name }}
+    environment: approved-workflow
     strategy:
       fail-fast: false
       matrix:
@@ -248,6 +250,7 @@ jobs:
     needs: [prepare-release-branch, setup_and_build, discover_runner]
     # --- UPDATED: Run on the specific node ---
     runs-on: ${{ needs.discover_runner.outputs.runner_name }}
+    environment: approved-workflow
     steps:
       - name: Run Data Parallel test
         run: |
@@ -275,6 +278,7 @@ jobs:
     needs: [prepare-release-branch, setup_and_build, discover_runner]
     # --- UPDATED: Run on the specific node ---
     runs-on: ${{ needs.discover_runner.outputs.runner_name }}
+    environment: approved-workflow
     steps:
       - name: Run PD disaggregate test
         run: |
@@ -305,6 +309,7 @@ jobs:
     needs: [prepare-release-branch, setup_and_build, discover_runner]
     # --- UPDATED: Run on the specific node ---
     runs-on: ${{ needs.discover_runner.outputs.runner_name }}
+    environment: approved-workflow
     steps:
       - name: Run Sharegpt performance tests with warmup
         run: |

@@ -101,6 +101,7 @@ jobs:
     needs: [setup_and_build, discover_runner]
     # <-- UPDATED: Runs on the specific runner
     runs-on: ${{ needs.discover_runner.outputs.runner_name }}
+    environment: approved-workflow
     steps:
       - name: Run pytest in tests/unit_tests
         run: |
@@ -157,6 +158,7 @@ jobs:
     needs: [setup_and_build, discover_tests, discover_runner]
     # <-- UPDATED: Runs on the specific runner
     runs-on: ${{ needs.discover_runner.outputs.runner_name }}
+    environment: approved-workflow
     strategy:
       fail-fast: false
       matrix:
@@ -192,6 +194,7 @@ jobs:
     needs: [setup_and_build, discover_runner]
     # <-- UPDATED: Runs on the specific runner
     runs-on: ${{ needs.discover_runner.outputs.runner_name }}
+    environment: approved-workflow
     steps:
       - name: Run Data Parallel test
         run: |
@@ -220,6 +223,7 @@ jobs:
     needs: [setup_and_build, discover_runner]
     # <-- UPDATED: Runs on the specific runner
     runs-on: ${{ needs.discover_runner.outputs.runner_name }}
+    environment: approved-workflow
     steps:
       - name: Run PD disaggregate test
         run: |

@@ -17,8 +17,14 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
+  gate:  # Approval checkpoint: execute_pre_merge lists this job in `needs`, so it waits for it.
+    runs-on: ubuntu-latest
+    environment: pre-merge-approval  # NOTE(review): presumably has required reviewers configured - confirm in repo settings
+    steps:
+      - run: echo "Approved"  # No real work; the job only has to complete.
   execute_pre_merge:
     runs-on: ubuntu-latest
+    needs: gate
     timeout-minutes: 720
     permissions:
       actions: write       # dispatch workflows, read run status, cancel orphaned runs

@@ -29,6 +29,7 @@ concurrency:
 jobs:
   retrieve_head_sha:
     runs-on: ubuntu-latest
+    timeout-minutes: 720
     outputs:
       head_sha: ${{ steps.set_sha.outputs.head_sha }}
     steps:
@@ -40,6 +41,7 @@ jobs:
   gatekeeper:
     needs: retrieve_head_sha
     runs-on: ubuntu-latest
+    timeout-minutes: 720
     permissions:
       # Required to read the status of checks and PR details
       checks: read
@@ -136,6 +138,7 @@ jobs:
   discover_runner:
     needs: gatekeeper
     runs-on: ${{ inputs.use_hourly_runner == 'true' && 'hourly-ci' || 'pr-ci' }}
+    timeout-minutes: 720
     outputs:
       runner_name: ${{ steps.get_name.outputs.name }}
     steps:
@@ -150,6 +153,7 @@ jobs:
     needs: [discover_runner, retrieve_head_sha]
     # --- UPDATED: Run on the specific node ---
     runs-on: ${{ needs.discover_runner.outputs.runner_name }}
+    timeout-minutes: 720
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
     steps:
@@ -180,6 +184,7 @@ jobs:
   discover_calibration_tests:
     needs: [discover_runner, retrieve_head_sha]
     runs-on: ${{ needs.discover_runner.outputs.runner_name }}
+    timeout-minutes: 720
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
     steps:
@@ -207,6 +212,7 @@ jobs:
     # This job runs in parallel with the build job
     needs: [gatekeeper, retrieve_head_sha]
     runs-on: ubuntu-latest
+    timeout-minutes: 720
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
@@ -235,6 +241,7 @@ jobs:
     if: inputs.skip_tests != 'true'
     # --- UPDATED: Run on the specific node ---
     runs-on: ${{ needs.discover_runner.outputs.runner_name }}
+    timeout-minutes: 720
     permissions:
       contents: read # Required to checkout code and read history
     outputs:
@@ -354,6 +361,8 @@ jobs:
     needs: [pre_merge_hpu_test_build, discover_runner, retrieve_head_sha]
     # --- UPDATED: Run on the specific node ---
     runs-on: ${{ needs.discover_runner.outputs.runner_name }}
+    environment: approved-workflow
+    timeout-minutes: 720
     steps:
       - name: Run pytest in tests/unit_tests
         run: |
@@ -378,6 +387,8 @@ jobs:
     needs: [pre_merge_hpu_test_build, hpu_unit_tests, discover_runner, retrieve_head_sha]
     # --- UPDATED: Run on the specific node ---
     runs-on: ${{ needs.discover_runner.outputs.runner_name }}
+    environment: approved-workflow
+    timeout-minutes: 720
     steps:
       - name: Run test scripts
         run: |
@@ -408,6 +419,8 @@ jobs:
     needs: [pre_merge_hpu_test_build, hpu_unit_tests, discover_runner, retrieve_head_sha]
     # --- UPDATED: Run on the specific node ---
     runs-on: ${{ needs.discover_runner.outputs.runner_name }}
+    environment: approved-workflow
+    timeout-minutes: 720
     steps:
       - name: Run test scripts
         run: |
@@ -433,6 +446,8 @@ jobs:
     needs: [pre_merge_hpu_test_build, hpu_unit_tests, discover_runner, retrieve_head_sha]
     # --- UPDATED: Run on the specific node ---
     runs-on: ${{ needs.discover_runner.outputs.runner_name }}
+    environment: approved-workflow
+    timeout-minutes: 720
     steps:
       - name: Run test scripts
         run: |
@@ -459,6 +474,8 @@ jobs:
     needs: [pre_merge_hpu_test_build, hpu_unit_tests, discover_tests, discover_runner, retrieve_head_sha]
     # --- UPDATED: Run on the specific node ---
     runs-on: ${{ needs.discover_runner.outputs.runner_name }}
+    environment: approved-workflow
+    timeout-minutes: 720
     strategy:
       fail-fast: false
       matrix:
@@ -491,6 +508,8 @@ jobs:
   calibration_tests:
     needs: [pre_merge_hpu_test_build, hpu_unit_tests, discover_calibration_tests, discover_runner, retrieve_head_sha]
     runs-on: ${{ needs.discover_runner.outputs.runner_name }}
+    environment: approved-workflow
+    timeout-minutes: 720
     strategy:
       fail-fast: false
       matrix:
@@ -522,6 +541,7 @@ jobs:
   calibration_arg_parsing_tests:
     needs: [pre_merge_hpu_test_build, discover_runner, retrieve_head_sha]
     runs-on: ${{ needs.discover_runner.outputs.runner_name }}
+    timeout-minutes: 720
     steps:
       - name: Run calibration arg parsing tests
         run: |
@@ -544,6 +564,7 @@ jobs:
     needs: [retrieve_head_sha]
     if: inputs.is_merge_group != 'true'
     runs-on: ubuntu-latest
+    timeout-minutes: 720
     outputs:
       nixl_changed: ${{ steps.check.outputs.nixl_changed }}
     steps:
@@ -571,6 +592,7 @@ jobs:
     needs: [check_dockerfile_changes, discover_runner, retrieve_head_sha]
     if: needs.check_dockerfile_changes.outputs.nixl_changed == 'true'
     runs-on: ${{ needs.discover_runner.outputs.runner_name }}
+    timeout-minutes: 720
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
@@ -595,6 +617,7 @@ jobs:
     needs: [hpu_unit_tests, e2e, hpu_perf_tests, calibration_tests, calibration_arg_parsing_tests, discover_runner]
     # --- UPDATED: Run on the specific node ---
     runs-on: ${{ needs.discover_runner.outputs.runner_name }}
+    timeout-minutes: 720
     # This job is required to pass for pre-merge CI. By itself it does nothing, and will only pass if all jobs specified in "needs" list pass.
     steps:
       - name: Succeeded if all previous jobs passed
@@ -605,6 +628,7 @@ jobs:
     # This job runs after hpu-test-suite completes
     needs: [pre_merge_hpu_test, pre_merge_hpu_test_build]
     runs-on: ubuntu-latest
+    timeout-minutes: 720
     permissions:
       # Permissions are required on a per-job basis
       pull-requests: write
@@ -624,6 +648,7 @@ jobs:
     if: always()
     needs: [discover_runner, hpu_unit_tests, hpu_pd_tests, hpu_perf_tests, hpu_dp_tests, e2e, calibration_tests, calibration_arg_parsing_tests]
     runs-on: ${{ needs.discover_runner.outputs.runner_name }}
+    timeout-minutes: 720
     steps:
       - name: Remove Docker image to free up space
         env:

@@ -68,7 +68,8 @@ The vLLM Hardware Plugin for Intel® Gaudi® integrates [Intel® Gaudi® AI acce
 5. Install torchaudio (required by some upstream vLLM models such as QWEN3_5). Use the CPU wheel with `--no-deps` to avoid pulling a conflicting CUDA torch:
 
     ```bash
-    pip install --no-deps torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
+    TORCH_VERSION=$(python3 -c "import re, torch; print(re.match(r'(\d+\.\d+\.\d+)', torch.__version__).group(1))")
+    pip install --no-deps torchaudio==$TORCH_VERSION --extra-index-url https://download.pytorch.org/whl/cpu
     ```
 
     To see all the available installation methods, such as NIXL, see the [Installation](https://vllm-gaudi.readthedocs.io/en/latest/getting_started/installation.html) guide.

@@ -1,9 +1,7 @@
 # Dependencies for HPU code
-ray>=2.48.0
 pandas>=2.2.3
 numba>=0.58.0
 numpy>=1.26.0
-transformers >= 4.56.0, != 5.0.*, != 5.1.*, != 5.2.*, != 5.3.*, != 5.4.*, != 5.5.0, != 5.6.*
 kaldi-native-fbank >= 1.18.7
 decord >= 0.6.0
 tblib==3.1.0
@@ -415,6 +415,7 @@ run_longbench_qwen3_30b_fp8_static_fp8_fsdpa_slicing_compile_test() {
 run_gsm8k_qwen35_35b_a3b_test() {
     echo "➡️ Testing GSM8K on Qwen3.5-35B-A3B..."
     VLLM_SKIP_WARMUP=True ENABLE_APC=False VLLM_FUSED_BLOCK_SOFTMAX_ADJUSTMENT=False VLLM_GRAPH_RESERVED_MEM=0.8 \
+    VLLM_PROMPT_BS_BUCKET_MAX=32 \
     pytest -v -s "${VLLM_GAUDI_PREFIX}/tests/models/language/generation/test_common.py" --model_card_path "${VLLM_GAUDI_PREFIX}/tests/full_tests/model_cards/qwen3.5-35b-a3b.yaml"
     echo "✅ Test with Qwen3.5-35B-A3B passed."
 }

@@ -15,4 +15,4 @@ model_card:
 
 metrics:
   name: exact_match,strict-match
-  value: 0.75
+  value: 0.9