Changes from all commits
1287 commits
5ae638c
Adding deterministic lora benchmarking to vLLM Bench (#36057)
RonaldBXu Mar 18, 2026
1d5ed78
Add API docs link if the CLI arg is a config class (#37432)
hmellor Mar 18, 2026
9496288
[kv_offload+HMA][2/N]: Support multiple KV groups in GPULoadStoreSpec…
orozery Mar 18, 2026
7df6065
[Perf] Add tuned triton moe config for Qwen3.5 H200, 9.9% E2E through…
yewentao256 Mar 18, 2026
3fff62b
[Misc] Clean up model registry (#37457)
DarkLight1337 Mar 18, 2026
d68954f
[Model] Remove unnecessary processor definition for Nemotron Parse (#…
DarkLight1337 Mar 18, 2026
7606842
[bugfix][async scheduling] fix extra cuda context in device 0 with EP…
youkaichao Mar 18, 2026
d047b0e
[Bugfix] Fix incorrect use of merge_size in Qwen3-VL video timestamp …
cnyvfang Mar 18, 2026
bf62ca1
Fix models which use `layer_type_validation` for Transformers v5 (#37…
hmellor Mar 18, 2026
b6691c7
[Bugfix] Fix ROCm crash in qwen3_next multi-stream events (#36795) (#…
JartX Mar 18, 2026
bd75fdd
chunk parakeet into 30s clips to prevent OOMs on long audios (#36671)
netanel-haber Mar 18, 2026
de69384
[V0 Deprecation] Deprecate virtual engine (#37195)
yewentao256 Mar 18, 2026
7c48273
fix(worker): optimize swap_states to copy only active token prefixes …
pjo256 Mar 18, 2026
f67b81c
[LoRA][BugFix] Fix skipped LoRA adapters for Mistral3 (#36928)
WoosukKwon Mar 18, 2026
8ea3b29
[Bugfix] Remove assertion for NVFP4 scale dynamic range (#37465)
mgoin Mar 18, 2026
a35a825
[Model Runner V2] Spec decode rejection sampler greedy support (#37238)
TheEpicDolphin Mar 18, 2026
7734ddb
[Bugfix] Fix KV scales inconsistency in fp8 MLA & FlashInfer kv_cache…
andylolu2 Mar 18, 2026
7857e30
[Bugfix] Expand quantization method support in perf metrics (#37231)
thillai-c Mar 18, 2026
60f9b46
[XPU]Unify xpu test dependencies in dockerfile.xpu (#36477)
1643661061leo Mar 19, 2026
12918ee
[Bugfix] Zero-init MLA attention output buffers to prevent NaN from C…
elvircrn Mar 19, 2026
45e72ea
[EPLB] Simplify EPLB rearrange by only returning one map (#36267)
SageMoore Mar 19, 2026
105e79e
[BUG] Exclude SKIP_TENSORS from get_layer_size() + new weight sync ex…
hao-aaron Mar 19, 2026
33f1588
[Model Runner V2] Spec decode rejection sampler logprobs support (#37…
TheEpicDolphin Mar 19, 2026
6ce502d
[bug] Fix deadlock with pause resume and collective_rpc (#37024)
hao-aaron Mar 19, 2026
a2c303f
[Perf] Optimize token_embed for pooling models, 1.0% token throughput…
yewentao256 Mar 19, 2026
cde956d
[ROCm] issue management - request information for bug issues on ROCm …
hongxiayang Mar 19, 2026
e8f5624
[Refactor] Relocate endpoint tests to mirror serving code directory s…
sfeng33 Mar 19, 2026
4fc7ed7
[SSM/Mamba] Follow-up: N-1 prefill for P/D disaggregation (#37310)
ZhanqiuHu Mar 19, 2026
8f82769
fix(anthropic): remove non-standard 'data: [DONE]' from Anthropic str…
cdpath Mar 19, 2026
37dec84
[Perf] Fix slow hasattr in CUDAGraphWrapper.__getattr__ (#37425)
ZeldaHuang Mar 19, 2026
c3f4051
Support temporal compression for Nemotron-3-VL videos (#36808)
collinmccarthy Mar 19, 2026
4582837
[CI] Fix wrong path test file, missing `rlhf_async_new_apis.py` (#37532)
tjtanaa Mar 19, 2026
9d4f162
[MISC] fix pin_memory=torch.cuda.is_available(), use is_pin_memory_av…
jikunshang Mar 19, 2026
f18494b
fix(cpu): add null check for aligned_alloc in ScratchPadManager (#37369)
yassha Mar 19, 2026
1ac2582
[Bugfix][ROCm] Fix MoRI + AITER FP8 dispatch compatibility for defer_…
Duyi-Wang Mar 19, 2026
81d2f2d
[Bugfix] Fix Nemotron Parse loading (#37407)
DarkLight1337 Mar 19, 2026
eface9c
[Bugfix] Avoid more OpenMP thread reallocation in CPU torch compile …
bigPYJ1151 Mar 19, 2026
a7ce132
Don't log `exc_info` when vLLM tries to doenload a file that doesn't …
hmellor Mar 19, 2026
2b9126d
[Docs] Reorganize pooling docs. (#35592)
noooop Mar 19, 2026
e98c6ff
[CI/Build] Split out MM pooling tests (#37542)
DarkLight1337 Mar 19, 2026
e0c7759
[Model] Remove unnecessary `get_language_model` (#37545)
DarkLight1337 Mar 19, 2026
3fca01f
Fix KV Offloading + MLA AssertionError by using num_kv_heads=1 in cpu…
xueliangyang-oeuler Mar 19, 2026
be8cb39
[CI] Merge `cleanup_pr_body.yml` and `reminder_comment.yml` (#37552)
hmellor Mar 19, 2026
571f498
[Bugfix] Add Kimi-K2.5 reasoning/tool parser aliases and tool_call_id…
DorBernsohn Mar 19, 2026
d9fd057
[Misc] Clean up processing logic (#37541)
DarkLight1337 Mar 19, 2026
8bd9957
Stop bench CLI from recursively casting all configs to `dict` (#37559)
hmellor Mar 19, 2026
65d7c98
Cap the number of API servers to 1 when using Elastic EP. (#37466)
SageMoore Mar 19, 2026
6b5eb81
[LoRA] Minor improvements to LoRA log (#37557)
jeejeelee Mar 19, 2026
8a2c186
Remove deprecated reasoning_content message field(part-2) (#37480)
ikaadil Mar 19, 2026
b644f06
[1/n] Migrate permute_cols to libtorch stable ABI (#31509)
mikaylagawarecki Mar 19, 2026
e1290d0
[MRV2] Use fp32 for draft logits (#37526)
WoosukKwon Mar 19, 2026
029f8f2
[Bug] Fix fp8 trtllm MoE modular kernel supported routing methods (#3…
wzhao18 Mar 19, 2026
42e0fb3
[Misc] Cleanup more configs and processors (#37560)
DarkLight1337 Mar 19, 2026
5e502de
Run MacOS smoke test on daily `cron` job instead of every commit (#37…
hmellor Mar 19, 2026
e2018d5
[CI] Gate pre-commit on `ready` label or number of contributions (#37…
hmellor Mar 19, 2026
4784531
[CPU][UX] Do not crash when tcmalloc/libiomp are not ldpreloaded (#37…
fadara01 Mar 19, 2026
0c86721
[P/D] AnthropicMessages add kv_transfer_params for PD disaggregation …
chaunceyjiang Mar 19, 2026
d8b5dbd
[torch.compile][BE][Multimodal] Remove requirement to set_model_tag t…
Lucaskabela Mar 19, 2026
5713e22
[Bugfix] Fix AttributeError when serving MXFP8 models with DeepGEMM i…
EdalatiAli Mar 19, 2026
ad10f0c
Fix `SpeculatorsConfig` now that `PreTrainedConfig` is a `dataclass` …
hmellor Mar 19, 2026
eff661e
[CI] Add retry with 4x backoff to HTTP fetches for transient failures…
AndreasKaratzas Mar 19, 2026
d494a59
[Log] Log once in local node by default (#37568)
yewentao256 Mar 19, 2026
9c757ab
[MoE Refactor] DefaultMoERunner simplifcation (#33049)
bnellnm Mar 19, 2026
35af51c
[ROCm][CI] Cleaning and restructuring amd-ci legacy pipeline (#34839)
AndreasKaratzas Mar 19, 2026
da0a6e9
Comment fix for async rl example (#35244)
hao-aaron Mar 19, 2026
2f237e9
[MoE Refactor] Rename "naive" all2all backend (#36294)
bnellnm Mar 19, 2026
a06ca92
test Qwen/Qwen3-4B-Instruct-2507 for unbacked (#36064)
laithsakka Mar 19, 2026
5362831
[Performance] Enable Triton autotuning disk cache by default (#37188)
arpera Mar 19, 2026
95f8fe0
[CI][BugFix][AMD] Don't set VLLM_ROCM_USE_AITER anymore in test_rocm_…
rasmith Mar 19, 2026
18778ff
Fix AttributeError in Qwen3.5 GDN layers with quantized models (#37448)
jhsmith409 Mar 19, 2026
1040ee7
[Refactor] Remove dead code in pooling model (#37572)
yewentao256 Mar 19, 2026
6817ead
[Bug] Fix EmbedIOprocessor "classify" <-> "embed" (#37573)
yewentao256 Mar 19, 2026
2b119b2
[Bugfix] Fix Deepseekv32 tool parser when stream interval > 1 (#36056)
sfeng33 Mar 19, 2026
0252f0a
[ROCm][Bugfix] fix cache block size mismatch for aiter unified attent…
divakar-amd Mar 20, 2026
c961d01
Fix DP coordinator ZMQ port TOCTOU (#37452)
itayalroy Mar 20, 2026
5b1a6c1
[CI] Update mergify tool-calling label paths (#37478)
sfeng33 Mar 20, 2026
829e97e
fix: disambiguate multimodal prefix cache keys (#36708)
tianshu-Michael-yu Mar 20, 2026
ee33b58
[Feat] Enable CompressedTensorW4A8Int for XPU (#37207)
tianmu-li Mar 20, 2026
42cbc56
[compile][graph_partition]Add tensor size handling (#36038)
fxdawnn Mar 20, 2026
71eaeb2
[Bugfix][LoRA] Fix Qwen35 LoRA (#36976)
jeejeelee Mar 20, 2026
2b6f6a6
[V0 Deprecation] Deprecate --disable-frontend-multiprocessing (#37612)
sfeng33 Mar 20, 2026
10d120a
fix(xpu): Re-compute compile ranges after platform-specific config up…
Liangyx2 Mar 20, 2026
d0f6b52
[Model Runner V2] fix draft attention metadata generation (#37364)
TheEpicDolphin Mar 20, 2026
339ca53
[XPU] Automatically detect target platform as XPU in build. (#37634)
ccrhx4 Mar 20, 2026
c56ebb4
[Refactor] Relocate entrypoint tests to match serving code structure …
sfeng33 Mar 20, 2026
ed6b19b
[Model] Refactor Step3-VL processor to HF style (#37579)
DarkLight1337 Mar 20, 2026
cb7275e
[PluggableLayer][MM] Add PluggableLayer for CustomQwen2Decoder (#37293)
Wangbei25 Mar 20, 2026
b8a69c2
[XPU] bump vllm-xpu-kernels to v0.1.4 (#37641)
jikunshang Mar 20, 2026
c7c1d52
[Bug] Fix FlashInfer allreduce fusion workspace uninitialized error (…
wzhao18 Mar 20, 2026
6c310a3
[CI] Removing deprecated rlhf examples reference (#37585)
AndreasKaratzas Mar 20, 2026
6043223
[Model Runner V2] Fix draft logits not populated during cudagraph rep…
TheEpicDolphin Mar 20, 2026
23dc6f8
[Model] Deprecate the score task (this will not affect users). (#37537)
noooop Mar 20, 2026
5895abd
[ROCm][CI] Update GSM8K eval config to use fp8-and-mixed models list …
AndreasKaratzas Mar 20, 2026
d42e763
[ROCm][CI] Remove deepep DBO tests on gfx90a (#37614)
AndreasKaratzas Mar 20, 2026
2a6d28a
[ROCm][CI] Fix granite_speech test for gfx90a by selecting compatible…
AndreasKaratzas Mar 20, 2026
eca5c2e
[Refactor] Move serve entrypoint tests under tests/entrypoints/serve/…
sfeng33 Mar 20, 2026
82da88f
[Refactor] Relocate tests from tests/v1/entrypoints/ to tests/entrypo…
sfeng33 Mar 20, 2026
711f0ed
[Misc] Use logger.info_once for auto tool choice log message (#37661)
chaunceyjiang Mar 20, 2026
aed18ab
[UX] Enable torch_profiler_with_stack (#37571)
jeejeelee Mar 20, 2026
b77639c
Fix attribute error in `isaac_patch_hf_runner` (#37685)
hmellor Mar 20, 2026
7aa1c49
[Model] Add LFM2-ColBERT-350M support (#37528)
ieBoytsov Mar 20, 2026
773dff2
[ROCm][Quantization] make quark ocp mx dtype parser robust for weight…
xuebwang-amd Mar 20, 2026
052ddda
[ROCm] Enable wvSplitK skinny GEMM kernel for RDNA4/gfx1x decode (#34…
laudney Mar 20, 2026
a3f28e0
[Bugfix] Reject channelwise quantization (group_size <= 0) in Exllama…
mgehre-amd Mar 20, 2026
cadc887
[Bugfix] Fix ConchLinearKernel channelwise quantization (group_size=-…
mgehre-amd Mar 20, 2026
35cbc7e
[Pixtral] Enable Pixtral language model support Eagle3 (#37182)
Flechman Mar 20, 2026
57797b9
[compile] Fix aot test failures with torch 2.12. (#37604)
zhxchen17 Mar 20, 2026
5498087
[Metrics] Some small refactoring for better maintainability (#33898)
hickeyma Mar 20, 2026
9d78f45
[compile] Add compiled artifact counter for VLLM_USE_MEGA_AOT_ARTIFAC…
zhxchen17 Mar 20, 2026
39bee87
Fix various config related issues for Transformers v5 (#37681)
hmellor Mar 20, 2026
1104d26
Add prefill RoPE + KV cache fusion for MLA
khairulkabir1661 Mar 20, 2026
34ce09f
Remove commented-out debug logging statements
khairulkabir1661 Mar 24, 2026
ad903df
Rename _run_atom_fused_decode to _run_aiter_fused_decode
khairulkabir1661 Mar 24, 2026
93892ce
Fix: Set _fused_prefill_kernel for both FP4 and FP8 paths
khairulkabir1661 Mar 24, 2026
ff56d57
Remove VLLM_USE_AITER_PREFILL_FUSED flag - always use prefill fusion
khairulkabir1661 Mar 24, 2026
d939a77
Remove useless else block and slot_mapping check in prefill fusion
khairulkabir1661 Mar 24, 2026
1194461
Update outdated comments in prefill fusion code
khairulkabir1661 Mar 24, 2026
3c1ddeb
[ROCm][CI] Fix accuracy for llama-nemotron-vl pooling tests (#37613)
AndreasKaratzas Mar 20, 2026
7bd05ae
[Perf] Eliminate redundant SparseMatrix creation in gpt_oss_triton_ke…
xyang16 Mar 20, 2026
05023af
[MRV2] Avoid recompilation of _gather_block_tables_kernel (#37645)
WoosukKwon Mar 20, 2026
a16c7d7
fix CUDAGraph memory being counted twice (#37426)
panpan0000 Mar 20, 2026
6242a8d
[Attention] Support distinguishing between short extends and decodes …
LucasWilkinson Mar 20, 2026
f5f8d1f
refactor: abstract deepgemm support into platform (#37519)
SherryC41 Mar 20, 2026
b896986
[Bugfix] Disable --calculate-kv-scales for hybrid GDN/Mamba+Attention…
Young-Leo Mar 20, 2026
89a86a8
[Model] Update Kimi-K25 and Isaac processors to fit HF-style (#37693)
DarkLight1337 Mar 20, 2026
2d9b002
[compile] Initialize passes at VllmBackend init (#35216)
angelayi Mar 20, 2026
b3a6170
[Bugfix] Disable monolithic TRTLLM MoE for Renormalize routing (#3759…
vadiklyutiy Mar 20, 2026
fdbc3b2
[ROCm][CI] Setting some mi325_4 tests back to optional (in parity wit…
AndreasKaratzas Mar 20, 2026
2e8dd5c
[Model Runner V2] Support Streaming Inputs (#37028)
santiramos27 Mar 20, 2026
75695eb
[Refactor] Remove unused dead code (#36171)
yewentao256 Mar 20, 2026
370e22b
[BugFix] Allow qk_nope_head_dim=192 in FlashInfer MLA backend checks …
kjiang249 Mar 20, 2026
fdc986f
elastic_ep: Fix issues with repeated scale up/down cycles (#37131)
itayalroy Mar 20, 2026
cb91d91
Add get_device_uuid for rocm (#37694)
tmm77 Mar 21, 2026
1fd9096
[Frontend] Remove librosa from audio dependency (#37058)
Isotr0py Mar 21, 2026
65dc873
[MoE Refactor] Mxfp4 oracle rebased (#37128)
zyongye Mar 21, 2026
0b37381
[ROCm][CI] Guard CudaPlatform/RocmPlatform imports to fix test collec…
AndreasKaratzas Mar 21, 2026
f26b6b0
Revert "[compile] Initialize passes at VllmBackend init" (#37733)
simon-mo Mar 21, 2026
c8c9640
[ROCm][CI] Mark gemma3 as large GPU test to avoid OOM on MI250 (#37610)
AndreasKaratzas Mar 21, 2026
61f9480
[Responses API] Add kv_transfer_params for PD disaggregation (#37424)
bongwoobak Mar 21, 2026
d424e71
[ROCm][CI] Update GSM8K eval config to use fp8-and-mixed models list …
AndreasKaratzas Mar 21, 2026
88fb4be
[ROCm] Enable DeepEP ROCm as all2allbackend for AMD GPUs. (#34692)
lcskrishna Mar 21, 2026
b50712a
[Hybrid] calling get_mamba_groups() once at MambaCopyBuffers.create()…
fuscof-ibm Mar 21, 2026
1564b3f
[Core] Enable allreduce fusion by default for SM 10.3 (B300/GB300) (#…
mmangkad Mar 21, 2026
2f58ec7
[Perf] Add SM 10.3 (B300/GB300) all-reduce communicator tuning (#37756)
mmangkad Mar 21, 2026
63f68bd
Add tensor IPC transfer mechanism for multimodal data (#32104)
brandonpelfrey Mar 21, 2026
7f5aaea
Consolidate AWQ quantization into single awq_marlin.py file
robertgshaw2-redhat Mar 21, 2026
70402ea
Revert "Consolidate AWQ quantization into single awq_marlin.py file" …
robertgshaw2-redhat Mar 21, 2026
45b3a41
[Quantization][Deprecation] Remove PTPC FP8 (#32700)
robertgshaw2-redhat Mar 21, 2026
0c05411
[MoE] Move FlashInfer CuteDSL experts into fused_moe/experts/ (#37759)
robertgshaw2-redhat Mar 21, 2026
aa86d17
[ROCm][CI] close missing quote in kernels/moe block in run-amd-test.s…
AndreasKaratzas Mar 22, 2026
f29ea49
[Bugfix] Fix pooling non-determinism from pinned prompt_lens aliasing…
AndreasKaratzas Mar 22, 2026
6b669c7
[ROCm][CI] Add large_gpu_mark to test_max_tokens_none for ROCm (#37717)
AndreasKaratzas Mar 22, 2026
df26f87
[CI] Skip ISAAC multimodal tests due to broken upstream HF model weig…
AndreasKaratzas Mar 22, 2026
6a715d3
[Bugfix] Handle libsndfile sf_error(NULL) race condition in audio fal…
AndreasKaratzas Mar 22, 2026
f15d6b1
[Perf] Optimize glm4.xv VIT (#37779)
KKSK-DON Mar 22, 2026
deb410d
[ROCm][CI] Fix MEGA_AOT_ARTIFACT fallback when PyTorch < 2.10.0 lacks…
AndreasKaratzas Mar 22, 2026
eea035a
[ROCm][CI] get_cu_count was renamed to num_compute_units in #35042 (#…
AndreasKaratzas Mar 22, 2026
e7f9412
[ROCm][CI] Added missing resampy dependency for MM audio tests (#37778)
AndreasKaratzas Mar 22, 2026
5be8424
[ROCm][CI] Make some duplicated tests optional so that they are only …
AndreasKaratzas Mar 22, 2026
4df693b
[MoE] Move PF Methods to Folder (#35927)
robertgshaw2-redhat Mar 22, 2026
aedbbf0
[ROCm][CI] Stabilize ROCm speech-to-text translation test with lower …
AndreasKaratzas Mar 22, 2026
1d689f3
[Model Runner V2] Support multi-modal embeddings for spec decode mode…
TheEpicDolphin Mar 22, 2026
ebb768b
[Bug] Fix fp8 deepgemm batch invariant (#37718)
yewentao256 Mar 22, 2026
1651dc7
[Test] Only Run MLA model when user explicitly set for batch invarian…
yewentao256 Mar 22, 2026
3e29ae5
Enable `NemotronHPuzzle` + `NemotronHMTP` (#37803)
netanel-haber Mar 22, 2026
7c28c8d
[MRV2] Skip hidden states allocation for PW CUDA graphs (#37818)
WoosukKwon Mar 22, 2026
362c784
[Bigfix]fix lora test by pass padded size back to the layer (#37811)
zyongye Mar 22, 2026
4709d6e
[MRV2] Use FP64 for Gumbel noise (#37798)
WoosukKwon Mar 22, 2026
fa4a2b6
[Model Runner V2] Enable piecewise CUDA graphs for pipeline paralleli…
ZhanqiuHu Mar 22, 2026
932f6b5
[MRV2] Enable PP CUDA graph test (#37830)
WoosukKwon Mar 22, 2026
b668312
Fix AudioFlamingo3/MusicFlamingo HF parity and RoTE handling (#37643)
lashahub Mar 23, 2026
1071e0f
always use `embed&token_classify` for bge-m3 (#37632)
staugust Mar 23, 2026
6381a60
[Test] Consolidate tool parser unit tests to tests/tool_parsers (#37834)
bbrowning Mar 23, 2026
7219594
[CI/Build][LoRA] Update Qwen35 LoRA testing (#37816)
jeejeelee Mar 23, 2026
ce1fd85
[Feature] ViT Full CUDA Graph (#35963)
b-mu Mar 23, 2026
bff9926
update doc for online fp8 quantization (#37851)
yma11 Mar 23, 2026
c1ccebb
[ROCm][Refactor] Enable AWQMarlinConfig on ROCm to use choose_mp_line…
mgehre-amd Mar 23, 2026
9b73a81
[Bugfix] JAIS: Only apply ALiBi when position_embedding_type='alibi' …
r266-tech Mar 23, 2026
56d4796
[Bugfix] Store Qwen3Next A_log in fp32 (#37810)
effortprogrammer Mar 23, 2026
64a0fde
[Perf] [Bugfix] Fix Triton autotuning in inference for Qwen3.5 (#37338)
arpera Mar 23, 2026
ce7bd8a
[ROCm] Fix fused_moe_fake signature mismatch and other AITER bugs (#3…
ChuanLi1101 Mar 23, 2026
0fedfb8
[Misc]Update gitignore (#37863)
wangxiyuan Mar 23, 2026
0a2f1d1
[FP8]add FP8 WoQ kernel abstraction. (#32929)
jikunshang Mar 23, 2026
fd6ed5e
[Frontend][Responses API] Fix arrival_time recording for TTFT on init…
qandrew Mar 23, 2026
b1e9c94
[XPU][MoE Refactor] Refactor xpu mxfp4 support into oracle (#37784)
jikunshang Mar 23, 2026
549abf0
[Bugfix] Fix CPU backend crash in KV cache block zeroing (#37550)
DorBernsohn Mar 23, 2026
877a94d
[Bugfix][LoRA] Fix incorrect LoRA Log (#37877)
jeejeelee Mar 23, 2026
7246a3e
[ROCm] fix sleep mode not releasing GPU memory problem on ROCm (#37533)
aaab8b Mar 23, 2026
dc14ca1
[Mypy] Fix mypy for `vllm/config` (#37808)
yewentao256 Mar 23, 2026
3c719bf
[Bugfix] RoBERTa position_id accumulation in CUDA graph padding regio…
yanghui1-arch Mar 23, 2026
543f8ae
[Bugfix] Fix RoBERTa position_ids accumulation on CUDA graph padding …
he-yufeng Mar 23, 2026
5bd7ef4
Use lazy graph module during split_module to defer recompile() (#37609)
angelayi Mar 23, 2026
7ad13db
[CI][PD] Add Hybrid SSM integration tests to CI (#37657)
NickLucche Mar 23, 2026
a27ca30
[CI] split Entrypoints Integration (API Server 1) into 3 jobs (#37882)
jikunshang Mar 23, 2026
31a59ef
[MRV2] Consider spec decoding in warmup (#37812)
WoosukKwon Mar 23, 2026
f4cf9d1
[Async][Spec Decoding] Zero-bubble async scheduling + spec decoding (…
MatthewBonanni Mar 23, 2026
b96f2e4
[Sparse24] [Deprecation] Remove Sparse24 CT integration and kernels (…
kylesayrs Mar 23, 2026
f13c263
[Bug][MoE] Fix TRTLLM NVFP4 Routing Kernel Precision (#36725)
robertgshaw2-redhat Mar 23, 2026
1095443
[Bug][MoE] Strengthen _supports_current_device() checks in the TRTLLM…
yzong-rh Mar 23, 2026
2aeeb7e
[Bugfix] Register VLLM_BATCH_INVARIANT in envs.py to fix spurious unk…
WindChimeRan Mar 23, 2026
31bcaf8
[CI] Split V1 Others into 3 separate jobs (#37016)
khluu Mar 23, 2026
b00fcf3
[Test] E2E Nemotron-3-Super tests (#36803)
roikoren755 Mar 24, 2026
82ddc58
[Model Runner V2] Gather multimodal embeddings before draft model pos…
TheEpicDolphin Mar 24, 2026
9a26544
[CI] Add batch invariant test: Block FP8 + small MOE (#37895)
yewentao256 Mar 24, 2026
2d96006
[ROCm][CI] Split Entrypoints Integration (API Server 1) into 3 jobs (…
AndreasKaratzas Mar 24, 2026
f8b7f26
[V0 Deprecation] Refactor kv cache from list to element (#37487)
yewentao256 Mar 24, 2026
a6ae504
Downsize CPU jobs to use small queue (#37913)
khluu Mar 24, 2026
133c9ac
[release] Move agent queue to Release cluster queues (#37783)
khluu Mar 24, 2026
98ce193
[Frontend][Bugfix] Pass default_chat_template_kwargs to AnthropicServ…
jetxa Mar 24, 2026
2e237a2
[KV Offload] Refactor CPU offloading: pluggable CachePolicy, remove B…
ronensc Mar 24, 2026
4ff3774
Fix tool_parser_cls type annotation from Callable to type[ToolParser]…
sfeng33 Mar 24, 2026
188b76a
[Docs] Fix build (#37991)
hmellor Mar 24, 2026
65a5195
[EPLB] Remove main waits in case of slow EPLB (#36271)
ilmarkov Mar 24, 2026
20fa06f
[Bugfix] Suppress spurious CPU KV cache warning in `launch render` (#…
sagearc Mar 24, 2026
404c267
[Bugfix] Add replacement of _compute_slot_mapping_kernel on CPU (#37987)
bigPYJ1151 Mar 24, 2026
5d57802
[Deprecate] Deprecate pooling multi task support. (#37956)
noooop Mar 24, 2026
1e443fa
Update new contributor message (#37999)
hmellor Mar 24, 2026
98a2dec
[Feature] limit thinking tokens (hard limit) (#20859)
llsj14 Mar 24, 2026
07165ab
[Core] add option to schedule requests based on full ISL (#37307)
DanBlanaru Mar 24, 2026
af3a594
[Mypy] Fix mypy for `vllm/model_executor` (except `vllm/model_executo…
hmellor Mar 24, 2026
810bd72
[XPU] Support Intel XPU hardware information collection in usage stat…
1643661061leo Mar 24, 2026
3cd3f9c
[Bugfix] Force continuous usage stats when CLI override is enabled (#…
dsingal0 Mar 24, 2026
e909077
Fix Mamba state corruption from referencing stale block table entries…
minosfuture Mar 24, 2026
14fdba2
docs: fix broken offline inference paths in documentation (#37998)
vineetatiwari27 Mar 24, 2026
b523a51
[Bugfix] Fix structured output crash on CPU due to pin_memory=True (#…
wjhrdy Mar 24, 2026
817d676
[Model] Add Granite 4.0 1B speech to supported models (#38019)
NickCao Mar 24, 2026
b1b380f
[BugFix] Fix order of compile logging (#38012)
zou3519 Mar 24, 2026
1053702
[BugFix] fix VLLM_USE_STANDALONE_COMPILE=0 (#38015)
zou3519 Mar 24, 2026
8fbbb35
[Bugfix] Pass hf_token through config loading paths for gated model s…
javierdejesusda Mar 24, 2026
3e3f7ee
[FlexAttention] allow custom mask mod (#37692)
liangel-02 Mar 24, 2026
89c7920
Add Ubuntu 24.04 support for Docker builds (#35386)
aasgaonkar Mar 24, 2026
ab1a479
[Model Runner V2][Minor] Simplify PP logic (#38031)
njhill Mar 24, 2026
a3620b3
[MRV2] Fix for DS v3.2 (#38030)
WoosukKwon Mar 24, 2026
ad6f941
[UX] Add flashinfer-cubin as CUDA default dep (#37233)
mgoin Mar 24, 2026
178f361
Make microbatch optimization (DBO) work with general models (#37926)
0xjunhao Mar 24, 2026
1020dd5
Remove unused rope_applied parameter
khairulkabir1661 Mar 26, 2026
4536d90
Restore upstream MLACommonImpl fixes unrelated to AITER
khairulkabir1661 Mar 26, 2026
b8286e2
Restore upstream changes in backend/metadata sections (after line 1508)
khairulkabir1661 Mar 26, 2026
6bc234a
Clean up comments in unified_mla_attention_with_output
khairulkabir1661 Mar 26, 2026
8e1033d
Clean up comments in unified_mla_kv_cache_update
khairulkabir1661 Mar 26, 2026
6c1ca2b
Simplify unified_mla_kv_cache_update docstring
khairulkabir1661 Mar 26, 2026
f37ec12
Clean up comments in unified_mla_attention (lines 1248-1289)
khairulkabir1661 Mar 26, 2026
5ac35c5
Clean up comments in decode path (lines 832-892)
khairulkabir1661 Mar 26, 2026
4a9f6fd
Clean up comments in prefill/decode paths (lines 728-817)
khairulkabir1661 Mar 26, 2026
3e29061
Clean up forward_impl signature comments (lines 654-671)
khairulkabir1661 Mar 26, 2026
4ccaf67
Clean up custom ops path comments (lines 602-612)
khairulkabir1661 Mar 26, 2026
42024bd
Clean up forward signature and context comments (lines 551-567)
khairulkabir1661 Mar 26, 2026
6ba2d98
Clean up AITER kernel init comments (lines 452-525)
khairulkabir1661 Mar 26, 2026
eb7db05
Clean up __init__ parameter comments (lines 305-325)
khairulkabir1661 Mar 26, 2026
f007215
Clean up _run_aiter_fused_decode comments (lines 920-1006)
khairulkabir1661 Mar 26, 2026
2 changes: 1 addition & 1 deletion .buildkite/hardware_tests/amd.yaml
Original file line number Diff line number Diff line change
@@ -10,7 +10,7 @@ steps:
docker build
--build-arg max_jobs=16
--build-arg REMOTE_VLLM=1
--build-arg ARG_PYTORCH_ROCM_ARCH='gfx90a;gfx942'
--build-arg ARG_PYTORCH_ROCM_ARCH='gfx90a;gfx942;gfx950'
--build-arg VLLM_BRANCH=$BUILDKITE_COMMIT
--tag "rocm/vllm-ci:${BUILDKITE_COMMIT}"
-f docker/Dockerfile.rocm
19 changes: 13 additions & 6 deletions .buildkite/hardware_tests/cpu.yaml
@@ -3,7 +3,6 @@ depends_on: []
steps:
- label: CPU-Kernel Tests
depends_on: []
soft_fail: true
device: intel_cpu
no_plugin: true
source_file_dependencies:
@@ -21,9 +20,21 @@ steps:
pytest -x -v -s tests/kernels/moe/test_cpu_fused_moe.py
pytest -x -v -s tests/kernels/test_onednn.py"

- label: CPU-Compatibility Tests
depends_on: []
device: intel_cpu
no_plugin: true
source_file_dependencies:
- cmake/cpu_extension.cmake
- setup.py
- vllm/platforms/cpu.py
commands:
- |
bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 20m "
bash .buildkite/scripts/hardware_ci/run-cpu-compatibility-test.sh"

- label: CPU-Language Generation and Pooling Model Tests
depends_on: []
soft_fail: true
device: intel_cpu
no_plugin: true
source_file_dependencies:
@@ -39,7 +50,6 @@

- label: CPU-Quantization Model Tests
depends_on: []
soft_fail: true
device: intel_cpu
no_plugin: true
source_file_dependencies:
@@ -59,7 +69,6 @@

- label: CPU-Distributed Tests
depends_on: []
soft_fail: true
device: intel_cpu
no_plugin: true
source_file_dependencies:
@@ -78,7 +87,6 @@

- label: CPU-Multi-Modal Model Tests %N
depends_on: []
soft_fail: true
device: intel_cpu
no_plugin: true
source_file_dependencies:
@@ -93,7 +101,6 @@

- label: "Arm CPU Test"
depends_on: []
soft_fail: true
device: arm_cpu
no_plugin: true
commands:
16 changes: 5 additions & 11 deletions .buildkite/image_build/image_build.sh
@@ -8,7 +8,7 @@ clean_docker_tag() {
}

print_usage_and_exit() {
echo "Usage: $0 <registry> <repo> <commit> <branch> <vllm_use_precompiled> <vllm_merge_base_commit> <cache_from> <cache_to>"
echo "Usage: $0 <registry> <repo> <commit> <branch> <image_tag> [<image_tag_latest>]"
exit 1
}

@@ -151,15 +151,15 @@ print_bake_config() {
docker buildx bake -f "${VLLM_BAKE_FILE_PATH}" -f "${CI_HCL_PATH}" --print "${TARGET}" | tee "${BAKE_CONFIG_FILE}" || true
echo "Saved bake config to ${BAKE_CONFIG_FILE}"
echo "--- :arrow_down: Uploading bake config to Buildkite"
buildkite-agent artifact upload "${BAKE_CONFIG_FILE}"
(cd "$(dirname "${BAKE_CONFIG_FILE}")" && buildkite-agent artifact upload "$(basename "${BAKE_CONFIG_FILE}")")
}

#################################
# Main Script #
#################################
print_instance_info

if [[ $# -lt 7 ]]; then
if [[ $# -lt 5 ]]; then
print_usage_and_exit
fi

@@ -168,10 +168,8 @@ REGISTRY=$1
REPO=$2
BUILDKITE_COMMIT=$3
BRANCH=$4
VLLM_USE_PRECOMPILED=0
VLLM_MERGE_BASE_COMMIT=""
IMAGE_TAG=$7
IMAGE_TAG_LATEST=${8:-} # only used for main branch, optional
IMAGE_TAG=$5
IMAGE_TAG_LATEST=${6:-} # only used for main branch, optional

# build config
TARGET="test-ci"
@@ -198,17 +196,13 @@ export CACHE_FROM
export CACHE_FROM_BASE_BRANCH
export CACHE_FROM_MAIN
export CACHE_TO
export VLLM_USE_PRECOMPILED
export VLLM_MERGE_BASE_COMMIT

# print args
echo "--- :mag: Arguments"
echo "REGISTRY: ${REGISTRY}"
echo "REPO: ${REPO}"
echo "BUILDKITE_COMMIT: ${BUILDKITE_COMMIT}"
echo "BRANCH: ${BRANCH}"
echo "VLLM_USE_PRECOMPILED: ${VLLM_USE_PRECOMPILED}"
echo "VLLM_MERGE_BASE_COMMIT: ${VLLM_MERGE_BASE_COMMIT}"
echo "IMAGE_TAG: ${IMAGE_TAG}"
echo "IMAGE_TAG_LATEST: ${IMAGE_TAG_LATEST}"

3 changes: 1 addition & 2 deletions .buildkite/image_build/image_build.yaml
@@ -5,8 +5,7 @@ steps:
depends_on: []
timeout_in_minutes: 600
commands:
- if [[ "$BUILDKITE_BRANCH" != "main" ]]; then .buildkite/image_build/image_build.sh $REGISTRY $REPO $BUILDKITE_COMMIT $BRANCH $VLLM_USE_PRECOMPILED $VLLM_MERGE_BASE_COMMIT $IMAGE_TAG; fi
- if [[ "$BUILDKITE_BRANCH" == "main" ]]; then .buildkite/image_build/image_build.sh $REGISTRY $REPO $BUILDKITE_COMMIT $BRANCH $VLLM_USE_PRECOMPILED $VLLM_MERGE_BASE_COMMIT $IMAGE_TAG $IMAGE_TAG_LATEST; fi
- if [[ "$BUILDKITE_BRANCH" == "main" ]]; then .buildkite/image_build/image_build.sh $REGISTRY $REPO $BUILDKITE_COMMIT $BRANCH $IMAGE_TAG $IMAGE_TAG_LATEST; else .buildkite/image_build/image_build.sh $REGISTRY $REPO $BUILDKITE_COMMIT $BRANCH $IMAGE_TAG; fi
retry:
automatic:
- exit_status: -1 # Agent was lost
14 changes: 6 additions & 8 deletions .buildkite/image_build/image_build_cpu.sh
@@ -11,10 +11,10 @@ REPO=$2
BUILDKITE_COMMIT=$3

# authenticate with AWS ECR
aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin $REGISTRY
aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY"

# skip build if image already exists
if [[ -z $(docker manifest inspect $REGISTRY/$REPO:$BUILDKITE_COMMIT-cpu) ]]; then
if [[ -z $(docker manifest inspect "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-cpu) ]]; then
echo "Image not found, proceeding with build..."
else
echo "Image found"
@@ -24,13 +24,11 @@ fi
# build
docker build --file docker/Dockerfile.cpu \
--build-arg max_jobs=16 \
--build-arg buildkite_commit=$BUILDKITE_COMMIT \
--build-arg VLLM_CPU_AVX512BF16=true \
--build-arg VLLM_CPU_AVX512VNNI=true \
--build-arg VLLM_CPU_AMXBF16=true \
--tag $REGISTRY/$REPO:$BUILDKITE_COMMIT-cpu \
--build-arg buildkite_commit="$BUILDKITE_COMMIT" \
--build-arg VLLM_CPU_X86=true \
--tag "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-cpu \
--target vllm-test \
--progress plain .

# push
docker push $REGISTRY/$REPO:$BUILDKITE_COMMIT-cpu
docker push "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-cpu
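The quoting changes in these build scripts guard against word splitting if a value ever contains whitespace. A minimal sketch of the failure mode, using a hypothetical `REGISTRY` value (not one from the actual CI config):

```shell
#!/bin/sh
# Hypothetical value containing a space, to show why quoting matters.
REGISTRY="public.ecr.aws/my registry"

# Unquoted: the shell splits the value into two words.
unquoted_count=$(set -- $REGISTRY; echo $#)

# Quoted: the value stays a single word, as the fixed scripts intend.
quoted_count=$(set -- "$REGISTRY"; echo $#)

echo "unquoted=$unquoted_count quoted=$quoted_count"
```

With the unquoted form, `docker login` and `docker push` would receive two arguments instead of one; the quoted form passes the value through intact.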
10 changes: 5 additions & 5 deletions .buildkite/image_build/image_build_cpu_arm64.sh
@@ -11,10 +11,10 @@ REPO=$2
BUILDKITE_COMMIT=$3

# authenticate with AWS ECR
aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin $REGISTRY
aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY"

# skip build if image already exists
if [[ -z $(docker manifest inspect $REGISTRY/$REPO:$BUILDKITE_COMMIT-cpu) ]]; then
if [[ -z $(docker manifest inspect "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-arm64-cpu) ]]; then
echo "Image not found, proceeding with build..."
else
echo "Image found"
@@ -24,10 +24,10 @@ fi
# build
docker build --file docker/Dockerfile.cpu \
--build-arg max_jobs=16 \
--build-arg buildkite_commit=$BUILDKITE_COMMIT \
--tag $REGISTRY/$REPO:$BUILDKITE_COMMIT-cpu \
--build-arg buildkite_commit="$BUILDKITE_COMMIT" \
--tag "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-arm64-cpu \
--target vllm-test \
--progress plain .

# push
docker push $REGISTRY/$REPO:$BUILDKITE_COMMIT-cpu
docker push "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-arm64-cpu
10 changes: 5 additions & 5 deletions .buildkite/image_build/image_build_hpu.sh
@@ -11,10 +11,10 @@ REPO=$2
BUILDKITE_COMMIT=$3

# authenticate with AWS ECR
aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin $REGISTRY
aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY"

# skip build if image already exists
if [[ -z $(docker manifest inspect $REGISTRY/$REPO:$BUILDKITE_COMMIT-hpu) ]]; then
if [[ -z $(docker manifest inspect "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-hpu) ]]; then
echo "Image not found, proceeding with build..."
else
echo "Image found"
@@ -25,10 +25,10 @@ fi
docker build \
--file tests/pytorch_ci_hud_benchmark/Dockerfile.hpu \
--build-arg max_jobs=16 \
--build-arg buildkite_commit=$BUILDKITE_COMMIT \
--tag $REGISTRY/$REPO:$BUILDKITE_COMMIT-hpu \
--build-arg buildkite_commit="$BUILDKITE_COMMIT" \
--tag "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-hpu \
--progress plain \
https://github.com/vllm-project/vllm-gaudi.git

# push
docker push $REGISTRY/$REPO:$BUILDKITE_COMMIT-hpu
docker push "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-hpu

This file was deleted.

@@ -0,0 +1 @@
Qwen3-235B-A22B-Instruct-2507-FP8.yaml
@@ -41,4 +41,4 @@ lm_eval --model vllm-vlm \
--tasks chartqa \
--batch_size auto \
--apply_chat_template \
--limit $LIMIT
--limit "$LIMIT"
@@ -20,14 +20,11 @@ usage() {
echo
}

while getopts "m:b:l:f:t:" OPT; do
while getopts "m:l:f:t:" OPT; do
case ${OPT} in
m )
MODEL="$OPTARG"
;;
b )
BATCH_SIZE="$OPTARG"
;;
l )
LIMIT="$OPTARG"
;;
10 changes: 8 additions & 2 deletions .buildkite/lm-eval-harness/test_lm_eval_correctness.py
@@ -13,9 +13,10 @@
from contextlib import contextmanager

import lm_eval
import numpy as np
import yaml

from vllm.platforms import current_platform

DEFAULT_RTOL = 0.08


@@ -63,6 +64,9 @@ def launch_lm_eval(eval_config, tp_size):
"allow_deprecated_quantization=True,"
)

if current_platform.is_rocm() and "Nemotron-3" in eval_config["model_name"]:
model_args += "attention_backend=TRITON_ATTN"

env_vars = eval_config.get("env_vars", None)
with scoped_env_vars(env_vars):
results = lm_eval.simple_evaluate(
@@ -102,6 +106,8 @@ def test_lm_eval_correctness_param(config_filename, tp_size):
f"ground_truth={ground_truth:.3f} | "
f"measured={measured_value:.3f} | rtol={rtol}"
)
success = success and np.isclose(ground_truth, measured_value, rtol=rtol)

min_acceptable = ground_truth * (1 - rtol)
success = success and measured_value >= min_acceptable

assert success
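The hunk above replaces a symmetric `np.isclose` comparison with a one-sided lower bound: a measured score may now exceed the ground truth by any margin, but must not fall below it by more than `rtol`. A standalone sketch of the two behaviors (helper names are illustrative, not from the patch; `atol` is omitted for simplicity):

```python
def symmetric_close(ground_truth: float, measured: float, rtol: float) -> bool:
    # Old behavior, np.isclose-style: fails if measured drifts too far
    # in EITHER direction from ground truth.
    return abs(ground_truth - measured) <= rtol * abs(measured)

def one_sided_ok(ground_truth: float, measured: float, rtol: float) -> bool:
    # New behavior: only a regression below the tolerance band fails.
    return measured >= ground_truth * (1 - rtol)

# A score well above ground truth passes the new check but failed the old one.
print(symmetric_close(0.80, 0.95, 0.08))  # False: 0.15 > 0.08 * 0.95
print(one_sided_ok(0.80, 0.95, 0.08))     # True:  0.95 >= 0.736
print(one_sided_ok(0.80, 0.72, 0.08))     # False: 0.72 <  0.80 * 0.92
```

This makes the eval gate tolerant of genuine accuracy improvements while still catching regressions.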
1 change: 0 additions & 1 deletion .buildkite/performance-benchmarks/README.md
@@ -83,7 +83,6 @@ We test the throughput by using `vllm bench serve` with request rate = inf to co
"server_parameters": {
"model": "meta-llama/Meta-Llama-3-8B",
"tensor_parallel_size": 1,
"swap_space": 16,
"disable_log_stats": "",
"load_format": "dummy"
},