omni-nicelab · yJader · Feb 13, 2026 · Feb 28, 2026 · Mar 3, 2026 · Mar 3, 2026
diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml
@@ -0,0 +1,38 @@
+steps:
+  - label: ":docker: Build image"
+    key: image-build  # every upload step in this pipeline depends on this key
+    commands:  # log in to public ECR, build the CI image, then tag and push it under $BUILDKITE_COMMIT
+      - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
+      - "docker build --progress=plain --file docker/Dockerfile.ci -t vllm-omni-ci ."
+      - "docker tag vllm-omni-ci public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT"
+      - "docker push public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT"
+    agents:
+      queue: "cpu_queue_premerge"
+
+  # L2 Test: once the CI image is built, upload the steps defined in test-ready.yml.
+  - label: "Upload Ready Pipeline"
+    depends_on: image-build
+    key: upload-ready-pipeline  # steps in test-ready.yml depend on this key
+    commands:
+      - buildkite-agent pipeline upload .buildkite/test-ready.yml
+    agents:
+      queue: "cpu_queue_premerge"
+
+  # L3 Test: once the CI image is built, upload the steps defined in test-merge.yml.
+  - label: "Upload Merge Pipeline"
+    depends_on: image-build
+    key: upload-merge-pipeline
+    commands:
+      - buildkite-agent pipeline upload .buildkite/test-merge.yml
+    agents:
+      queue: "cpu_queue_premerge"
+
+  # L4 Test: upload the steps defined in test-nightly.yml, only for builds with NIGHTLY=1.
+  - label: "Upload Nightly Pipeline"
+    depends_on: image-build
+    key: upload-nightly-pipeline  # steps in test-nightly.yml depend on this key
+    if: build.env("NIGHTLY") == "1"
+    commands:
+      - buildkite-agent pipeline upload .buildkite/test-nightly.yml
+    agents:
+      queue: "cpu_queue_premerge"
diff --git a/.buildkite/scripts/hardware_ci/run-xpu-test.sh b/.buildkite/scripts/hardware_ci/run-xpu-test.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+# Builds the XPU Docker images and runs the XPU pytest suites inside a container.
+# Requires VLLM_VERSION and BUILDKITE_COMMIT to be set in the environment.
+set -ex
+
+omni_source_dir=$(git rev-parse --show-toplevel)
+
+base_image_name="xpu/vllm-omni-ci-base:${VLLM_VERSION:?VLLM_VERSION must be set}"
+image_name="xpu/vllm-omni-ci:${BUILDKITE_COMMIT:?BUILDKITE_COMMIT must be set}"
+# Random 10-character suffix so repeated runs of the same commit get distinct names.
+container_name="xpu_${BUILDKITE_COMMIT}_$(tr -dc A-Za-z0-9 </dev/urandom | head -c 10)"
+
+# Register cleanup before any build so a failed build is cleaned up as well.
+remove_docker_container() {
+    docker rm -f "${container_name}" || true
+    docker image rm -f "${image_name}" || true
+    docker system prune -f || true
+}
+trap remove_docker_container EXIT
+
+cd "${omni_source_dir}"
+if [ -z "$(docker images -q "${base_image_name}")" ]; then
+    docker build --target vllm-base -t "${base_image_name}" --build-arg "VLLM_VERSION=${VLLM_VERSION}" -f docker/Dockerfile.xpu .
+fi
+
+# Build the per-commit test image, passing the base image in as VLLM_BASE.
+docker build --build-arg "VLLM_BASE=${base_image_name}" --build-arg "VLLM_VERSION=${VLLM_VERSION}" -t "${image_name}" -f docker/Dockerfile.xpu .
+
+HF_CACHE="${HF_CACHE:-$(realpath ~)/.cache/huggingface}"
+mkdir -p "${HF_CACHE}"
+HF_MOUNT="/root/.cache/huggingface"
+
+# Run both pytest marker suites in the container; timeout stops the run after 30 minutes.
+time timeout -k 30 30m docker run \
+    --device /dev/dri:/dev/dri \
+    --net=host \
+    --ipc=host \
+    -v /dev/dri/by-path:/dev/dri/by-path \
+    -v "${HF_CACHE}:${HF_MOUNT}" \
+    --security-opt seccomp=unconfined \
+    --entrypoint="" \
+    -e VLLM_LOGGING_LEVEL \
+    -e VLLM_OMNI_LOGGING_LEVEL \
+    -e HF_TOKEN \
+    -e ZE_AFFINITY_MASK \
+    --name "${container_name}" \
+    "${image_name}" \
+    bash -c '
+    set -e
+    echo "$ZE_AFFINITY_MASK"
+    pip install tblib==3.1.0
+    cd /workspace/vllm-omni
+    pytest -v -s -m "core_model and xpu and B60"
+    pytest -v -s -m "advanced_model and xpu and B60"
+'
diff --git a/.buildkite/test-nightly.yml b/.buildkite/test-nightly.yml
@@ -54,15 +54,72 @@ steps:
       - docker#v5.2.0:
           image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
           always-pull: true
+          propagate-environment: true
           shm-size: "8gb"
+          environment:
+            - "HF_HOME=/fsx/hf_cache"
+          volumes:
+            - "/fsx/hf_cache:/fsx/hf_cache"
+
+  - label: ":full_moon: Diffusion Model Test with H100"
+    timeout_in_minutes: 60
+    depends_on: upload-nightly-pipeline
+    if: build.env("NIGHTLY") == "1"  # same nightly guard as the other nightly steps
+    commands:  # run the diffusion expansion tests selected by the advanced_model/diffusion/H100 markers
+      - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+      - pytest -s -v tests/e2e/online_serving/test_*_expansion.py -m "advanced_model and diffusion and H100" --run-level "advanced_model"
+    agents:
+      queue: "mithril-h100-pool"
+    plugins:
+      - kubernetes:
+          podSpec:
+            containers:
+              - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
+                resources:
+                  limits:
+                    nvidia.com/gpu: 2  # this step requests two GPUs
+                volumeMounts:
+                  - name: devshm  # memory-backed emptyDir defined under volumes
+                    mountPath: /dev/shm
+                  - name: hf-cache  # hostPath volume defined under volumes
+                    mountPath: /root/.cache/huggingface
+                env:
+                  - name: HF_HOME
+                    value: /root/.cache/huggingface  # matches the hf-cache mountPath
+            nodeSelector:
+              node.kubernetes.io/instance-type: gpu-h100-sxm
+            volumes:
+              - name: devshm
+                emptyDir:
+                  medium: Memory
+              - name: hf-cache
+                hostPath:
+                  path: /mnt/hf-cache
+                  type: DirectoryOrCreate
+
+  - label: ":full_moon: Qwen3-TTS Non-Async-Chunk E2E Test"
+    timeout_in_minutes: 30
+    depends_on: upload-nightly-pipeline
+    if: build.env("NIGHTLY") == "1"  # only runs for builds with NIGHTLY=1
+    commands:  # download the model first, then run only the TestQwen3TTSNoAsyncChunk class
+      - |
+        huggingface-cli download Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice
+        export VLLM_WORKER_MULTIPROC_METHOD=spawn
+        pytest -s -v tests/e2e/online_serving/test_qwen3_tts.py::TestQwen3TTSNoAsyncChunk
+    agents:
+      queue: "gpu_4_queue"
+    plugins:
+      - docker#v5.2.0:
+          image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT  # CI image tagged with the commit under test
+          always-pull: true
           propagate-environment: true
           shm-size: "8gb"
           environment:
             - "HF_HOME=/fsx/hf_cache"
           volumes:
             - "/fsx/hf_cache:/fsx/hf_cache"
 
-  - label: ":full_moon: Omni Model Perf Test with H100"
+  - label: ":full_moon: Omni Model Perf Test & Test Case Statistics"
     key: nightly-performance
     timeout_in_minutes: 180
     depends_on: upload-nightly-pipeline
@@ -72,6 +129,8 @@ steps:
       - export BENCHMARK_DIR=tests
       - pytest -s -v tests/perf/scripts/run_benchmark.py
       - buildkite-agent artifact upload "tests/*.json"
+      - python tools/nightly/buildkite_testcase_statistics.py -o tests/buildkite_testcase_statistics.html
+      - buildkite-agent artifact upload "tests/*.html"
     agents:
       queue: "mithril-h100-pool"
     plugins:
@@ -101,17 +160,19 @@ steps:
                   path: /mnt/hf-cache
                   type: DirectoryOrCreate
 
-  - label: ":email: Nightly Perf Collection & Email"
+  - label: ":email: Nightly Collection & Email"
     key: nightly-perf-distribution
-    depends_on: nightly-performance
+    depends_on:  # list form; items indented like every other sequence in this file
+      - nightly-performance
     if: build.env("NIGHTLY") == "1"
     commands:
       - pip install openpyxl
       - export DEFAULT_INPUT_DIR=tests
       - export DEFAULT_OUTPUT_DIR=tests
       - buildkite-agent artifact download "tests/*.json" . --step nightly-performance
+      - buildkite-agent artifact download "tests/*.html" . --step nightly-performance
       - python tools/nightly/generate_nightly_perf_excel.py
-      - python tools/nightly/send_nightly_perf_email.py
+      - python tools/nightly/send_nightly_email.py --report-file "tests/*.xlsx, tests/*.html"
       - buildkite-agent artifact upload "tests/*.xlsx"
     agents:
       queue: "cpu_queue_premerge"
diff --git a/.buildkite/test-ready.yml b/.buildkite/test-ready.yml
@@ -34,15 +34,7 @@ steps:
   - label: "Diffusion Model CPU offloading Test"
     depends_on: upload-ready-pipeline
     commands:
-      - |
-        timeout 20m bash -c '
-          set +e
-          pytest -s -v tests/e2e/offline_inference/test_diffusion_cpu_offload.py
-          EXIT1=$$?
-          pytest -s -v tests/e2e/offline_inference/test_diffusion_layerwise_offload.py
-          EXIT2=$$?
-          exit $$((EXIT1 | EXIT2))
-        '
+      - timeout 10m pytest -s -v tests/e2e/offline_inference/test_diffusion_cpu_offload.py
     agents:
       queue: "gpu_1_queue" # g6.4xlarge instance on AWS, has 1 L4 GPU
     plugins:
@@ -214,9 +206,10 @@ steps:
     commands:
       - |
         timeout 20m bash -c '
+          huggingface-cli download Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice
           export VLLM_LOGGING_LEVEL=DEBUG
           export VLLM_WORKER_MULTIPROC_METHOD=spawn
-          pytest -s -v tests/e2e/online_serving/test_qwen3_tts.py
+          pytest -s -v tests/e2e/online_serving/test_qwen3_tts.py -k "not NoAsyncChunk"
         '
     agents:
       queue: "gpu_4_queue"
@@ -268,39 +261,117 @@ steps:
   #                 path: /mnt/hf-cache
   #                 type: DirectoryOrCreate
 
-  # - label: "Bagel Text2Img Model Test with H100"
-  #   depends_on: upload-ready-pipeline
-  #   commands:
-  #     - |
-  #       timeout 30m bash -c '
-  #         export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  #         pytest -s -v tests/e2e/offline_inference/test_bagel_text2img.py
-  #       '
-  #   agents:
-  #     queue: "mithril-h100-pool"
-  #   plugins:
-  #     - kubernetes:
-  #         podSpec:
-  #           containers:
-  #             - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
-  #               resources:
-  #                 limits:
-  #                   nvidia.com/gpu: 1
-  #               volumeMounts:
-  #                 - name: devshm
-  #                   mountPath: /dev/shm
-  #                 - name: hf-cache
-  #                   mountPath: /root/.cache/huggingface
-  #               env:
-  #                 - name: HF_HOME
-  #                   value: /root/.cache/huggingface
-  #           nodeSelector:
-  #             node.kubernetes.io/instance-type: gpu-h100-sxm
-  #           volumes:
-  #             - name: devshm
-  #               emptyDir:
-  #                 medium: Memory
-  #             - name: hf-cache
-  #               hostPath:
-  #                 path: /mnt/hf-cache
-  #                 type: DirectoryOrCreate
+  - label: "Bagel Text2Img Model Test with H100"
+    depends_on: upload-ready-pipeline  # key of the step that uploads test-ready.yml
+    commands:  # offline text-to-image test, capped at 30 minutes by the shell timeout
+      - |
+        timeout 30m bash -c '
+          export VLLM_WORKER_MULTIPROC_METHOD=spawn
+          export VLLM_TEST_CLEAN_GPU_MEMORY=1
+          pytest -s -v tests/e2e/offline_inference/test_bagel_text2img.py
+        '
+    agents:
+      queue: "mithril-h100-pool"
+    plugins:
+      - kubernetes:
+          podSpec:
+            containers:
+              - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
+                resources:
+                  limits:
+                    nvidia.com/gpu: 1  # this step requests one GPU
+                volumeMounts:
+                  - name: devshm  # memory-backed emptyDir defined under volumes
+                    mountPath: /dev/shm
+                  - name: hf-cache  # hostPath volume defined under volumes
+                    mountPath: /root/.cache/huggingface
+                env:
+                  - name: HF_HOME
+                    value: /root/.cache/huggingface  # matches the hf-cache mountPath
+            nodeSelector:
+              node.kubernetes.io/instance-type: gpu-h100-sxm
+            volumes:
+              - name: devshm
+                emptyDir:
+                  medium: Memory
+              - name: hf-cache
+                hostPath:
+                  path: /mnt/hf-cache
+                  type: DirectoryOrCreate  # Kubernetes creates the host directory if it is missing
+
+  - label: "Bagel Img2Img Model Test with H100"
+    depends_on: upload-ready-pipeline  # key of the step that uploads test-ready.yml
+    commands:  # offline image-to-image test, capped at 30 minutes by the shell timeout
+      - |
+        timeout 30m bash -c '
+          export VLLM_WORKER_MULTIPROC_METHOD=spawn
+          export VLLM_TEST_CLEAN_GPU_MEMORY=1
+          pytest -s -v tests/e2e/offline_inference/test_bagel_img2img.py
+        '
+    agents:
+      queue: "mithril-h100-pool"
+    plugins:
+      - kubernetes:
+          podSpec:
+            containers:
+              - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
+                resources:
+                  limits:
+                    nvidia.com/gpu: 1  # this step requests one GPU
+                volumeMounts:
+                  - name: devshm  # memory-backed emptyDir defined under volumes
+                    mountPath: /dev/shm
+                  - name: hf-cache  # hostPath volume defined under volumes
+                    mountPath: /root/.cache/huggingface
+                env:
+                  - name: HF_HOME
+                    value: /root/.cache/huggingface  # matches the hf-cache mountPath
+            nodeSelector:
+              node.kubernetes.io/instance-type: gpu-h100-sxm
+            volumes:
+              - name: devshm
+                emptyDir:
+                  medium: Memory
+              - name: hf-cache
+                hostPath:
+                  path: /mnt/hf-cache
+                  type: DirectoryOrCreate  # Kubernetes creates the host directory if it is missing
+
+  - label: "Bagel Online Serving Test with H100"
+    depends_on: upload-ready-pipeline  # key of the step that uploads test-ready.yml
+    commands:  # online serving test, capped at 40 minutes by the shell timeout
+      - |
+        timeout 40m bash -c '
+          export VLLM_WORKER_MULTIPROC_METHOD=spawn
+          export VLLM_TEST_CLEAN_GPU_MEMORY=1
+          export VLLM_IMAGE_FETCH_TIMEOUT=60
+          pytest -s -v tests/e2e/online_serving/test_bagel_online.py
+        '
+    agents:
+      queue: "mithril-h100-pool"
+    plugins:
+      - kubernetes:
+          podSpec:
+            containers:
+              - image: 936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
+                resources:
+                  limits:
+                    nvidia.com/gpu: 1  # this step requests one GPU
+                volumeMounts:
+                  - name: devshm  # memory-backed emptyDir defined under volumes
+                    mountPath: /dev/shm
+                  - name: hf-cache  # hostPath volume defined under volumes
+                    mountPath: /root/.cache/huggingface
+                env:
+                  - name: HF_HOME
+                    value: /root/.cache/huggingface  # matches the hf-cache mountPath
+            nodeSelector:
+              node.kubernetes.io/instance-type: gpu-h100-sxm
+            volumes:
+              - name: devshm
+                emptyDir:
+                  medium: Memory
+              - name: hf-cache
+                hostPath:
+                  path: /mnt/hf-cache
+                  type: DirectoryOrCreate  # Kubernetes creates the host directory if it is missing