From 8e520ee5944edd748c12ce2858c331bf79be137e Mon Sep 17 00:00:00 2001 From: khluu Date: Mon, 4 May 2026 22:17:50 -0700 Subject: [PATCH 1/5] [CI] Automate Docker Hub release image publishing Move docker tag/push/manifest logic from the annotation script into a new publish-release-images.sh that runs as an automated pipeline step. Adds all image variants including Ubuntu 24.04, CUDA 12.9, ROCm, and CPU with graceful fallback for CPU images behind their own block steps. Co-authored-by: Claude Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: khluu --- .buildkite/release-pipeline.yaml | 27 +++ .buildkite/scripts/annotate-release.sh | 94 +--------- .buildkite/scripts/publish-release-images.sh | 172 +++++++++++++++++++ 3 files changed, 200 insertions(+), 93 deletions(-) create mode 100755 .buildkite/scripts/publish-release-images.sh diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml index f122c423ba5c..612b903031b6 100644 --- a/.buildkite/release-pipeline.yaml +++ b/.buildkite/release-pipeline.yaml @@ -436,6 +436,33 @@ steps: DOCKER_BUILDKIT: "1" DOCKERHUB_USERNAME: "vllmbot" + - block: "Publish release images to DockerHub" + key: block-publish-release-images + depends_on: + - annotate-release-workflow + - create-multi-arch-manifest-cuda-12-9 + - create-multi-arch-manifest-ubuntu2404 + - create-multi-arch-manifest-cuda-12-9-ubuntu2404 + - build-rocm-release-image + - input-release-version + if: build.env("NIGHTLY") != "1" + + - label: "Publish release images to DockerHub" + depends_on: + - block-publish-release-images + id: publish-release-images-dockerhub + agents: + queue: small_cpu_queue_release + commands: + - "bash .buildkite/scripts/publish-release-images.sh" + plugins: + - docker-login#v3.0.0: + username: vllmbot + password-env: DOCKERHUB_TOKEN + env: + DOCKER_BUILDKIT: "1" + DOCKERHUB_USERNAME: "vllmbot" + - group: "Publish wheels" key: "publish-wheels" steps: diff --git a/.buildkite/scripts/annotate-release.sh 
b/.buildkite/scripts/annotate-release.sh index 6f41d1cdda47..afa884fba46b 100755 --- a/.buildkite/scripts/annotate-release.sh +++ b/.buildkite/scripts/annotate-release.sh @@ -8,8 +8,6 @@ if [ -z "${RELEASE_VERSION}" ]; then RELEASE_VERSION="1.0.0.dev" fi -ROCM_BASE_CACHE_KEY=$(.buildkite/scripts/cache-rocm-base-wheels.sh key) - buildkite-agent annotate --style 'info' --context 'release-workflow' << EOF To download the wheel (by commit): \`\`\` @@ -25,95 +23,5 @@ aws s3 cp s3://vllm-wheels/${BUILDKITE_COMMIT}/vllm-${RELEASE_VERSION}+cpu-cp38- aws s3 cp s3://vllm-wheels/${BUILDKITE_COMMIT}/vllm-${RELEASE_VERSION}+cpu-cp38-abi3-manylinux_2_35_aarch64.whl . \`\`\` - -To download and upload the image: - -\`\`\` -# Download images: - -docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-x86_64 -docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-aarch64 -docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-x86_64-cu129 -docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-aarch64-cu129 -docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${ROCM_BASE_CACHE_KEY}-rocm-base -docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-rocm -docker pull public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v${RELEASE_VERSION} -docker pull public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:v${RELEASE_VERSION} - -# Tag and push images: - -## CUDA - -docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-x86_64 vllm/vllm-openai:x86_64 -docker tag vllm/vllm-openai:x86_64 vllm/vllm-openai:latest-x86_64 -docker tag vllm/vllm-openai:x86_64 vllm/vllm-openai:v${RELEASE_VERSION}-x86_64 -docker push vllm/vllm-openai:latest-x86_64 -docker push vllm/vllm-openai:v${RELEASE_VERSION}-x86_64 - -docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-x86_64-cu129 vllm/vllm-openai:x86_64-cu129 -docker tag vllm/vllm-openai:x86_64-cu129 
vllm/vllm-openai:latest-x86_64-cu129 -docker tag vllm/vllm-openai:x86_64-cu129 vllm/vllm-openai:v${RELEASE_VERSION}-x86_64-cu129 -docker push vllm/vllm-openai:latest-x86_64-cu129 -docker push vllm/vllm-openai:v${RELEASE_VERSION}-x86_64-cu129 - -docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-aarch64 vllm/vllm-openai:aarch64 -docker tag vllm/vllm-openai:aarch64 vllm/vllm-openai:latest-aarch64 -docker tag vllm/vllm-openai:aarch64 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64 -docker push vllm/vllm-openai:latest-aarch64 -docker push vllm/vllm-openai:v${RELEASE_VERSION}-aarch64 - -docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-aarch64-cu129 vllm/vllm-openai:aarch64-cu129 -docker tag vllm/vllm-openai:aarch64-cu129 vllm/vllm-openai:latest-aarch64-cu129 -docker tag vllm/vllm-openai:aarch64-cu129 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64-cu129 -docker push vllm/vllm-openai:latest-aarch64-cu129 -docker push vllm/vllm-openai:v${RELEASE_VERSION}-aarch64-cu129 - -## ROCm - -docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-rocm vllm/vllm-openai-rocm:${BUILDKITE_COMMIT} -docker tag vllm/vllm-openai-rocm:${BUILDKITE_COMMIT} vllm/vllm-openai-rocm:latest -docker tag vllm/vllm-openai-rocm:${BUILDKITE_COMMIT} vllm/vllm-openai-rocm:v${RELEASE_VERSION} -docker push vllm/vllm-openai-rocm:latest -docker push vllm/vllm-openai-rocm:v${RELEASE_VERSION} - -docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${ROCM_BASE_CACHE_KEY}-rocm-base vllm/vllm-openai-rocm:${BUILDKITE_COMMIT}-base -docker tag vllm/vllm-openai-rocm:${BUILDKITE_COMMIT}-base vllm/vllm-openai-rocm:latest-base -docker tag vllm/vllm-openai-rocm:${BUILDKITE_COMMIT}-base vllm/vllm-openai-rocm:v${RELEASE_VERSION}-base -docker push vllm/vllm-openai-rocm:latest-base -docker push vllm/vllm-openai-rocm:v${RELEASE_VERSION}-base - -## CPU - -docker tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v${RELEASE_VERSION} vllm/vllm-openai-cpu:x86_64 -docker tag 
vllm/vllm-openai-cpu:x86_64 vllm/vllm-openai-cpu:latest-x86_64 -docker tag vllm/vllm-openai-cpu:x86_64 vllm/vllm-openai-cpu:v${RELEASE_VERSION}-x86_64 -docker push vllm/vllm-openai-cpu:latest-x86_64 -docker push vllm/vllm-openai-cpu:v${RELEASE_VERSION}-x86_64 - -docker tag public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:v${RELEASE_VERSION} vllm/vllm-openai-cpu:arm64 -docker tag vllm/vllm-openai-cpu:arm64 vllm/vllm-openai-cpu:latest-arm64 -docker tag vllm/vllm-openai-cpu:arm64 vllm/vllm-openai-cpu:v${RELEASE_VERSION}-arm64 -docker push vllm/vllm-openai-cpu:latest-arm64 -docker push vllm/vllm-openai-cpu:v${RELEASE_VERSION}-arm64 - -# Create multi-arch manifest: - -docker manifest rm vllm/vllm-openai:latest -docker manifest create vllm/vllm-openai:latest vllm/vllm-openai:latest-x86_64 vllm/vllm-openai:latest-aarch64 -docker manifest create vllm/vllm-openai:v${RELEASE_VERSION} vllm/vllm-openai:v${RELEASE_VERSION}-x86_64 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64 -docker manifest push vllm/vllm-openai:latest -docker manifest push vllm/vllm-openai:v${RELEASE_VERSION} - -docker manifest rm vllm/vllm-openai:latest-cu129 -docker manifest create vllm/vllm-openai:latest-cu129 vllm/vllm-openai:latest-x86_64-cu129 vllm/vllm-openai:latest-aarch64-cu129 -docker manifest create vllm/vllm-openai:v${RELEASE_VERSION}-cu129 vllm/vllm-openai:v${RELEASE_VERSION}-x86_64-cu129 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64-cu129 -docker manifest push vllm/vllm-openai:latest-cu129 -docker manifest push vllm/vllm-openai:v${RELEASE_VERSION}-cu129 - -docker manifest rm vllm/vllm-openai-cpu:latest || true -docker manifest create vllm/vllm-openai-cpu:latest vllm/vllm-openai-cpu:latest-x86_64 vllm/vllm-openai-cpu:latest-arm64 -docker manifest create vllm/vllm-openai-cpu:v${RELEASE_VERSION} vllm/vllm-openai-cpu:v${RELEASE_VERSION}-x86_64 vllm/vllm-openai-cpu:v${RELEASE_VERSION}-arm64 -docker manifest push vllm/vllm-openai-cpu:latest -docker manifest push 
vllm/vllm-openai-cpu:v${RELEASE_VERSION} -\`\`\` +Docker images are published automatically by the "Publish release images to DockerHub" pipeline step. EOF diff --git a/.buildkite/scripts/publish-release-images.sh b/.buildkite/scripts/publish-release-images.sh new file mode 100755 index 000000000000..83d9c159a7a5 --- /dev/null +++ b/.buildkite/scripts/publish-release-images.sh @@ -0,0 +1,172 @@ +#!/bin/bash +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +# +# Publish release Docker images from ECR to DockerHub. +# Pulls per-arch images, tags with latest and versioned tags, pushes them, +# then creates and pushes multi-arch manifests. + +set -euo pipefail + +RELEASE_VERSION=$(buildkite-agent meta-data get release-version 2>/dev/null | sed 's/^v//') +if [ -z "${RELEASE_VERSION}" ]; then + echo "ERROR: release-version metadata not set" + exit 1 +fi + +COMMIT="$BUILDKITE_COMMIT" +ROCM_BASE_CACHE_KEY=$(.buildkite/scripts/cache-rocm-base-wheels.sh key) + +echo "========================================" +echo "Publishing release images v${RELEASE_VERSION}" +echo " Commit: ${COMMIT}" +echo " ROCm base cache key: ${ROCM_BASE_CACHE_KEY}" +echo "========================================" + +# Login to ECR to pull staging images +aws ecr-public get-login-password --region us-east-1 | \ + docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7 + +# ---- CUDA (default: 13.0) ---- + +docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-x86_64 +docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-aarch64 + +docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-x86_64 vllm/vllm-openai:latest-x86_64 +docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-x86_64 vllm/vllm-openai:v${RELEASE_VERSION}-x86_64 +docker push vllm/vllm-openai:latest-x86_64 +docker push vllm/vllm-openai:v${RELEASE_VERSION}-x86_64 + +docker tag 
public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-aarch64 vllm/vllm-openai:latest-aarch64 +docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-aarch64 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64 +docker push vllm/vllm-openai:latest-aarch64 +docker push vllm/vllm-openai:v${RELEASE_VERSION}-aarch64 + +docker manifest rm vllm/vllm-openai:latest || true +docker manifest rm vllm/vllm-openai:v${RELEASE_VERSION} || true +docker manifest create vllm/vllm-openai:latest vllm/vllm-openai:latest-x86_64 vllm/vllm-openai:latest-aarch64 +docker manifest create vllm/vllm-openai:v${RELEASE_VERSION} vllm/vllm-openai:v${RELEASE_VERSION}-x86_64 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64 +docker manifest push vllm/vllm-openai:latest +docker manifest push vllm/vllm-openai:v${RELEASE_VERSION} + +# ---- CUDA 12.9 ---- + +docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-x86_64-cu129 +docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-aarch64-cu129 + +docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-x86_64-cu129 vllm/vllm-openai:latest-x86_64-cu129 +docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-x86_64-cu129 vllm/vllm-openai:v${RELEASE_VERSION}-x86_64-cu129 +docker push vllm/vllm-openai:latest-x86_64-cu129 +docker push vllm/vllm-openai:v${RELEASE_VERSION}-x86_64-cu129 + +docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-aarch64-cu129 vllm/vllm-openai:latest-aarch64-cu129 +docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-aarch64-cu129 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64-cu129 +docker push vllm/vllm-openai:latest-aarch64-cu129 +docker push vllm/vllm-openai:v${RELEASE_VERSION}-aarch64-cu129 + +docker manifest rm vllm/vllm-openai:latest-cu129 || true +docker manifest rm vllm/vllm-openai:v${RELEASE_VERSION}-cu129 || true +docker manifest create vllm/vllm-openai:latest-cu129 vllm/vllm-openai:latest-x86_64-cu129 vllm/vllm-openai:latest-aarch64-cu129 +docker manifest create 
vllm/vllm-openai:v${RELEASE_VERSION}-cu129 vllm/vllm-openai:v${RELEASE_VERSION}-x86_64-cu129 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64-cu129 +docker manifest push vllm/vllm-openai:latest-cu129 +docker manifest push vllm/vllm-openai:v${RELEASE_VERSION}-cu129 + +# ---- Ubuntu 24.04 (CUDA 13.0) ---- + +docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-x86_64-ubuntu2404 +docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-aarch64-ubuntu2404 + +docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-x86_64-ubuntu2404 vllm/vllm-openai:latest-x86_64-ubuntu2404 +docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-x86_64-ubuntu2404 vllm/vllm-openai:v${RELEASE_VERSION}-x86_64-ubuntu2404 +docker push vllm/vllm-openai:latest-x86_64-ubuntu2404 +docker push vllm/vllm-openai:v${RELEASE_VERSION}-x86_64-ubuntu2404 + +docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-aarch64-ubuntu2404 vllm/vllm-openai:latest-aarch64-ubuntu2404 +docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-aarch64-ubuntu2404 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64-ubuntu2404 +docker push vllm/vllm-openai:latest-aarch64-ubuntu2404 +docker push vllm/vllm-openai:v${RELEASE_VERSION}-aarch64-ubuntu2404 + +docker manifest rm vllm/vllm-openai:latest-ubuntu2404 || true +docker manifest rm vllm/vllm-openai:v${RELEASE_VERSION}-ubuntu2404 || true +docker manifest create vllm/vllm-openai:latest-ubuntu2404 vllm/vllm-openai:latest-x86_64-ubuntu2404 vllm/vllm-openai:latest-aarch64-ubuntu2404 +docker manifest create vllm/vllm-openai:v${RELEASE_VERSION}-ubuntu2404 vllm/vllm-openai:v${RELEASE_VERSION}-x86_64-ubuntu2404 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64-ubuntu2404 +docker manifest push vllm/vllm-openai:latest-ubuntu2404 +docker manifest push vllm/vllm-openai:v${RELEASE_VERSION}-ubuntu2404 + +# ---- Ubuntu 24.04 (CUDA 12.9) ---- + +docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-x86_64-cu129-ubuntu2404 +docker pull 
public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-aarch64-cu129-ubuntu2404 + +docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-x86_64-cu129-ubuntu2404 vllm/vllm-openai:latest-x86_64-cu129-ubuntu2404 +docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-x86_64-cu129-ubuntu2404 vllm/vllm-openai:v${RELEASE_VERSION}-x86_64-cu129-ubuntu2404 +docker push vllm/vllm-openai:latest-x86_64-cu129-ubuntu2404 +docker push vllm/vllm-openai:v${RELEASE_VERSION}-x86_64-cu129-ubuntu2404 + +docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-aarch64-cu129-ubuntu2404 vllm/vllm-openai:latest-aarch64-cu129-ubuntu2404 +docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-aarch64-cu129-ubuntu2404 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64-cu129-ubuntu2404 +docker push vllm/vllm-openai:latest-aarch64-cu129-ubuntu2404 +docker push vllm/vllm-openai:v${RELEASE_VERSION}-aarch64-cu129-ubuntu2404 + +docker manifest rm vllm/vllm-openai:latest-cu129-ubuntu2404 || true +docker manifest rm vllm/vllm-openai:v${RELEASE_VERSION}-cu129-ubuntu2404 || true +docker manifest create vllm/vllm-openai:latest-cu129-ubuntu2404 vllm/vllm-openai:latest-x86_64-cu129-ubuntu2404 vllm/vllm-openai:latest-aarch64-cu129-ubuntu2404 +docker manifest create vllm/vllm-openai:v${RELEASE_VERSION}-cu129-ubuntu2404 vllm/vllm-openai:v${RELEASE_VERSION}-x86_64-cu129-ubuntu2404 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64-cu129-ubuntu2404 +docker manifest push vllm/vllm-openai:latest-cu129-ubuntu2404 +docker manifest push vllm/vllm-openai:v${RELEASE_VERSION}-cu129-ubuntu2404 + +# ---- ROCm ---- + +docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-rocm +docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${ROCM_BASE_CACHE_KEY}-rocm-base + +docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-rocm vllm/vllm-openai-rocm:latest +docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${COMMIT}-rocm vllm/vllm-openai-rocm:v${RELEASE_VERSION} +docker push 
vllm/vllm-openai-rocm:latest +docker push vllm/vllm-openai-rocm:v${RELEASE_VERSION} + +docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${ROCM_BASE_CACHE_KEY}-rocm-base vllm/vllm-openai-rocm:latest-base +docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${ROCM_BASE_CACHE_KEY}-rocm-base vllm/vllm-openai-rocm:v${RELEASE_VERSION}-base +docker push vllm/vllm-openai-rocm:latest-base +docker push vllm/vllm-openai-rocm:v${RELEASE_VERSION}-base + +# ---- CPU ---- +# CPU images are behind separate block steps and may not have been built. +# Attempt to pull and publish; skip gracefully if images are not available. + +CPU_X86=false +CPU_ARM=false + +if docker pull public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v${RELEASE_VERSION} 2>/dev/null; then + docker tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v${RELEASE_VERSION} vllm/vllm-openai-cpu:latest-x86_64 + docker tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v${RELEASE_VERSION} vllm/vllm-openai-cpu:v${RELEASE_VERSION}-x86_64 + docker push vllm/vllm-openai-cpu:latest-x86_64 + docker push vllm/vllm-openai-cpu:v${RELEASE_VERSION}-x86_64 + CPU_X86=true +else + echo "WARNING: x86_64 CPU image not found, skipping (ensure block-cpu-release-image-build was unblocked)" +fi + +if docker pull public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:v${RELEASE_VERSION} 2>/dev/null; then + docker tag public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:v${RELEASE_VERSION} vllm/vllm-openai-cpu:latest-arm64 + docker tag public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:v${RELEASE_VERSION} vllm/vllm-openai-cpu:v${RELEASE_VERSION}-arm64 + docker push vllm/vllm-openai-cpu:latest-arm64 + docker push vllm/vllm-openai-cpu:v${RELEASE_VERSION}-arm64 + CPU_ARM=true +else + echo "WARNING: arm64 CPU image not found, skipping (ensure block-arm64-cpu-release-image-build was unblocked)" +fi + +if [ "$CPU_X86" = "true" ] && [ "$CPU_ARM" = "true" ]; then + docker manifest rm vllm/vllm-openai-cpu:latest || true + docker manifest rm 
vllm/vllm-openai-cpu:v${RELEASE_VERSION} || true + docker manifest create vllm/vllm-openai-cpu:latest vllm/vllm-openai-cpu:latest-x86_64 vllm/vllm-openai-cpu:latest-arm64 + docker manifest create vllm/vllm-openai-cpu:v${RELEASE_VERSION} vllm/vllm-openai-cpu:v${RELEASE_VERSION}-x86_64 vllm/vllm-openai-cpu:v${RELEASE_VERSION}-arm64 + docker manifest push vllm/vllm-openai-cpu:latest + docker manifest push vllm/vllm-openai-cpu:v${RELEASE_VERSION} +else + echo "WARNING: Skipping CPU multi-arch manifest (both x86_64 and arm64 images required)" +fi + +echo "" +echo "Successfully published release images for v${RELEASE_VERSION}" From 2b83d0c13f8a43bfa473409c933f8dd513642a58 Mon Sep 17 00:00:00 2001 From: khluu Date: Tue, 5 May 2026 15:55:05 -0700 Subject: [PATCH 2/5] [CI] Address review: harden CPU publish path and close race window - Use `docker manifest inspect` to distinguish a missing CPU image (skip with warning) from a real pull failure (fail loudly under set -e), instead of `if docker pull ... 2>/dev/null; then` which masked all non-zero exits as "image not built". - Add the CPU build steps to `block-publish-release-images.depends_on` with `allow_failure: true`, so publish doesn't race an in-progress CPU build but still proceeds when the operator legitimately never unblocks the CPU block steps. - Normalize the `v` prefix on the CPU build-side tags too, so the round-trip with the publish script is robust regardless of whether the operator types `1.2.3` or `v1.2.3` into the input field. - Use `buildkite-agent meta-data get --default ""` so the explicit empty-check is reachable under `set -euo pipefail` (otherwise pipefail aborts before the check). Signed-off-by: Kevin H. 
Luu Co-authored-by: Claude Signed-off-by: khluu --- .buildkite/release-pipeline.yaml | 20 ++++++++++++---- .buildkite/scripts/publish-release-images.sh | 25 ++++++++++++-------- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml index 612b903031b6..6910f65ed82c 100644 --- a/.buildkite/release-pipeline.yaml +++ b/.buildkite/release-pipeline.yaml @@ -309,6 +309,7 @@ steps: depends_on: ~ - label: "Build release image - x86_64 - CPU" + key: build-cpu-release-image-x86 depends_on: - block-cpu-release-image-build - input-release-version @@ -316,9 +317,9 @@ steps: queue: cpu_queue_release commands: - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_CPU_X86=true --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ." + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_CPU_X86=true --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v$(buildkite-agent meta-data get release-version | sed 's/^v//') --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ." 
- "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest" - - "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version)" + - "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v$(buildkite-agent meta-data get release-version | sed 's/^v//')" env: DOCKER_BUILDKIT: "1" @@ -327,16 +328,17 @@ steps: depends_on: ~ - label: "Build release image - arm64 - CPU" - depends_on: + key: build-cpu-release-image-arm64 + depends_on: - block-arm64-cpu-release-image-build - input-release-version agents: queue: arm64_cpu_queue_release commands: - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ." + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:v$(buildkite-agent meta-data get release-version | sed 's/^v//') --tag public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ." - "docker push public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:latest" - - "docker push public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:$(buildkite-agent meta-data get release-version)" + - "docker push public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:v$(buildkite-agent meta-data get release-version | sed 's/^v//')" env: DOCKER_BUILDKIT: "1" @@ -445,6 +447,14 @@ steps: - create-multi-arch-manifest-cuda-12-9-ubuntu2404 - build-rocm-release-image - input-release-version + # Wait for CPU builds if their block steps were unblocked, so publish + # doesn't race the in-progress CPU build. 
allow_failure lets publish + # proceed when the operator legitimately leaves the CPU block steps + # blocked or the CPU build fails. + - step: build-cpu-release-image-x86 + allow_failure: true + - step: build-cpu-release-image-arm64 + allow_failure: true if: build.env("NIGHTLY") != "1" - label: "Publish release images to DockerHub" diff --git a/.buildkite/scripts/publish-release-images.sh b/.buildkite/scripts/publish-release-images.sh index 83d9c159a7a5..09f7bed75145 100755 --- a/.buildkite/scripts/publish-release-images.sh +++ b/.buildkite/scripts/publish-release-images.sh @@ -8,7 +8,7 @@ set -euo pipefail -RELEASE_VERSION=$(buildkite-agent meta-data get release-version 2>/dev/null | sed 's/^v//') +RELEASE_VERSION=$(buildkite-agent meta-data get release-version --default "" | sed 's/^v//') if [ -z "${RELEASE_VERSION}" ]; then echo "ERROR: release-version metadata not set" exit 1 @@ -132,29 +132,34 @@ docker push vllm/vllm-openai-rocm:v${RELEASE_VERSION}-base # ---- CPU ---- # CPU images are behind separate block steps and may not have been built. -# Attempt to pull and publish; skip gracefully if images are not available. +# Use `docker manifest inspect` to distinguish a missing image (skip with +# warning) from a real pull failure (fail loudly under set -e). 
CPU_X86=false CPU_ARM=false -if docker pull public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v${RELEASE_VERSION} 2>/dev/null; then - docker tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v${RELEASE_VERSION} vllm/vllm-openai-cpu:latest-x86_64 - docker tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v${RELEASE_VERSION} vllm/vllm-openai-cpu:v${RELEASE_VERSION}-x86_64 +CPU_X86_TAG=public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v${RELEASE_VERSION} +if docker manifest inspect "${CPU_X86_TAG}" >/dev/null 2>&1; then + docker pull "${CPU_X86_TAG}" + docker tag "${CPU_X86_TAG}" vllm/vllm-openai-cpu:latest-x86_64 + docker tag "${CPU_X86_TAG}" vllm/vllm-openai-cpu:v${RELEASE_VERSION}-x86_64 docker push vllm/vllm-openai-cpu:latest-x86_64 docker push vllm/vllm-openai-cpu:v${RELEASE_VERSION}-x86_64 CPU_X86=true else - echo "WARNING: x86_64 CPU image not found, skipping (ensure block-cpu-release-image-build was unblocked)" + echo "WARNING: x86_64 CPU image not found at ${CPU_X86_TAG}, skipping (ensure block-cpu-release-image-build was unblocked and the build finished pushing)" fi -if docker pull public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:v${RELEASE_VERSION} 2>/dev/null; then - docker tag public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:v${RELEASE_VERSION} vllm/vllm-openai-cpu:latest-arm64 - docker tag public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:v${RELEASE_VERSION} vllm/vllm-openai-cpu:v${RELEASE_VERSION}-arm64 +CPU_ARM_TAG=public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:v${RELEASE_VERSION} +if docker manifest inspect "${CPU_ARM_TAG}" >/dev/null 2>&1; then + docker pull "${CPU_ARM_TAG}" + docker tag "${CPU_ARM_TAG}" vllm/vllm-openai-cpu:latest-arm64 + docker tag "${CPU_ARM_TAG}" vllm/vllm-openai-cpu:v${RELEASE_VERSION}-arm64 docker push vllm/vllm-openai-cpu:latest-arm64 docker push vllm/vllm-openai-cpu:v${RELEASE_VERSION}-arm64 CPU_ARM=true else - echo "WARNING: arm64 CPU image not found, skipping (ensure block-arm64-cpu-release-image-build was unblocked)" 
+ echo "WARNING: arm64 CPU image not found at ${CPU_ARM_TAG}, skipping (ensure block-arm64-cpu-release-image-build was unblocked and the build finished pushing)" fi if [ "$CPU_X86" = "true" ] && [ "$CPU_ARM" = "true" ]; then From 8f214e0bcbdbcbe357731833a1a3796927d074de Mon Sep 17 00:00:00 2001 From: khluu Date: Tue, 5 May 2026 16:17:40 -0700 Subject: [PATCH 3/5] [CI] Address review: all-or-nothing CPU publish, drop redundant v-strip - publish-release-images.sh: restructure CPU section to inspect both arches up front, then either publish everything (per-arch tags + multi-arch manifest) or fail loudly. The previous structure pushed per-arch tags independently of the multi-arch gate, so a partial state (one arch fails under publish gate's `allow_failure: true`) would leave `:latest-x86_64` pointing at the new release while the `:latest` multi-arch manifest still resolved to the previous release. - release-pipeline.yaml: revert the build-side `| sed 's/^v//'` normalization. `buildkite-agent meta-data get release-version` always returns the value with the `v` prefix already, so the strip-then- re-prepend was a no-op and added noise. Signed-off-by: Kevin H. 
Luu Co-authored-by: Claude Signed-off-by: khluu --- .buildkite/release-pipeline.yaml | 8 ++-- .buildkite/scripts/publish-release-images.sh | 39 +++++++++++--------- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml index 6910f65ed82c..ec8b3ea699d9 100644 --- a/.buildkite/release-pipeline.yaml +++ b/.buildkite/release-pipeline.yaml @@ -317,9 +317,9 @@ steps: queue: cpu_queue_release commands: - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_CPU_X86=true --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v$(buildkite-agent meta-data get release-version | sed 's/^v//') --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ." + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_CPU_X86=true --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ." 
- "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest" - - "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v$(buildkite-agent meta-data get release-version | sed 's/^v//')" + - "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version)" env: DOCKER_BUILDKIT: "1" @@ -336,9 +336,9 @@ steps: queue: arm64_cpu_queue_release commands: - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:v$(buildkite-agent meta-data get release-version | sed 's/^v//') --tag public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ." + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ." - "docker push public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:latest" - - "docker push public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:v$(buildkite-agent meta-data get release-version | sed 's/^v//')" + - "docker push public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:$(buildkite-agent meta-data get release-version)" env: DOCKER_BUILDKIT: "1" diff --git a/.buildkite/scripts/publish-release-images.sh b/.buildkite/scripts/publish-release-images.sh index 09f7bed75145..ec319aa76006 100755 --- a/.buildkite/scripts/publish-release-images.sh +++ b/.buildkite/scripts/publish-release-images.sh @@ -132,45 +132,48 @@ docker push vllm/vllm-openai-rocm:v${RELEASE_VERSION}-base # ---- CPU ---- # CPU images are behind separate block steps and may not have been built. 
-# Use `docker manifest inspect` to distinguish a missing image (skip with -# warning) from a real pull failure (fail loudly under set -e). - -CPU_X86=false -CPU_ARM=false +# All-or-nothing: inspect both arches first, then publish all (per-arch and +# multi-arch manifest), skip all, or fail if only one arch exists. Publishing +# one arch alone would leave `:latest-x86_64` on the new release while the +# `:latest` multi-arch manifest still resolves to the previous release. CPU_X86_TAG=public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v${RELEASE_VERSION} -if docker manifest inspect "${CPU_X86_TAG}" >/dev/null 2>&1; then +CPU_ARM_TAG=public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:v${RELEASE_VERSION} + +CPU_X86_AVAILABLE=false +CPU_ARM_AVAILABLE=false +docker manifest inspect "${CPU_X86_TAG}" >/dev/null 2>&1 && CPU_X86_AVAILABLE=true +docker manifest inspect "${CPU_ARM_TAG}" >/dev/null 2>&1 && CPU_ARM_AVAILABLE=true + +if [ "$CPU_X86_AVAILABLE" = "true" ] && [ "$CPU_ARM_AVAILABLE" = "true" ]; then docker pull "${CPU_X86_TAG}" docker tag "${CPU_X86_TAG}" vllm/vllm-openai-cpu:latest-x86_64 docker tag "${CPU_X86_TAG}" vllm/vllm-openai-cpu:v${RELEASE_VERSION}-x86_64 docker push vllm/vllm-openai-cpu:latest-x86_64 docker push vllm/vllm-openai-cpu:v${RELEASE_VERSION}-x86_64 - CPU_X86=true -else - echo "WARNING: x86_64 CPU image not found at ${CPU_X86_TAG}, skipping (ensure 
block-arm64-cpu-release-image-build was unblocked and the build finished pushing)" -fi -if [ "$CPU_X86" = "true" ] && [ "$CPU_ARM" = "true" ]; then docker manifest rm vllm/vllm-openai-cpu:latest || true docker manifest rm vllm/vllm-openai-cpu:v${RELEASE_VERSION} || true docker manifest create vllm/vllm-openai-cpu:latest vllm/vllm-openai-cpu:latest-x86_64 vllm/vllm-openai-cpu:latest-arm64 docker manifest create vllm/vllm-openai-cpu:v${RELEASE_VERSION} vllm/vllm-openai-cpu:v${RELEASE_VERSION}-x86_64 vllm/vllm-openai-cpu:v${RELEASE_VERSION}-arm64 docker manifest push vllm/vllm-openai-cpu:latest docker manifest push vllm/vllm-openai-cpu:v${RELEASE_VERSION} +elif [ "$CPU_X86_AVAILABLE" = "false" ] && [ "$CPU_ARM_AVAILABLE" = "false" ]; then + echo "WARNING: Neither CPU image found in ECR, skipping CPU publish (ensure block-cpu-release-image-build and block-arm64-cpu-release-image-build were unblocked and the builds finished pushing)" else - echo "WARNING: Skipping CPU multi-arch manifest (both x86_64 and arm64 images required)" + # Partial state: one arch built, the other did not. Fail loudly rather than + # ship a Docker Hub state where `:latest-${arch}` and `:latest` (multi-arch) + # disagree on which release they point at. + echo "ERROR: Partial CPU build detected (x86_64=${CPU_X86_AVAILABLE}, arm64=${CPU_ARM_AVAILABLE})." + echo " Refusing to publish to avoid split-tag drift between per-arch and multi-arch tags." + echo " Re-run the missing CPU build and retry, or manually publish if a single-arch release is intended." + exit 1 fi echo "" From 041d4b8a2116416c753cf26fb3c27f2c97e1d472 Mon Sep 17 00:00:00 2001 From: khluu Date: Tue, 5 May 2026 16:43:31 -0700 Subject: [PATCH 4/5] [CI] Install sdist build prerequisites before setup.py sdist The PyPI release script invokes `python setup.py sdist` in a fresh venv that only had `twine` installed, but setup.py imports torch, setuptools_scm, packaging, and setuptools at parse time. 
Install requirements/build/cuda.txt so those imports resolve. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: khluu --- .buildkite/scripts/upload-release-wheels-pypi.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.buildkite/scripts/upload-release-wheels-pypi.sh b/.buildkite/scripts/upload-release-wheels-pypi.sh index 058e5bbe4f4c..7e2077a2692c 100644 --- a/.buildkite/scripts/upload-release-wheels-pypi.sh +++ b/.buildkite/scripts/upload-release-wheels-pypi.sh @@ -39,10 +39,11 @@ fi set -x # avoid printing secrets above -# install twine from pypi +# install twine and sdist build prerequisites from pypi python3 -m venv /tmp/vllm-release-env source /tmp/vllm-release-env/bin/activate pip install twine +pip install -r requirements/build/cuda.txt python3 -m twine --version # copy release wheels to local directory From 426e477dab748a64b972007f0861762e83a06050 Mon Sep 17 00:00:00 2001 From: khluu Date: Tue, 5 May 2026 16:45:04 -0700 Subject: [PATCH 5/5] [CI] Address review: drop annotate dep, use key for publish step - block-publish-release-images: replace `annotate-release-workflow` with `create-multi-arch-manifest`. The annotate step posts wheel download info and isn't a real prerequisite for image publishing; swapping in the CUDA 13.0 multi-arch manifest step preserves the transitive wait on the CUDA 13.0 builds and matches the other variants already listed. - Use `key:` instead of `id:` on the publish step to match the canonical Buildkite identifier and the new CPU build step keys. Signed-off-by: Kevin H. 
Luu Co-authored-by: Claude Signed-off-by: khluu --- .buildkite/release-pipeline.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml index ec8b3ea699d9..8a900c0bf862 100644 --- a/.buildkite/release-pipeline.yaml +++ b/.buildkite/release-pipeline.yaml @@ -441,7 +441,7 @@ steps: - block: "Publish release images to DockerHub" key: block-publish-release-images depends_on: - - annotate-release-workflow + - create-multi-arch-manifest - create-multi-arch-manifest-cuda-12-9 - create-multi-arch-manifest-ubuntu2404 - create-multi-arch-manifest-cuda-12-9-ubuntu2404 @@ -460,7 +460,7 @@ steps: - label: "Publish release images to DockerHub" depends_on: - block-publish-release-images - id: publish-release-images-dockerhub + key: publish-release-images-dockerhub agents: queue: small_cpu_queue_release commands: