diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml index 092755ea085c..a9427a9366c9 100644 --- a/.buildkite/release-pipeline.yaml +++ b/.buildkite/release-pipeline.yaml @@ -141,7 +141,7 @@ steps: queue: cpu_queue_postmerge commands: - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.2 --build-arg FLASHINFER_AOT_COMPILE=true --build-arg INSTALL_KV_CONNECTORS=true --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.2-devel-ubuntu22.04 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130 --target vllm-openai --progress plain -f docker/Dockerfile ." + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg INSTALL_KV_CONNECTORS=true --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu22.04 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130 --target vllm-openai --progress plain -f docker/Dockerfile ." 
- "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130" # re-tag to default image tag and push, just in case arm64 build fails - "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu130" @@ -154,7 +154,8 @@ steps: queue: arm64_cpu_queue_postmerge commands: - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.2 --build-arg FLASHINFER_AOT_COMPILE=true --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0' --build-arg INSTALL_KV_CONNECTORS=true --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.2-devel-ubuntu22.04 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130 --target vllm-openai --progress plain -f docker/Dockerfile ." + # compute capability 12.0 for RTX-50 series / RTX PRO 6000 Blackwell, 12.1 for DGX Spark + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0 12.1' --build-arg INSTALL_KV_CONNECTORS=true --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu22.04 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130 --target vllm-openai --progress plain -f docker/Dockerfile ." 
- "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130" - label: "Create multi-arch manifest - CUDA 13.0" @@ -243,7 +244,6 @@ steps: # Build vLLM ROCm image using the base - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg BASE_IMAGE=rocm/vllm-dev:base-$BUILDKITE_COMMIT --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-rocm --target vllm-openai --progress plain -f docker/Dockerfile.rocm ." - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-rocm" - - label: "Build and publish nightly multi-arch image to DockerHub" depends_on: @@ -252,17 +252,7 @@ steps: agents: queue: small_cpu_queue_postmerge commands: - - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-x86_64" - - "docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-aarch64" - - "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-x86_64 vllm/vllm-openai:nightly-x86_64" - - "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-aarch64 vllm/vllm-openai:nightly-aarch64" - - "docker push vllm/vllm-openai:nightly-x86_64" - - "docker push vllm/vllm-openai:nightly-aarch64" - - "docker manifest create vllm/vllm-openai:nightly vllm/vllm-openai:nightly-x86_64 vllm/vllm-openai:nightly-aarch64 --amend" - - "docker manifest create vllm/vllm-openai:nightly-$BUILDKITE_COMMIT vllm/vllm-openai:nightly-x86_64 vllm/vllm-openai:nightly-aarch64 --amend" - - "docker manifest push vllm/vllm-openai:nightly" - - "docker manifest push vllm/vllm-openai:nightly-$BUILDKITE_COMMIT" + - "bash .buildkite/scripts/push-nightly-builds.sh" # Clean up old nightly builds (keep only last 14) - "bash .buildkite/scripts/cleanup-nightly-builds.sh" plugins: @@ -273,6 +263,25 @@ steps: DOCKER_BUILDKIT: "1" DOCKERHUB_USERNAME: "vllmbot" + - label: "Build 
and publish nightly multi-arch image to DockerHub - CUDA 13.0" + depends_on: + - create-multi-arch-manifest-cuda-13-0 + if: build.env("NIGHTLY") == "1" + agents: + queue: small_cpu_queue_postmerge + commands: + - "bash .buildkite/scripts/push-nightly-builds.sh cu130" + # Clean up old nightly builds (keep only last 14) + - "bash .buildkite/scripts/cleanup-nightly-builds.sh cu130-nightly-" + plugins: + - docker-login#v3.0.0: + username: vllmbot + password-env: DOCKERHUB_TOKEN + env: + DOCKER_BUILDKIT: "1" + DOCKERHUB_USERNAME: "vllmbot" + + # ============================================================================= # ROCm Release Pipeline (x86_64 only) # ============================================================================= diff --git a/.buildkite/scripts/cleanup-nightly-builds.sh b/.buildkite/scripts/cleanup-nightly-builds.sh index f02a128c6772..9e015e19f91c 100755 --- a/.buildkite/scripts/cleanup-nightly-builds.sh +++ b/.buildkite/scripts/cleanup-nightly-builds.sh @@ -3,7 +3,14 @@ set -ex # Clean up old nightly builds from DockerHub, keeping only the last 14 builds -# This script uses DockerHub API to list and delete old tags with "nightly-" prefix +# This script uses the DockerHub API to list and delete old tags with the specified prefix +# Usage: cleanup-nightly-builds.sh [TAG_PREFIX] +# Example: cleanup-nightly-builds.sh "nightly-" or cleanup-nightly-builds.sh "cu130-nightly-" + +# Get the tag prefix from the first argument, defaulting to "nightly-" if not provided +TAG_PREFIX="${1:-nightly-}" + +echo "Cleaning up tags with prefix: $TAG_PREFIX" # DockerHub API endpoint for vllm/vllm-openai repository REPO_API_URL="https://hub.docker.com/v2/repositories/vllm/vllm-openai/tags" @@ -45,7 +52,7 @@ get_all_tags() { set -x # Get both last_updated timestamp and tag name, separated by | - local tags=$(echo "$response" | jq -r '.results[] | select(.name | startswith("nightly-")) | "\(.last_updated)|\(.name)"') + local tags=$(echo "$response" | jq -r --arg prefix "$TAG_PREFIX" 
'.results[] | select(.name | startswith($prefix)) | "\(.last_updated)|\(.name)"') if [ -z "$tags" ]; then break diff --git a/.buildkite/scripts/push-nightly-builds.sh b/.buildkite/scripts/push-nightly-builds.sh new file mode 100755 index 000000000000..98e80fd99ec4 --- /dev/null +++ b/.buildkite/scripts/push-nightly-builds.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +set -ex + +# Get the tag variant (e.g. "cu130") from the first argument; defaults to empty if not provided. +# The cleanup script matches tags by prefix, so variants must use distinct prefixes like "cu130-nightly"; +# otherwise their tags would be deleted together with the main "nightly" tags. + +TAG_VARIANT="$1" +if [ -n "$TAG_VARIANT" ]; then + ORIG_TAG_SUFFIX="-$TAG_VARIANT" + TAG_NAME="$TAG_VARIANT-nightly" +else + ORIG_TAG_SUFFIX="" + TAG_NAME="nightly" +fi + +ORIG_TAG_NAME="$BUILDKITE_COMMIT" + +echo "Pushing original tag $ORIG_TAG_NAME$ORIG_TAG_SUFFIX to new nightly tag name: $TAG_NAME" + +# pull original arch-dependent images from AWS ECR Public +aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7 +docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:$ORIG_TAG_NAME-x86_64$ORIG_TAG_SUFFIX +docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:$ORIG_TAG_NAME-aarch64$ORIG_TAG_SUFFIX +# tag arch-dependent images +docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$ORIG_TAG_NAME-x86_64$ORIG_TAG_SUFFIX vllm/vllm-openai:$TAG_NAME-x86_64 +docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$ORIG_TAG_NAME-aarch64$ORIG_TAG_SUFFIX vllm/vllm-openai:$TAG_NAME-aarch64 +# push arch-dependent images to DockerHub +docker push vllm/vllm-openai:$TAG_NAME-x86_64 +docker push vllm/vllm-openai:$TAG_NAME-aarch64 +# push arch-independent manifest to DockerHub +docker manifest create vllm/vllm-openai:$TAG_NAME vllm/vllm-openai:$TAG_NAME-x86_64 vllm/vllm-openai:$TAG_NAME-aarch64 --amend +docker manifest create vllm/vllm-openai:$TAG_NAME-$BUILDKITE_COMMIT 
vllm/vllm-openai:$TAG_NAME-x86_64 vllm/vllm-openai:$TAG_NAME-aarch64 --amend +docker manifest push vllm/vllm-openai:$TAG_NAME +docker manifest push vllm/vllm-openai:$TAG_NAME-$BUILDKITE_COMMIT