Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 68 additions & 3 deletions .buildkite/release-pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ steps:
commands:
# #NOTE: torch_cuda_arch_list is derived from upstream PyTorch build files here:
# https://github.com/pytorch/pytorch/blob/main/.ci/aarch64_linux/aarch64_ci_build.sh#L7
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0' --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu22.04 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0 12.1' --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu22.04 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
- "mkdir artifacts"
- "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
- "bash .buildkite/scripts/upload-wheels.sh manylinux_2_35"
Expand Down Expand Up @@ -122,9 +122,46 @@ steps:
- "docker manifest create public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-x86_64 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-aarch64 --amend"
- "docker manifest push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT"

# Build release images (13.0)
- label: "Build release image CUDA 13.0 (x86)"
depends_on: ~
id: build-release-image-cuda13-x86
agents:
queue: cpu_queue_postmerge
commands:
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu22.04 --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cuda13-$(uname -m) --target vllm-openai --progress plain -f docker/Dockerfile ."
- "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cuda13-$(uname -m)"
# re-tag to default cuda13 image tag and push, just in case arm64 build fails
- "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cuda13-$(uname -m) public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cuda13"
- "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cuda13"

- label: "Build release image CUDA 13.0 (arm64)"
depends_on: ~
id: build-release-image-cuda13-arm64
agents:
queue: arm64_cpu_queue_postmerge
commands:
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu22.04 --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0 12.1' --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cuda13-$(uname -m) --target vllm-openai --progress plain -f docker/Dockerfile ."
- "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cuda13-$(uname -m)"

- label: "Create multi-arch manifest for CUDA 13.0"
depends_on:
- build-release-image-cuda13-x86
- build-release-image-cuda13-arm64
id: create-multi-arch-manifest-cuda13
agents:
queue: cpu_queue_postmerge
commands:
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
- "docker manifest create public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cuda13 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cuda13-x86_64 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cuda13-aarch64 --amend"
- "docker manifest push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cuda13"

- label: "Annotate release workflow"
depends_on:
- create-multi-arch-manifest
- create-multi-arch-manifest-cuda13
id: annotate-release-workflow
agents:
queue: cpu_queue_postmerge
Expand Down Expand Up @@ -187,8 +224,36 @@ steps:
- "docker manifest create vllm/vllm-openai:nightly-$BUILDKITE_COMMIT vllm/vllm-openai:nightly-x86_64 vllm/vllm-openai:nightly-aarch64 --amend"
- "docker manifest push vllm/vllm-openai:nightly"
- "docker manifest push vllm/vllm-openai:nightly-$BUILDKITE_COMMIT"
# Clean up old nightly builds (keep only last 14)
- "bash .buildkite/scripts/cleanup-nightly-builds.sh"
# Clean up old CUDA 12.9 nightly builds (keep only last 14)
- "bash .buildkite/scripts/cleanup-nightly-builds.sh nightly-"
plugins:
- docker-login#v3.0.0:
username: vllmbot
password-env: DOCKERHUB_TOKEN
env:
DOCKER_BUILDKIT: "1"
DOCKERHUB_USERNAME: "vllmbot"

- label: "Build and publish nightly CUDA 13.0 multi-arch image to DockerHub"
depends_on:
- create-multi-arch-manifest-cuda13
if: build.env("NIGHTLY") == "1"
agents:
queue: cpu_queue_postmerge
commands:
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
- "docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cuda13-x86_64"
- "docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cuda13-aarch64"
- "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cuda13-x86_64 vllm/vllm-openai:cuda13-nightly-x86_64"
- "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cuda13-aarch64 vllm/vllm-openai:cuda13-nightly-aarch64"
- "docker push vllm/vllm-openai:cuda13-nightly-x86_64"
- "docker push vllm/vllm-openai:cuda13-nightly-aarch64"
- "docker manifest create vllm/vllm-openai:cuda13-nightly vllm/vllm-openai:cuda13-nightly-x86_64 vllm/vllm-openai:cuda13-nightly-aarch64 --amend"
- "docker manifest create vllm/vllm-openai:cuda13-nightly-$BUILDKITE_COMMIT vllm/vllm-openai:cuda13-nightly-x86_64 vllm/vllm-openai:cuda13-nightly-aarch64 --amend"
- "docker manifest push vllm/vllm-openai:cuda13-nightly"
- "docker manifest push vllm/vllm-openai:cuda13-nightly-$BUILDKITE_COMMIT"
# Clean up old CUDA 13.0 nightly builds (keep only last 14)
- "bash .buildkite/scripts/cleanup-nightly-builds.sh cuda13-nightly-"
plugins:
- docker-login#v3.0.0:
username: vllmbot
Comment on lines +237 to 259
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

This new step to publish nightly CUDA 13.0 images is almost a complete duplicate of the existing step for CUDA 12.9 images. This level of duplication makes the pipeline configuration difficult to maintain and prone to errors, as changes need to be manually synchronized across multiple blocks.

To improve maintainability, I strongly recommend refactoring this using YAML anchors and aliases. You can define a template for the common parts of the job and then reuse it for each CUDA version, only overriding the specific parts like dependencies and tag prefixes.

Here is a conceptual example:

.publish_nightly_template: &publish_nightly_template
  if: build.env("NIGHTLY") == "1"
  agents:
    queue: cpu_queue_postmerge
  plugins:
    - docker-login#v3.0.0:
        username: vllmbot
        password-env: DOCKERHUB_TOKEN
  # ... other common properties

- label: "Build and publish nightly CUDA 12.9 ..."
  <<: *publish_nightly_template
  depends_on:
    - create-multi-arch-manifest
  commands:
    # ... commands with version-specific tags
    - "bash .buildkite/scripts/cleanup-nightly-builds.sh nightly-"

- label: "Build and publish nightly CUDA 13.0 ..."
  <<: *publish_nightly_template
  depends_on:
    - create-multi-arch-manifest-cuda13
  commands:
    # ... commands with version-specific tags
    - "bash .buildkite/scripts/cleanup-nightly-builds.sh cuda13-nightly-"

Even the commands list could be further parameterized using variables to reduce duplication even more. Adopting this pattern will make the pipeline much cleaner and easier to manage.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a fair comment and I think it'd be a good idea to refactor them, if @csahithi wanna take on this challenge.

Expand Down
11 changes: 9 additions & 2 deletions .buildkite/scripts/cleanup-nightly-builds.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,14 @@
set -ex

# Clean up old nightly builds from DockerHub, keeping only the last 14 builds
# This script uses DockerHub API to list and delete old tags with "nightly-" prefix
# This script uses DockerHub API to list and delete old tags with specified prefix
# Usage: cleanup-nightly-builds.sh [TAG_PREFIX]
# Example: cleanup-nightly-builds.sh "nightly-" or cleanup-nightly-builds.sh "cuda13-nightly-"

# Get tag prefix from argument, default to "nightly-" if not provided
TAG_PREFIX="${1:-nightly-}"

echo "Cleaning up tags with prefix: $TAG_PREFIX"

# DockerHub API endpoint for vllm/vllm-openai repository
REPO_API_URL="https://hub.docker.com/v2/repositories/vllm/vllm-openai/tags"
Expand Down Expand Up @@ -45,7 +52,7 @@ get_all_tags() {
set -x

# Get both last_updated timestamp and tag name, separated by |
local tags=$(echo "$response" | jq -r '.results[] | select(.name | startswith("nightly-")) | "\(.last_updated)|\(.name)"')
local tags=$(echo "$response" | jq -r --arg prefix "$TAG_PREFIX" '.results[] | select(.name | startswith($prefix)) | "\(.last_updated)|\(.name)"')

if [ -z "$tags" ]; then
break
Expand Down