Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
31 changes: 4 additions & 27 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
@@ -1,21 +1,6 @@
# Document 1: Buildkite loads only this block on first parse. The next step resolves docs-only skip-ci
# from git diff, then uploads document 2. When docs-only skip applies, image-build still runs if nightly-test
# / main NIGHTLY so upload-nightly is not skipped together with test-ready/test-merge.
#
# Document 2: appended after `---`; same file, read by upload_pipeline_with_skip_ci.sh (not evaluated as a second pipeline by Buildkite).
steps:
- label: ":github: Resolve skip-ci & upload pipeline"
key: upload-ci-pipeline
commands:
- "bash .buildkite/scripts/upload_pipeline_with_skip_ci.sh"
agents:
queue: "cpu_queue_premerge"

---
steps:
- label: ":docker: Build image"
key: image-build
if: __IMAGE_BUILD_IF__
commands:
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
- "docker build --progress=plain --file docker/Dockerfile.ci -t vllm-omni-ci ."
Expand All @@ -28,7 +13,7 @@ steps:
- label: "Upload Ready Pipeline"
depends_on: image-build
key: upload-ready-pipeline
if: __UPLOAD_READY_IF__
if: build.branch != "main" && build.pull_request.labels includes "ready"
commands:
- buildkite-agent pipeline upload .buildkite/test-ready.yml
agents:
Expand All @@ -38,25 +23,17 @@ steps:
- label: "Upload Merge Pipeline"
depends_on: image-build
key: upload-merge-pipeline
if: __UPLOAD_MERGE_IF__
if: build.branch == "main" && build.env("NIGHTLY") != "1"
commands:
- buildkite-agent pipeline upload .buildkite/test-merge.yml
agents:
queue: "cpu_queue_premerge"

# L4 Test — main+NIGHTLY=1 (scheduled), or PR with specific label (e.g. add label then Rebuild)
# L4 Test — main+NIGHTLY=1 (scheduled), or PR with label nightly-test (e.g. add label then Rebuild)
- label: "Upload Nightly Pipeline"
depends_on: image-build
key: upload-nightly-pipeline
if: >-
(build.branch == "main" && build.env("NIGHTLY") == "1") ||
(build.branch != "main" && (
build.pull_request.labels includes "nightly-test" ||
build.pull_request.labels includes "omni-test" ||
build.pull_request.labels includes "tts-test" ||
build.pull_request.labels includes "diffusion-x2iat-test" ||
build.pull_request.labels includes "diffusion-x2v-test"
))
if: '(build.branch == "main" && build.env("NIGHTLY") == "1") || (build.branch != "main" && build.pull_request.labels includes "nightly-test")'
commands:
- buildkite-agent pipeline upload .buildkite/test-nightly.yml
agents:
Expand Down
11 changes: 5 additions & 6 deletions .buildkite/scripts/generate-and-upload-nightly-index.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ has_new_python=$($PYTHON -c "print(1 if __import__('sys').version_info >= (3,12)
if [[ "$has_new_python" -eq 0 ]]; then
# use new python from docker
docker pull python:3-slim
PYTHON="docker run --rm --user $(id -u):$(id -g) -v $(pwd):/app -w /app python:3-slim python3"
PYTHON="docker run --rm -v $(pwd):/app -w /app python:3-slim python3"
fi

echo "Using python interpreter: $PYTHON"
Expand All @@ -36,7 +36,7 @@ mkdir -p "$INDICES_OUTPUT_DIR"

# HACK: we do not need regex module here, but it is required by pre-commit hook
# To avoid any external dependency, we simply replace it back to the stdlib re module
sed -i.bak 's/import regex as re/import re/g' .buildkite/scripts/generate-nightly-index.py && rm -f .buildkite/scripts/generate-nightly-index.py.bak
sed -i 's/import regex as re/import re/g' .buildkite/scripts/generate-nightly-index.py

# Generate indices -- the version is just the commit hash (not omni/{commit})
# because relative paths are computed between the index and wheel directories,
Expand Down Expand Up @@ -73,16 +73,15 @@ echo "Pure version (without variant): $pure_version"

# re-generate and copy to /omni/{version}/ only if it does not have "dev" in the version
if [[ "$version" != *"dev"* ]]; then
s3_version="v$pure_version"
echo "Re-generating indices for /omni/$s3_version/"
echo "Re-generating indices for /omni/$pure_version/"
rm -rf "${INDICES_OUTPUT_DIR:?}"
mkdir -p "$INDICES_OUTPUT_DIR"
# wheel-dir is overridden to be the commit directory, so that the indices point to the correct wheel path
$PYTHON .buildkite/scripts/generate-nightly-index.py \
--version "$s3_version" \
--version "$pure_version" \
--wheel-dir "$BUILDKITE_COMMIT" \
--current-objects "$obj_json" \
--output-dir "$INDICES_OUTPUT_DIR" \
--comment "version $pure_version"
aws s3 cp --recursive "$INDICES_OUTPUT_DIR/" "s3://$BUCKET/omni/$s3_version/"
aws s3 cp --recursive "$INDICES_OUTPUT_DIR/" "s3://$BUCKET/omni/$pure_version/"
fi
3 changes: 2 additions & 1 deletion .buildkite/scripts/generate-nightly-index.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@

import argparse
import json
import re
import sys
from dataclasses import asdict, dataclass
from datetime import datetime
from pathlib import Path
from typing import Any
from urllib.parse import quote

import regex as re


def normalize_package_name(name: str) -> str:
"""Normalize package name per PEP 503."""
Expand Down
26 changes: 23 additions & 3 deletions .buildkite/scripts/hardware_ci/run-amd-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,15 @@ set -o pipefail
export PYTHONPATH=".."

# Print ROCm version
echo "--- Confirming Clean Initial State"
while true; do
sleep 3
if grep -q clean /opt/amdgpu/etc/gpu_state; then
echo "GPUs state is \"clean\""
break
fi
done

echo "--- ROCm info"
rocminfo

Expand Down Expand Up @@ -42,14 +51,25 @@ cleanup_docker() {
# Call the cleanup docker function
cleanup_docker

echo "--- Resetting GPUs"

echo "reset" > /opt/amdgpu/etc/gpu_state

while true; do
sleep 3
if grep -q clean /opt/amdgpu/etc/gpu_state; then
echo "GPUs state is \"clean\""
break
fi
done

echo "--- Pulling container"
## Temporary change to use AMD Docker Hub to store the vllm-omni image
## Temporary change to use AMD Docker Hub to store the vllm-ci image
# to bypass the rate limit issue with ECR Public Gallery.
# Images are now stored in a separate repository for vllm-omni, instead of vllm-ci.
# TODO: @tjtanaa point back to ECR Public Gallery
# once the amd agents are configured to use ECR Public Gallery.
# image_name="public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:${BUILDKITE_COMMIT}-rocm-omni"
image_name="rocm/vllm-omni:${BUILDKITE_COMMIT}"
image_name="rocm/vllm-ci:${BUILDKITE_COMMIT}-rocm-omni"
container_name="rocm_${BUILDKITE_COMMIT}_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo)"

# TODO: @tjtanaa uncomment this once the amd agents are configured to use ECR Public Gallery.
Expand Down
137 changes: 0 additions & 137 deletions .buildkite/scripts/upload_pipeline_with_skip_ci.sh

This file was deleted.

Loading