Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions buildkite/test-template-amd.j2
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
{% set cov_enabled = (cov_enabled == "1") %}
{% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT" %}
{% set docker_image_torch_nightly = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT-torch-nightly" %}
{% set docker_image_cu118 = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT-cu118" %}
{% set docker_image_cpu = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT-cpu" %}
{% if branch == "main" %}
{% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT" %}
{% set docker_image_latest = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:latest" %}
{% set docker_image_torch_nightly = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT-torch-nightly" %}
{% set docker_image_cu118 = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT-cu118" %}
{% set docker_image_cpu = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT-cpu" %}
{% endif %}
Expand Down
56 changes: 3 additions & 53 deletions buildkite/test-template-ci.j2
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
{% set cov_enabled = (cov_enabled == "1") %}
{% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT" %}
{% set pull_through_docker_image = "936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT" %}
{% set docker_image_torch_nightly = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT-torch-nightly" %}
{% set docker_image_cpu = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT-cpu" %}
{% set docker_image_arm64_cpu = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT-arm64-cpu" %}
{% if branch == "main" %}
{% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT" %}
{% set pull_through_docker_image = "936637512419.dkr.ecr.us-west-2.amazonaws.com/vllm-ci-pull-through-cache/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT" %}
{% set docker_image_latest = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:latest" %}
{% set docker_image_torch_nightly = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT-torch-nightly" %}
{% set docker_image_cpu = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT-cpu" %}
{% set docker_image_arm64_cpu = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT-arm64-cpu" %}
{% set docker_image_hpu = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT-hpu" %}
Expand Down Expand Up @@ -450,54 +448,6 @@ steps:
- group: "vllm against torch nightly"
depends_on: ~
steps:
{% if nightly != "1" %}
- block: Build torch nightly image
key: block-build-torch-nightly
depends_on: ~
{% endif %}
- label: ":docker: build image torch nightly"
key: image-build-torch-nightly
{% if nightly != "1" %}
depends_on: block-build-torch-nightly
{% else %}
depends_on: ~
{% endif %}
soft_fail: true
agents:
{% if branch == "main" %}
queue: cpu_queue_postmerge_us_east_1
{% else %}
queue: cpu_queue_premerge_us_east_1
{% endif %}
timeout_in_minutes: 360
commands:
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
- |
#!/bin/bash
if [[ -z $(docker manifest inspect {{ docker_image_torch_nightly }}) ]]; then
echo "Image not found, proceeding with build..."
else
echo "Image found"
exit 0
fi
- >
docker build
--file docker/Dockerfile.nightly_torch
--build-arg max_jobs=16
--build-arg buildkite_commit=$BUILDKITE_COMMIT
--build-arg USE_SCCACHE=1
--tag {{ docker_image_torch_nightly }}
--target test
--progress plain .
- "docker push {{ docker_image_torch_nightly }}"
env:
DOCKER_BUILDKIT: "1"
retry:
automatic:
- exit_status: -1 # Agent was lost
limit: 2
- exit_status: -10 # Agent was lost
limit: 2
{% for step in steps %}
{% if step.torch_nightly %}
{% set ns = namespace(blocked=1) %}
Expand All @@ -519,18 +469,18 @@ steps:

{% if ns.blocked == 1 or (step.optional and nightly != "1") %}
- block: "Run Torch Nightly {{ step.label }}"
depends_on: image-build-torch-nightly
depends_on: image-build
key: block-torch-nightly-{{ step.label | replace(" ", "-") | lower | replace("(", "") | replace(")", "") | replace("%", "") | replace(",", "-") | replace("+", "-") }}
{% endif %}

- label: "Torch Nightly {{ step.label }}"
{% if ns.blocked == 1 or (step.optional and nightly != "1") %}
depends_on: block-torch-nightly-{{ step.label | replace(" ", "-") | lower | replace("(", "") | replace(")", "") | replace("%", "") | replace(",", "-") | replace("+", "-") }}
{% else %}
depends_on: image-build-torch-nightly
depends_on: image-build
{% endif %}
soft_fail: true
{{ render_cuda_config(step, docker_image_torch_nightly, default_working_dir, hf_home_fsx, hf_home, branch) | indent(8, true) }}
{{ render_cuda_config(step, docker_image_cpu if step.no_gpu else (pull_through_docker_image if step.gpu == "h100" else docker_image), default_working_dir, hf_home_fsx, hf_home, branch) | indent(4, true) }}
{% endif %}
{% endfor %}

Expand Down