Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 5 additions & 8 deletions .buildkite/test-merge.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
env:
VLLM_WORKER_MULTIPROC_METHOD: spawn
HF_HUB_DOWNLOAD_TIMEOUT: 300
HF_HUB_ETAG_TIMEOUT: 60

steps:
- label: "Simple Unit Test"
depends_on: upload-merge-pipeline
Expand Down Expand Up @@ -169,7 +174,6 @@ steps:
commands:
- |
timeout 15m bash -c '
export VLLM_WORKER_MULTIPROC_METHOD=spawn
pytest -s -v tests/engine/test_async_omni_engine_abort.py
'
agents:
Expand All @@ -191,7 +195,6 @@ steps:
depends_on: upload-merge-pipeline
commands:
- export VLLM_LOGGING_LEVEL=DEBUG
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -s -v tests/e2e/offline_inference/test_qwen2_5_omni.py tests/e2e/online_serving/test_qwen2_5_omni.py -m "advanced_model" --run-level "advanced_model"
agents:
queue: "gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU
Expand All @@ -212,7 +215,6 @@ steps:
- |
timeout 20m bash -c '
export VLLM_LOGGING_LEVEL=DEBUG
export VLLM_WORKER_MULTIPROC_METHOD=spawn
export VLLM_ALLOW_LONG_MAX_MODEL_LEN="1"
pytest -s -v tests/e2e/online_serving/test_qwen3_tts_customvoice.py tests/e2e/offline_inference/test_qwen3_tts_customvoice.py -m "advanced_model" --run-level "advanced_model"
'
Expand All @@ -235,7 +237,6 @@ steps:
- |
timeout 20m bash -c '
export VLLM_LOGGING_LEVEL=DEBUG
export VLLM_WORKER_MULTIPROC_METHOD=spawn
export VLLM_ALLOW_LONG_MAX_MODEL_LEN="1"
pytest -s -v tests/e2e/online_serving/test_qwen3_tts_base.py tests/e2e/offline_inference/test_qwen3_tts_base.py -m "advanced_model" --run-level "advanced_model"
'
Expand All @@ -256,7 +257,6 @@ steps:
timeout_in_minutes: 30
depends_on: upload-merge-pipeline
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- export VLLM_TEST_CLEAN_GPU_MEMORY="1"
- pytest -s -v tests/e2e/offline_inference/test_qwen3_omni.py tests/e2e/online_serving/test_qwen3_omni.py tests/e2e/online_serving/test_mimo_audio.py -m "advanced_model" --run-level "advanced_model"
agents:
Expand Down Expand Up @@ -297,7 +297,6 @@ steps:
timeout_in_minutes: 20
depends_on: upload-merge-pipeline
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -s -v tests/e2e/online_serving/test_image_gen_edit.py
agents:
queue: "mithril-h100-pool"
Expand Down Expand Up @@ -340,7 +339,6 @@ steps:
- |
timeout 55m bash -c '
set -e
export VLLM_WORKER_MULTIPROC_METHOD=spawn
export VLLM_TEST_CLEAN_GPU_MEMORY=1
export VLLM_IMAGE_FETCH_TIMEOUT=60
pytest -s -v tests/e2e/offline_inference/test_bagel_text2img.py -m "advanced_model" --run-level "advanced_model" -k "shared_memory"
Expand Down Expand Up @@ -387,7 +385,6 @@ steps:
- |
timeout 20m bash -c '
export VLLM_LOGGING_LEVEL=DEBUG
export VLLM_WORKER_MULTIPROC_METHOD=spawn
pytest -s -v tests/e2e/online_serving/test_voxtral_tts.py tests/e2e/offline_inference/test_voxtral_tts.py -m "advanced_model" --run-level "advanced_model"
'
agents:
Expand Down
14 changes: 5 additions & 9 deletions .buildkite/test-nightly-diffusion.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
# buildkite-agent pipeline upload .buildkite/test-nightly-diffusion.yml
# from test-nightly.yml (step key: nightly-diffusion-model-test). Top-level groups are
# foldable in the Buildkite UI (Other / Wan / Qwen-Image).
env:
VLLM_WORKER_MULTIPROC_METHOD: spawn
HF_HUB_DOWNLOAD_TIMEOUT: 300
HF_HUB_ETAG_TIMEOUT: 60

steps:
- group: ":card_index_dividers: Other Model Test"
key: nightly-other-model-test-group
Expand All @@ -10,7 +15,6 @@ steps:
timeout_in_minutes: 120
if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test"
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -s -v tests/e2e/online_serving/test_*_expansion.py -k "not test_wan22_expansion and not test_wan_2_1_vace_expansion and not test_qwen_image" -m "advanced_model and diffusion and H100" --run-level "advanced_model"
agents:
queue: "mithril-h100-pool"
Expand Down Expand Up @@ -50,7 +54,6 @@ steps:
timeout_in_minutes: 60
if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test"
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -s -v tests/e2e/online_serving/test_*_expansion.py -m "advanced_model and diffusion and L4" --run-level "advanced_model"
agents:
queue: "gpu_4_queue" # g6.12xlarge instance on AWS, has 4 L4 GPU
Expand All @@ -70,7 +73,6 @@ steps:
timeout_in_minutes: 60
if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test"
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- export VLLM_TEST_CLEAN_GPU_MEMORY="1"
- pytest -s -v tests/examples/online_serving/test_text_to_image.py tests/examples/offline_inference/test_text_to_image.py -m "advanced_model and example and H100" --run-level "advanced_model"
agents:
Expand Down Expand Up @@ -114,7 +116,6 @@ steps:
timeout_in_minutes: 90
if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test"
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -s -v tests/e2e/online_serving/test_wan22_expansion.py tests/e2e/online_serving/test_wan_2_1_vace_expansion.py -m "advanced_model" --run-level "advanced_model"
agents:
queue: "mithril-h100-pool"
Expand Down Expand Up @@ -155,7 +156,6 @@ steps:
timeout_in_minutes: 180
if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test"
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -s -v tests/e2e/accuracy/wan22_i2v/test_wan22_i2v_video_similarity.py --run-level advanced_model
agents:
queue: "mithril-h100-pool"
Expand Down Expand Up @@ -198,7 +198,6 @@ steps:
timeout_in_minutes: 120
if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test"
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -s -v tests/e2e/online_serving/test_qwen_image*_expansion.py -m "advanced_model and diffusion and H100" --run-level "advanced_model"
agents:
queue: "mithril-h100-pool"
Expand Down Expand Up @@ -239,7 +238,6 @@ steps:
timeout_in_minutes: 60
if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test"
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -s -v tests/e2e/accuracy/test_gebench_h100_smoke.py --run-level advanced_model --gebench-model Qwen/Qwen-Image-2512 --accuracy-judge-model QuantTrio/Qwen3-VL-30B-A3B-Instruct-AWQ --accuracy-gpu 0 --gebench-port 8093 --accuracy-workers 1
- buildkite-agent artifact upload "tests/e2e/accuracy/artifacts/gebench_qwen-image-2512/summary*.json"
agents:
Expand Down Expand Up @@ -281,7 +279,6 @@ steps:
timeout_in_minutes: 60
if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test"
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -s -v tests/e2e/accuracy/test_gedit_bench_h100_smoke.py --run-level advanced_model --gedit-model Qwen/Qwen-Image-Edit --accuracy-judge-model QuantTrio/Qwen3-VL-30B-A3B-Instruct-AWQ --accuracy-gpu 0 --gedit-port 8093 --gedit-samples-per-group 20 --accuracy-workers 1
- buildkite-agent artifact upload "tests/e2e/accuracy/artifacts/gedit_scores_qwen-image-edit/qwen-image-edit_all_all_vie_score_*.csv"
- buildkite-agent artifact upload "tests/e2e/accuracy/artifacts/gedit_scores_qwen-image-edit/qwen-image-edit_all_all_summary_*.json"
Expand Down Expand Up @@ -326,7 +323,6 @@ steps:
timeout_in_minutes: 180
if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test"
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- export DIFFUSION_BENCHMARK_DIR=tests/dfx/perf/results
- export CACHE_DIT_VERSION=1.3.0
- pytest -s -v tests/dfx/perf/scripts/run_diffusion_benchmark.py --config-file tests/dfx/perf/tests/test_qwen_image_vllm_omni.json
Expand Down
11 changes: 5 additions & 6 deletions .buildkite/test-nightly.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
env:
VLLM_WORKER_MULTIPROC_METHOD: spawn
HF_HUB_DOWNLOAD_TIMEOUT: 300
HF_HUB_ETAG_TIMEOUT: 60

steps:
# Group: collapses under one heading in the Buildkite UI; child steps still run in parallel.
- group: ":card_index_dividers: Omni Model Test"
Expand All @@ -8,7 +13,6 @@ steps:
depends_on: upload-nightly-pipeline
if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test"
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -s -v tests/e2e/online_serving/test_*_expansion.py -m "advanced_model and H100 and omni" --run-level "advanced_model"
agents:
queue: "mithril-h100-pool"
Expand Down Expand Up @@ -49,7 +53,6 @@ steps:
depends_on: upload-nightly-pipeline
if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test"
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- export VLLM_ALLOW_LONG_MAX_MODEL_LEN="1"
- pytest -s -v tests/e2e/online_serving/test_*_expansion.py -m "advanced_model and L4 and omni" --run-level "advanced_model"
agents:
Expand All @@ -71,7 +74,6 @@ steps:
depends_on: upload-nightly-pipeline
if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test"
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- export VLLM_ALLOW_LONG_MAX_MODEL_LEN="1"
- pytest -s -v tests/examples/ -m "advanced_model and omni and L4" --run-level "advanced_model"
agents:
Expand All @@ -93,7 +95,6 @@ steps:
depends_on: upload-nightly-pipeline
if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test"
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -s -v tests/examples/ -m "advanced_model and omni and H100" --run-level "advanced_model"
agents:
queue: "mithril-h100-pool"
Expand Down Expand Up @@ -135,7 +136,6 @@ steps:
depends_on: upload-nightly-pipeline
if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test"
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- export BENCHMARK_DIR=tests/dfx/perf/results
- export VLLM_ALLOW_LONG_MAX_MODEL_LEN="1"
- pytest -s -v tests/dfx/perf/scripts/run_benchmark.py
Expand Down Expand Up @@ -193,7 +193,6 @@ steps:
depends_on: upload-nightly-pipeline
if: build.env("NIGHTLY") == "1" || build.pull_request.labels includes "nightly-test"
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- python tools/nightly/buildkite_testcase_statistics.py -o tests/dfx/perf/results/buildkite_testcase_statistics.html
- buildkite-agent artifact upload "tests/dfx/perf/results/*.html"
agents:
Expand Down
17 changes: 5 additions & 12 deletions .buildkite/test-ready.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
env:
VLLM_WORKER_MULTIPROC_METHOD: spawn
HF_HUB_DOWNLOAD_TIMEOUT: 300
HF_HUB_ETAG_TIMEOUT: 60

steps:
- label: "Simple Unit Test"
depends_on: upload-ready-pipeline
Expand Down Expand Up @@ -173,7 +178,6 @@ steps:
commands:
- |
timeout 15m bash -c '
export VLLM_WORKER_MULTIPROC_METHOD=spawn
pytest -s -v tests/engine/test_async_omni_engine_abort.py
'
agents:
Expand All @@ -197,7 +201,6 @@ steps:
- |
timeout 17m bash -c '
export VLLM_LOGGING_LEVEL=DEBUG
export VLLM_WORKER_MULTIPROC_METHOD=spawn
pytest -s -v tests/e2e/online_serving/test_qwen2_5_omni.py -m "core_model" --run-level "core_model"
'
agents:
Expand All @@ -218,7 +221,6 @@ steps:
commands:
- |
timeout 20m bash -c '
export VLLM_WORKER_MULTIPROC_METHOD=spawn
pytest -s -v tests/e2e/online_serving/test_qwen3_omni.py -m "core_model" --run-level "core_model"
'
agents:
Expand Down Expand Up @@ -256,7 +258,6 @@ steps:
- |
timeout 30m bash -c '
export VLLM_LOGGING_LEVEL=DEBUG
export VLLM_WORKER_MULTIPROC_METHOD=spawn
pytest -s -v tests/e2e/online_serving/test_mimo_audio.py -m "core_model" --run-level "core_model"
'
agents:
Expand Down Expand Up @@ -299,7 +300,6 @@ steps:
- |
timeout 20m bash -c '
export VLLM_LOGGING_LEVEL=DEBUG
export VLLM_WORKER_MULTIPROC_METHOD=spawn
export VLLM_ALLOW_LONG_MAX_MODEL_LEN="1"
pytest -s -v tests/e2e/online_serving/test_qwen3_tts_customvoice.py -m "core_model" --run-level "core_model"
'
Expand All @@ -324,7 +324,6 @@ steps:
- |
timeout 20m bash -c '
export VLLM_LOGGING_LEVEL=DEBUG
export VLLM_WORKER_MULTIPROC_METHOD=spawn
pytest -s -v tests/e2e/online_serving/test_omnivoice.py -m "core_model" --run-level "core_model"
'
agents:
Expand All @@ -347,7 +346,6 @@ steps:
- |
timeout 20m bash -c '
export VLLM_LOGGING_LEVEL=DEBUG
export VLLM_WORKER_MULTIPROC_METHOD=spawn
pytest -s -v tests/e2e/online_serving/test_voxtral_tts.py -m "core_model" --run-level "core_model"
'
agents:
Expand Down Expand Up @@ -384,7 +382,6 @@ steps:
# commands:
# - |
# timeout 20m bash -c '
# export VLLM_WORKER_MULTIPROC_METHOD=spawn
# pytest -s -v tests/e2e/online_serving/test_image_gen_edit.py
# '
# agents:
Expand Down Expand Up @@ -421,7 +418,6 @@ steps:
commands:
- |
timeout 30m bash -c '
export VLLM_WORKER_MULTIPROC_METHOD=spawn
export VLLM_TEST_CLEAN_GPU_MEMORY=1
pytest -s -v tests/e2e/offline_inference/test_bagel_text2img.py -m "core_model" --run-level "core_model"
'
Expand Down Expand Up @@ -464,7 +460,6 @@ steps:
commands:
- |
timeout 30m bash -c '
export VLLM_WORKER_MULTIPROC_METHOD=spawn
export VLLM_TEST_CLEAN_GPU_MEMORY=1
pytest -s -v tests/e2e/offline_inference/test_bagel_img2img.py -m "core_model" --run-level "core_model"
'
Expand Down Expand Up @@ -507,7 +502,6 @@ steps:
commands:
- |
timeout 40m bash -c '
export VLLM_WORKER_MULTIPROC_METHOD=spawn
export VLLM_TEST_CLEAN_GPU_MEMORY=1
export VLLM_IMAGE_FETCH_TIMEOUT=60
pytest -s -v tests/e2e/online_serving/test_bagel_online.py -m "core_model" --run-level "core_model"
Expand Down Expand Up @@ -552,7 +546,6 @@ steps:
commands:
- |
timeout 20m bash -c '
export VLLM_WORKER_MULTIPROC_METHOD=spawn
pytest -s -v tests/e2e/online_serving/test_cosyvoice3_tts.py -m "core_model" --run-level "core_model"
'
agents:
Expand Down
Loading