From 7aded98e8e0ef6f9a1f3681c034d79189832f4a5 Mon Sep 17 00:00:00 2001 From: Andreas Karatzas Date: Tue, 3 Mar 2026 13:02:28 -0600 Subject: [PATCH 1/5] Attempting to fix quotation Signed-off-by: Andreas Karatzas --- .buildkite/scripts/hardware_ci/run-amd-test.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.buildkite/scripts/hardware_ci/run-amd-test.sh b/.buildkite/scripts/hardware_ci/run-amd-test.sh index 8895771f0a40..70eef322bd19 100755 --- a/.buildkite/scripts/hardware_ci/run-amd-test.sh +++ b/.buildkite/scripts/hardware_ci/run-amd-test.sh @@ -205,6 +205,13 @@ re_quote_pytest_markers() { esac if $is_boundary; then + # Strip surrounding double quotes if present (from upstream + # single-to-double conversion); without this, wrapping below + # would produce '"expr"' with literal double-quote characters. + if [[ "$marker_buf" == '"'*'"' ]]; then + marker_buf="${marker_buf#\"}" + marker_buf="${marker_buf%\"}" + fi # Flush the collected marker expression if [[ "$marker_buf" == *" "* || "$marker_buf" == *"("* ]]; then output+="'${marker_buf}' " @@ -242,6 +249,11 @@ re_quote_pytest_markers() { # Flush any trailing marker expression (marker at end of command) if $collecting && [[ -n "$marker_buf" ]]; then + # Strip surrounding double quotes (see mid-stream flush comment) + if [[ "$marker_buf" == '"'*'"' ]]; then + marker_buf="${marker_buf#\"}" + marker_buf="${marker_buf%\"}" + fi if [[ "$marker_buf" == *" "* || "$marker_buf" == *"("* ]]; then output+="'${marker_buf}'" else From 09d0124c76d2a159457f1d9b07df4e8cf6b59a8b Mon Sep 17 00:00:00 2001 From: Andreas Karatzas Date: Tue, 3 Mar 2026 18:51:05 -0600 Subject: [PATCH 2/5] Adding more mirrors and testing new orchestrator Signed-off-by: Andreas Karatzas --- .../scripts/hardware_ci/run-amd-test.sh | 2 ++ .buildkite/test_areas/expert_parallelism.yaml | 10 ++++++++++ .buildkite/test_areas/misc.yaml | 10 ++++++++++ .buildkite/test_areas/models_language.yaml | 10 ++++++++++ .buildkite/test_areas/models_multimodal.yaml | 20 +++++++++++++++++++ 5 files changed, 52 insertions(+) diff --git a/.buildkite/scripts/hardware_ci/run-amd-test.sh b/.buildkite/scripts/hardware_ci/run-amd-test.sh index 70eef322bd19..1c43c404d247 100755 --- a/.buildkite/scripts/hardware_ci/run-amd-test.sh +++ b/.buildkite/scripts/hardware_ci/run-amd-test.sh @@ -504,6 +504,8 @@ else -e HF_TOKEN \ -e AWS_ACCESS_KEY_ID \ -e AWS_SECRET_ACCESS_KEY \ + -e BUILDKITE_PARALLEL_JOB \ + -e BUILDKITE_PARALLEL_JOB_COUNT \ -v "${HF_CACHE}:${HF_MOUNT}" \ -e "HF_HOME=${HF_MOUNT}" \ -e "PYTHONPATH=${MYPYTHONPATH}" \ diff --git a/.buildkite/test_areas/expert_parallelism.yaml b/.buildkite/test_areas/expert_parallelism.yaml index 1443d847eaf5..5c9f2f5a23e6 100644 --- a/.buildkite/test_areas/expert_parallelism.yaml +++ b/.buildkite/test_areas/expert_parallelism.yaml @@ -10,6 +10,11 @@ steps: - tests/distributed/test_eplb_algo.py commands: - pytest -v -s distributed/test_eplb_algo.py + mirror: + amd: + device: mi325_1 + depends_on: + - image-build-amd - label: EPLB Execution timeout_in_minutes: 20 @@ -21,6 +26,11 @@ steps: commands: - pytest -v -s distributed/test_eplb_execute.py - pytest -v -s distributed/test_eplb_spec_decode.py + mirror: + amd: + device: mi325_1 + depends_on: + - image-build-amd - label: Elastic EP Scaling Test timeout_in_minutes: 20 diff --git a/.buildkite/test_areas/misc.yaml b/.buildkite/test_areas/misc.yaml index d8957c217755..21b9522ae6b5 100644 --- a/.buildkite/test_areas/misc.yaml +++ b/.buildkite/test_areas/misc.yaml @@ -57,6 +57,11 @@ steps: - pip install modelscope - pytest -v -s test_regression.py working_dir: "/vllm-workspace/tests" # optional + mirror: + amd: + device: mi325_1 + depends_on: + - image-build-amd - label: Examples timeout_in_minutes: 45 @@ -122,6 +127,11 @@ steps: - pytest -v -s detokenizer - pytest -v -s -m 'not cpu_test' multimodal - pytest -v -s utils_ + mirror: + amd: + device: mi325_1 + depends_on: + - image-build-amd - label: Async Engine, Inputs, Utils, Worker, Config (CPU) depends_on: diff --git a/.buildkite/test_areas/models_language.yaml b/.buildkite/test_areas/models_language.yaml index a3bd21ccff3c..bf1742f037c0 100644 --- a/.buildkite/test_areas/models_language.yaml +++ b/.buildkite/test_areas/models_language.yaml @@ -12,6 +12,11 @@ steps: # Test standard language models, excluding a subset of slow tests - pip freeze | grep -E 'torch' - pytest -v -s models/language -m 'core_model and (not slow_test)' + mirror: + amd: + device: mi325_1 + depends_on: + - image-build-amd - label: Language Models Tests (Extra Standard) %N timeout_in_minutes: 45 @@ -42,6 +47,11 @@ steps: # Shard hybrid language model tests - pytest -v -s models/language/generation -m hybrid_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB parallelism: 2 + mirror: + amd: + device: mi325_1 + depends_on: + - image-build-amd - label: Language Models Test (Extended Generation) # 80min timeout_in_minutes: 110 diff --git a/.buildkite/test_areas/models_multimodal.yaml b/.buildkite/test_areas/models_multimodal.yaml index a1194c229866..cd58d47a1649 100644 --- a/.buildkite/test_areas/models_multimodal.yaml +++ b/.buildkite/test_areas/models_multimodal.yaml @@ -45,6 +45,11 @@ steps: - vllm/v1/core/ commands: - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-mm-small.txt --tp-size=1 + mirror: + amd: + device: mi325_1 + depends_on: + - image-build-amd - label: Multi-Modal Models (Extended) 1 optional: true @@ -54,6 +59,11 @@ steps: commands: - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git - pytest -v -s models/multimodal -m 'not core_model' --ignore models/multimodal/generation/test_common.py --ignore models/multimodal/processing + mirror: + amd: + device: mi325_1 + depends_on: + - image-build-amd - label: Multi-Modal Models (Extended) 2 optional: true @@ -63,6 +73,11 @@ steps: commands: - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=0) and not core_model' + mirror: + amd: + device: mi325_1 + depends_on: + - image-build-amd - label: Multi-Modal Models (Extended) 3 optional: true @@ -72,3 +87,8 @@ steps: commands: - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=1) and not core_model' + mirror: + amd: + device: mi325_1 + depends_on: + - image-build-amd From 1e8105b4f859fa19b59f559639da7d2390ac4153 Mon Sep 17 00:00:00 2001 From: Andreas Karatzas Date: Tue, 3 Mar 2026 23:23:53 -0600 Subject: [PATCH 3/5] Removed lang mod std mirror until fixed Signed-off-by: Andreas Karatzas --- .buildkite/test_areas/misc.yaml | 5 ----- .buildkite/test_areas/models_language.yaml | 11 ++++++----- .buildkite/test_areas/models_multimodal.yaml | 5 +++++ 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/.buildkite/test_areas/misc.yaml b/.buildkite/test_areas/misc.yaml index 21b9522ae6b5..c1596adbef23 100644 --- a/.buildkite/test_areas/misc.yaml +++ b/.buildkite/test_areas/misc.yaml @@ -127,11 +127,6 @@ steps: - pytest -v -s detokenizer - pytest -v -s -m 'not cpu_test' multimodal - pytest -v -s utils_ - mirror: - amd: - device: mi325_1 - depends_on: - - image-build-amd - label: Async Engine, Inputs, Utils, Worker, Config (CPU) depends_on: diff --git a/.buildkite/test_areas/models_language.yaml b/.buildkite/test_areas/models_language.yaml index bf1742f037c0..d0c235800c35 100644 --- a/.buildkite/test_areas/models_language.yaml +++ b/.buildkite/test_areas/models_language.yaml @@ -12,11 +12,6 @@ steps: # Test standard language models, excluding a subset of slow tests - pip freeze | grep -E 'torch' - pytest -v -s models/language -m 'core_model and (not slow_test)' - mirror: - amd: - device: mi325_1 - depends_on: - - image-build-amd - label: Language Models Tests (Extra Standard) %N timeout_in_minutes: 45 @@ -52,6 +47,12 @@ steps: device: mi325_1 depends_on: - image-build-amd + commands: + - pytest -v -s v1/e2e + - pytest -v -s v1/engine + - uv pip install --system --no-build-isolation 'git+https://github.com/AndreasKaratzas/mamba@fix-rocm-7.0-warp-size-constexpr' + - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.2' + - pytest -v -s models/language/generation -m hybrid_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB - label: Language Models Test (Extended Generation) # 80min timeout_in_minutes: 110 diff --git a/.buildkite/test_areas/models_multimodal.yaml b/.buildkite/test_areas/models_multimodal.yaml index cd58d47a1649..538aa69a3270 100644 --- a/.buildkite/test_areas/models_multimodal.yaml +++ b/.buildkite/test_areas/models_multimodal.yaml @@ -35,6 +35,11 @@ steps: commands: - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git - pytest -v -s models/multimodal/processing/test_tensor_schema.py + mirror: + amd: + device: mi325_1 + depends_on: + - image-build-amd - label: Multi-Modal Accuracy Eval (Small Models) # 50min timeout_in_minutes: 70 From d5ea9f6c199b170f7d2097a29c05e085729c5342 Mon Sep 17 00:00:00 2001 From: Andreas Karatzas Date: Thu, 5 Mar 2026 16:53:37 -0600 Subject: [PATCH 4/5] [ROCm][CI] Gating and mirroring more TGs - stage D Signed-off-by: Andreas Karatzas --- .../scripts/hardware_ci/run-amd-test.sh | 14 ---- .buildkite/test_areas/distributed.yaml | 67 ++++++++++++++++++- .buildkite/test_areas/expert_parallelism.yaml | 10 --- .buildkite/test_areas/lora.yaml | 6 +- .buildkite/test_areas/misc.yaml | 5 -- .buildkite/test_areas/models_language.yaml | 18 ++++- .buildkite/test_areas/models_multimodal.yaml | 10 --- .buildkite/test_areas/quantization.yaml | 5 ++ 8 files changed, 92 insertions(+), 43 deletions(-) diff --git a/.buildkite/scripts/hardware_ci/run-amd-test.sh b/.buildkite/scripts/hardware_ci/run-amd-test.sh index 1c43c404d247..8895771f0a40 100755 --- a/.buildkite/scripts/hardware_ci/run-amd-test.sh +++ b/.buildkite/scripts/hardware_ci/run-amd-test.sh @@ -205,13 +205,6 @@ re_quote_pytest_markers() { esac if $is_boundary; then - # Strip surrounding double quotes if present (from upstream - # single-to-double conversion); without this, wrapping below - # would produce '"expr"' with literal double-quote characters. - if [[ "$marker_buf" == '"'*'"' ]]; then - marker_buf="${marker_buf#\"}" - marker_buf="${marker_buf%\"}" - fi # Flush the collected marker expression if [[ "$marker_buf" == *" "* || "$marker_buf" == *"("* ]]; then output+="'${marker_buf}' " @@ -249,11 +242,6 @@ re_quote_pytest_markers() { # Flush any trailing marker expression (marker at end of command) if $collecting && [[ -n "$marker_buf" ]]; then - # Strip surrounding double quotes (see mid-stream flush comment) - if [[ "$marker_buf" == '"'*'"' ]]; then - marker_buf="${marker_buf#\"}" - marker_buf="${marker_buf%\"}" - fi if [[ "$marker_buf" == *" "* || "$marker_buf" == *"("* ]]; then output+="'${marker_buf}'" else @@ -504,8 +492,6 @@ else -e HF_TOKEN \ -e AWS_ACCESS_KEY_ID \ -e AWS_SECRET_ACCESS_KEY \ - -e BUILDKITE_PARALLEL_JOB \ - -e BUILDKITE_PARALLEL_JOB_COUNT \ -v "${HF_CACHE}:${HF_MOUNT}" \ -e "HF_HOME=${HF_MOUNT}" \ -e "PYTHONPATH=${MYPYTHONPATH}" \ diff --git a/.buildkite/test_areas/distributed.yaml b/.buildkite/test_areas/distributed.yaml index 64911983f5a8..8a62f15f9f8b 100644 --- a/.buildkite/test_areas/distributed.yaml +++ b/.buildkite/test_areas/distributed.yaml @@ -49,6 +49,27 @@ steps: - VLLM_TEST_SAME_HOST=1 VLLM_TEST_WITH_DEFAULT_DEVICE_SET=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep 'Same node test passed' - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown - pytest -v -s v1/worker/test_worker_memory_snapshot.py + mirror: + amd: + device: mi325_2 + depends_on: + - image-build-amd + commands: + # Work around HIP bug tracked here: https://github.com/ROCm/hip/issues/3876 + # TODO: Remove when the bug is fixed in a future ROCm release + - export TORCH_NCCL_BLOCKING_WAIT=1 + # NOTE: The rest is in complete parity with CUDA tests + - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py + - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py + - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py + - DP_SIZE=2 pytest -v -s v1/entrypoints/openai/test_multi_api_servers.py + - pytest -v -s entrypoints/llm/test_collective_rpc.py + - pytest -v -s ./compile/fullgraph/test_basic_correctness.py + - pytest -v -s ./compile/test_wrapper.py + - VLLM_TEST_SAME_HOST=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep 'Same node test passed' + - VLLM_TEST_SAME_HOST=1 VLLM_TEST_WITH_DEFAULT_DEVICE_SET=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep 'Same node test passed' + - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown + - pytest -v -s v1/worker/test_worker_memory_snapshot.py - label: Distributed Tests (4 GPUs) timeout_in_minutes: 50 @@ -105,6 +126,40 @@ steps: - cd new_weight_syncing - VLLM_ALLOW_INSECURE_SERIALIZATION=1 python3 rlhf_nccl.py - VLLM_ALLOW_INSECURE_SERIALIZATION=1 python3 rlhf_ipc.py + mirror: + amd: + device: mi325_4 + depends_on: + - image-build-amd + commands: + # Work around HIP bug tracked here: https://github.com/ROCm/hip/issues/3876 + # TODO: Remove when the bug is fixed in a future ROCm release + - export TORCH_NCCL_BLOCKING_WAIT=1 + # NOTE: The rest is in complete parity with CUDA tests + - torchrun --nproc-per-node=4 distributed/test_torchrun_example.py + - PP_SIZE=2 torchrun --nproc-per-node=4 distributed/test_torchrun_example.py + - TP_SIZE=4 torchrun --nproc-per-node=4 distributed/test_torchrun_example_moe.py + - PP_SIZE=2 TP_SIZE=2 torchrun --nproc-per-node=4 distributed/test_torchrun_example_moe.py + - DP_SIZE=4 ENABLE_EP=1 torchrun --nproc-per-node=4 distributed/test_torchrun_example_moe.py + - TP_SIZE=2 DP_SIZE=2 ENABLE_EP=1 torchrun --nproc-per-node=4 distributed/test_torchrun_example_moe.py + - python3 ../examples/offline_inference/data_parallel.py --enforce-eager + - TP_SIZE=2 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py + - TP_SIZE=2 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py + - TP_SIZE=2 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py + - TP_SIZE=1 DP_SIZE=4 pytest -v -s v1/distributed/test_internal_lb_dp.py + - TP_SIZE=1 DP_SIZE=4 pytest -v -s v1/distributed/test_hybrid_lb_dp.py + - pytest -v -s v1/engine/test_engine_core_client.py::test_kv_cache_events_dp + - pytest -v -s distributed/test_utils.py + - pytest -v -s compile/fullgraph/test_basic_correctness.py + - pytest -v -s distributed/test_pynccl.py + - pytest -v -s distributed/test_events.py + - pytest -v -s distributed/test_symm_mem_allreduce.py + - cd ../examples/offline_inference + - VLLM_ALLOW_INSECURE_SERIALIZATION=1 python3 rlhf.py + - VLLM_ALLOW_INSECURE_SERIALIZATION=1 RAY_DEDUP_LOGS=0 python3 rlhf_colocate.py + - cd new_weight_syncing + - VLLM_ALLOW_INSECURE_SERIALIZATION=1 python3 rlhf_nccl.py + - VLLM_ALLOW_INSECURE_SERIALIZATION=1 python3 rlhf_ipc.py - label: Distributed Tests (8 GPUs)(H100) timeout_in_minutes: 10 @@ -138,7 +193,7 @@ steps: - TARGET_TEST_SUITE=A100 pytest basic_correctness/ -v -s -m 'distributed(num_gpus=2)' - pytest -v -s -x lora/test_mixtral.py -- label: Distributed Tests (2 GPUs)(H100) +- label: Distributed Tests (2 GPUs)(H100-MI325) timeout_in_minutes: 15 device: h100 optional: true @@ -149,6 +204,16 @@ steps: # - VLLM_ALLOW_INSECURE_SERIALIZATION=1 python3 examples/offline_inference/new_weight_syncing/rlhf_async_new_apis.py --- failing, need to re-enable - VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model=Qwen/Qwen1.5-MoE-A2.7B -tp=1 -dp=2 --max-model-len=2048 --all2all-backend=deepep_high_throughput - pytest -v -s tests/v1/distributed/test_dbo.py + mirror: + amd: + device: mi325_2 + depends_on: + - image-build-amd + commands: + - pytest -v -s tests/distributed/test_context_parallel.py + - python3 examples/offline_inference/new_weight_syncing/rlhf_async_new_apis.py + - VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model=Qwen/Qwen1.5-MoE-A2.7B -tp=1 -dp=2 --max-model-len=2048 --all2all-backend=allgather_reducescatter --disable-nccl-for-dp-synchronization + - pytest -v -s tests/v1/distributed/test_dbo.py - label: Distributed Tests (2 GPUs)(B200) device: b200 diff --git a/.buildkite/test_areas/expert_parallelism.yaml b/.buildkite/test_areas/expert_parallelism.yaml index 5c9f2f5a23e6..1443d847eaf5 100644 --- a/.buildkite/test_areas/expert_parallelism.yaml +++ b/.buildkite/test_areas/expert_parallelism.yaml @@ -10,11 +10,6 @@ steps: - tests/distributed/test_eplb_algo.py commands: - pytest -v -s distributed/test_eplb_algo.py - mirror: - amd: - device: mi325_1 - depends_on: - - image-build-amd - label: EPLB Execution timeout_in_minutes: 20 @@ -26,11 +21,6 @@ steps: commands: - pytest -v -s distributed/test_eplb_execute.py - pytest -v -s distributed/test_eplb_spec_decode.py - mirror: - amd: - device: mi325_1 - depends_on: - - image-build-amd - label: Elastic EP Scaling Test timeout_in_minutes: 20 diff --git a/.buildkite/test_areas/lora.yaml b/.buildkite/test_areas/lora.yaml index f034175cc1b8..713e13e87263 100644 --- a/.buildkite/test_areas/lora.yaml +++ b/.buildkite/test_areas/lora.yaml @@ -10,7 +10,11 @@ steps: commands: - pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py --ignore=lora/test_llm_with_multi_loras.py --ignore=lora/test_olmoe_tp.py --ignore=lora/test_deepseekv2_tp.py --ignore=lora/test_gptoss_tp.py --ignore=lora/test_qwen3moe_tp.py parallelism: 4 - + mirror: + amd: + device: mi325_1 + depends_on: + - image-build-amd - label: LoRA TP (Distributed) timeout_in_minutes: 30 diff --git a/.buildkite/test_areas/misc.yaml b/.buildkite/test_areas/misc.yaml index e7047d449516..dd14a1eac5a4 100644 --- a/.buildkite/test_areas/misc.yaml +++ b/.buildkite/test_areas/misc.yaml @@ -57,11 +57,6 @@ steps: - pip install modelscope - pytest -v -s test_regression.py working_dir: "/vllm-workspace/tests" # optional - mirror: - amd: - device: mi325_1 - depends_on: - - image-build-amd - label: Examples timeout_in_minutes: 45 diff --git a/.buildkite/test_areas/models_language.yaml b/.buildkite/test_areas/models_language.yaml index d0c235800c35..9ba4b52871b5 100644 --- a/.buildkite/test_areas/models_language.yaml +++ b/.buildkite/test_areas/models_language.yaml @@ -12,6 +12,11 @@ steps: # Test standard language models, excluding a subset of slow tests - pip freeze | grep -E 'torch' - pytest -v -s models/language -m 'core_model and (not slow_test)' + mirror: + amd: + device: mi325_1 + depends_on: + - image-build-amd - label: Language Models Tests (Extra Standard) %N timeout_in_minutes: 45 @@ -27,6 +32,16 @@ steps: - pip freeze | grep -E 'torch' - pytest -v -s models/language -m 'core_model and slow_test' --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB parallelism: 2 + mirror: + amd: + device: mi325_1 + depends_on: + - image-build-amd + commands: + - export TORCH_NCCL_BLOCKING_WAIT=1 + # NOTE: The rest is in complete parity with CUDA tests + - pip freeze | grep -E 'torch' + - pytest -v -s models/language -m 'core_model and slow_test' --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB - label: Language Models Tests (Hybrid) %N timeout_in_minutes: 75 @@ -48,9 +63,8 @@ steps: depends_on: - image-build-amd commands: - - pytest -v -s v1/e2e - - pytest -v -s v1/engine - uv pip install --system --no-build-isolation 'git+https://github.com/AndreasKaratzas/mamba@fix-rocm-7.0-warp-size-constexpr' + # NOTE: The rest is in complete parity with CUDA tests - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.2' - pytest -v -s models/language/generation -m hybrid_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB diff --git a/.buildkite/test_areas/models_multimodal.yaml b/.buildkite/test_areas/models_multimodal.yaml index 4a91234890d8..a18b6b39112d 100644 --- a/.buildkite/test_areas/models_multimodal.yaml +++ b/.buildkite/test_areas/models_multimodal.yaml @@ -55,11 +55,6 @@ steps: - vllm/v1/core/ commands: - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-mm-small.txt --tp-size=1 - mirror: - amd: - device: mi325_1 - depends_on: - - image-build-amd - label: Multi-Modal Models (Extended) 1 optional: true @@ -97,8 +92,3 @@ steps: commands: - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=1) and not core_model' - mirror: - amd: - device: mi325_1 - depends_on: - - image-build-amd diff --git a/.buildkite/test_areas/quantization.yaml b/.buildkite/test_areas/quantization.yaml index 5ee2e5186966..9e25df9c3c57 100644 --- a/.buildkite/test_areas/quantization.yaml +++ b/.buildkite/test_areas/quantization.yaml @@ -44,3 +44,8 @@ steps: - tests/models/quantization commands: - pytest -v -s models/quantization + mirror: + amd: + device: mi325_1 + depends_on: + - image-build-amd From 9f46b90cfc5f531752fff2bdef1b97376482897e Mon Sep 17 00:00:00 2001 From: Andreas Karatzas Date: Sat, 14 Mar 2026 21:29:06 -0500 Subject: [PATCH 5/5] Redistributing tests to different architectures Signed-off-by: Andreas Karatzas --- .buildkite/test_areas/basic_correctness.yaml | 2 +- .buildkite/test_areas/distributed.yaml | 6 +++--- .buildkite/test_areas/engine.yaml | 2 +- .buildkite/test_areas/entrypoints.yaml | 4 ++-- .buildkite/test_areas/lora.yaml | 2 +- .buildkite/test_areas/misc.yaml | 2 +- .buildkite/test_areas/models_basic.yaml | 2 +- .buildkite/test_areas/models_language.yaml | 14 ++------------ .buildkite/test_areas/models_multimodal.yaml | 2 +- .buildkite/test_areas/plugins.yaml | 2 +- .buildkite/test_areas/quantization.yaml | 2 +- .buildkite/test_areas/samplers.yaml | 2 +- 12 files changed, 16 insertions(+), 26 deletions(-) diff --git a/.buildkite/test_areas/basic_correctness.yaml b/.buildkite/test_areas/basic_correctness.yaml index 5259a66a3c9e..4498bebaf47e 100644 --- a/.buildkite/test_areas/basic_correctness.yaml +++ b/.buildkite/test_areas/basic_correctness.yaml @@ -16,6 +16,6 @@ steps: - pytest -v -s basic_correctness/test_cpu_offload.py mirror: amd: - device: mi325_1 + device: mi250_1 depends_on: - image-build-amd diff --git a/.buildkite/test_areas/distributed.yaml b/.buildkite/test_areas/distributed.yaml index 8a62f15f9f8b..daa3d42fbc62 100644 --- a/.buildkite/test_areas/distributed.yaml +++ b/.buildkite/test_areas/distributed.yaml @@ -51,7 +51,7 @@ steps: - pytest -v -s v1/worker/test_worker_memory_snapshot.py mirror: amd: - device: mi325_2 + device: mi250_2 depends_on: - image-build-amd commands: @@ -128,7 +128,7 @@ steps: - VLLM_ALLOW_INSECURE_SERIALIZATION=1 python3 rlhf_ipc.py mirror: amd: - device: mi325_4 + device: mi250_4 depends_on: - image-build-amd commands: @@ -206,7 +206,7 @@ steps: - pytest -v -s tests/v1/distributed/test_dbo.py mirror: amd: - device: mi325_2 + device: mi250_2 depends_on: - image-build-amd commands: diff --git a/.buildkite/test_areas/engine.yaml b/.buildkite/test_areas/engine.yaml index b5b3eeb6d728..afd4822a84d1 100644 --- a/.buildkite/test_areas/engine.yaml +++ b/.buildkite/test_areas/engine.yaml @@ -49,7 +49,7 @@ steps: - pytest -v -s v1/e2e/test_spec_decode.py -k "tensor_parallelism" mirror: amd: - device: mi325_2 + device: mi250_2 depends_on: - image-build-amd diff --git a/.buildkite/test_areas/entrypoints.yaml b/.buildkite/test_areas/entrypoints.yaml index 5796036f3361..bdb7c2adc1f4 100644 --- a/.buildkite/test_areas/entrypoints.yaml +++ b/.buildkite/test_areas/entrypoints.yaml @@ -62,7 +62,7 @@ steps: - pytest -v -s tool_use mirror: amd: - device: mi325_1 + device: mi250_1 depends_on: - image-build-amd @@ -99,7 +99,7 @@ steps: - pytest -v -s v1/entrypoints mirror: amd: - device: mi325_1 + device: mi250_1 depends_on: - image-build-amd diff --git a/.buildkite/test_areas/lora.yaml b/.buildkite/test_areas/lora.yaml index 713e13e87263..85af12faacaf 100644 --- a/.buildkite/test_areas/lora.yaml +++ b/.buildkite/test_areas/lora.yaml @@ -12,7 +12,7 @@ steps: parallelism: 4 mirror: amd: - device: mi325_1 + device: mi250_1 depends_on: - image-build-amd diff --git a/.buildkite/test_areas/misc.yaml b/.buildkite/test_areas/misc.yaml index dd14a1eac5a4..79b390715602 100644 --- a/.buildkite/test_areas/misc.yaml +++ b/.buildkite/test_areas/misc.yaml @@ -89,7 +89,7 @@ steps: - python3 offline_inference/spec_decode.py --test --method eagle3 --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 1536 mirror: amd: - device: mi325_1 + device: mi250_1 depends_on: - image-build-amd diff --git a/.buildkite/test_areas/models_basic.yaml b/.buildkite/test_areas/models_basic.yaml index de0f3994dd10..f71fee7afbbb 100644 --- a/.buildkite/test_areas/models_basic.yaml +++ b/.buildkite/test_areas/models_basic.yaml @@ -38,7 +38,7 @@ steps: - pytest -v -s models/test_terratorch.py models/test_transformers.py models/test_registry.py mirror: amd: - device: mi325_1 + device: mi250_1 depends_on: - image-build-amd diff --git a/.buildkite/test_areas/models_language.yaml b/.buildkite/test_areas/models_language.yaml index 9ba4b52871b5..488b5f7cf986 100644 --- a/.buildkite/test_areas/models_language.yaml +++ b/.buildkite/test_areas/models_language.yaml @@ -34,7 +34,7 @@ steps: parallelism: 2 mirror: amd: - device: mi325_1 + device: mi250_1 depends_on: - image-build-amd commands: @@ -57,16 +57,6 @@ steps: # Shard hybrid language model tests - pytest -v -s models/language/generation -m hybrid_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB parallelism: 2 - mirror: - amd: - device: mi325_1 - depends_on: - - image-build-amd - commands: - - uv pip install --system --no-build-isolation 'git+https://github.com/AndreasKaratzas/mamba@fix-rocm-7.0-warp-size-constexpr' - # NOTE: The rest is in complete parity with CUDA tests - - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.2' - - pytest -v -s models/language/generation -m hybrid_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB - label: Language Models Test (Extended Generation) # 80min timeout_in_minutes: 110 @@ -109,7 +99,7 @@ steps: - pytest -v -s models/language/pooling -m 'not core_model' mirror: amd: - device: mi325_1 + device: mi250_1 depends_on: - image-build-amd diff --git a/.buildkite/test_areas/models_multimodal.yaml b/.buildkite/test_areas/models_multimodal.yaml index a18b6b39112d..c52d1a2aaade 100644 --- a/.buildkite/test_areas/models_multimodal.yaml +++ b/.buildkite/test_areas/models_multimodal.yaml @@ -80,7 +80,7 @@ steps: - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=0) and not core_model' mirror: amd: - device: mi325_1 + device: mi250_1 depends_on: - image-build-amd diff --git a/.buildkite/test_areas/plugins.yaml b/.buildkite/test_areas/plugins.yaml index 34747a2350db..3b703efd6c65 100644 --- a/.buildkite/test_areas/plugins.yaml +++ b/.buildkite/test_areas/plugins.yaml @@ -41,6 +41,6 @@ steps: - pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins mirror: amd: - device: mi325_2 + device: mi250_2 depends_on: - image-build-amd diff --git a/.buildkite/test_areas/quantization.yaml b/.buildkite/test_areas/quantization.yaml index 9e25df9c3c57..5465c0bc0fe5 100644 --- a/.buildkite/test_areas/quantization.yaml +++ b/.buildkite/test_areas/quantization.yaml @@ -46,6 +46,6 @@ steps: - pytest -v -s models/quantization mirror: amd: - device: mi325_1 + device: mi355_1 depends_on: - image-build-amd diff --git a/.buildkite/test_areas/samplers.yaml b/.buildkite/test_areas/samplers.yaml index 2052a379827a..b782f188e220 100644 --- a/.buildkite/test_areas/samplers.yaml +++ b/.buildkite/test_areas/samplers.yaml @@ -14,7 +14,7 @@ steps: - VLLM_USE_FLASHINFER_SAMPLER=1 pytest -v -s samplers mirror: amd: - device: mi325_1 + device: mi250_1 depends_on: - image-build-amd commands: