diff --git a/.buildkite/test_areas/entrypoints.yaml b/.buildkite/test_areas/entrypoints.yaml index 17201a07103a..5796036f3361 100644 --- a/.buildkite/test_areas/entrypoints.yaml +++ b/.buildkite/test_areas/entrypoints.yaml @@ -41,6 +41,11 @@ steps: - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses - pytest -v -s entrypoints/test_chat_utils.py + mirror: + amd: + device: mi325_1 + depends_on: + - image-build-amd - label: Entrypoints Integration (API Server 2) timeout_in_minutes: 130 @@ -55,6 +60,11 @@ steps: - pytest -v -s entrypoints/instrumentator - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc - pytest -v -s tool_use + mirror: + amd: + device: mi325_1 + depends_on: + - image-build-amd - label: Entrypoints Integration (Pooling) timeout_in_minutes: 50 @@ -87,6 +97,11 @@ steps: - tests/v1 commands: - pytest -v -s v1/entrypoints + mirror: + amd: + device: mi325_1 + depends_on: + - image-build-amd - label: OpenAI API Correctness timeout_in_minutes: 30 diff --git a/.buildkite/test_areas/misc.yaml b/.buildkite/test_areas/misc.yaml index d8957c217755..dd14a1eac5a4 100644 --- a/.buildkite/test_areas/misc.yaml +++ b/.buildkite/test_areas/misc.yaml @@ -87,6 +87,11 @@ steps: - python3 offline_inference/spec_decode.py --test --method eagle --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 2048 # https://github.com/vllm-project/vllm/pull/26682 uses slightly more memory in PyTorch 2.9+ causing this test to OOM in 1xL4 GPU - python3 offline_inference/spec_decode.py --test --method eagle3 --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 1536 + mirror: + amd: + device: mi325_1 + depends_on: + - image-build-amd - label: Metrics, Tracing (2 GPUs) timeout_in_minutes: 20 diff --git a/.buildkite/test_areas/models_multimodal.yaml b/.buildkite/test_areas/models_multimodal.yaml index a1194c229866..03774de9362c 100644 --- a/.buildkite/test_areas/models_multimodal.yaml +++ b/.buildkite/test_areas/models_multimodal.yaml @@ -12,6 +12,11 @@ steps: - pip freeze | grep -E 'torch' - pytest -v -s models/multimodal -m core_model --ignore models/multimodal/generation/test_whisper.py --ignore models/multimodal/processing - cd .. && VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -v -s tests/models/multimodal/generation/test_whisper.py -m core_model # Otherwise, mp_method="spawn" doesn't work + mirror: + amd: + device: mi325_1 + depends_on: + - image-build-amd - label: Multi-Modal Processor Test (CPU) depends_on: @@ -54,6 +59,11 @@ steps: commands: - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git - pytest -v -s models/multimodal -m 'not core_model' --ignore models/multimodal/generation/test_common.py --ignore models/multimodal/processing + mirror: + amd: + device: mi325_1 + depends_on: + - image-build-amd - label: Multi-Modal Models (Extended) 2 optional: true diff --git a/.buildkite/test_areas/plugins.yaml b/.buildkite/test_areas/plugins.yaml index 7e7727fce7df..34747a2350db 100644 --- a/.buildkite/test_areas/plugins.yaml +++ b/.buildkite/test_areas/plugins.yaml @@ -39,3 +39,8 @@ steps: - pytest -v -s entrypoints/openai/test_oot_registration.py # it needs a clean process - pytest -v -s models/test_oot_registration.py # it needs a clean process - pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins + mirror: + amd: + device: mi325_2 + depends_on: + - image-build-amd