diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml
index 8da851471451..dbcbc78ddd8a 100644
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -39,8 +39,7 @@
 #####################################################################################################################################
 # #
 # IMPORTANT: #
-# * Currently AMD CI has MI300 agents, MI325 agents, and MI355 agents. Of those, AMD is using mostly MI325 and MI355. AMD team #
-# is actively working on enabling more MI300 machines. All upcoming feature improvements are tracked in: #
+# * Currently AMD CI has MI250 agents, MI325 agents, and MI355 agents. All upcoming feature improvements are tracked in: #
 # https://github.com/vllm-project/vllm/issues/34994 #
 # #
 #-----------------------------------------------------------------------------------------------------------------------------------#
@@ -49,13 +48,15 @@
 # * [Pytorch Nightly Dependency Override Check]: if this test fails, it means the nightly torch version is not compatible with #
 # some of the dependencies. Please check the error message and add the package to #
 # whitelist in `/vllm/tools/pre_commit/generate_nightly_torch_test.py`. #
-# * [Entrypoints Integration Test (LLM)]: #
+# * [Entrypoints Integration (LLM)]: #
 # - {`pytest -v -s entrypoints/llm/test_generate.py`}: It needs a clean process #
 # - {`pytest -v -s entrypoints/offline_mode`}: Needs to avoid interference with other tests #
-# * [V1 Test e2e + engine]: The test uses 4 GPUs, but we schedule it on 8-GPU machines for stability. See discussion here: #
-# https://github.com/vllm-project/vllm/pull/31040 #
-# * [V1 others]: #
-# - Split the tests to avoid interference #
+# * [Engine / Engine (1 GPU) / e2e Scheduling / e2e Core / V1 e2e / Spec Decode / V1 Sample + Logits / V1 Core + KV + Metrics]: #
+# - Previously a single "V1 Test e2e + engine" step, now split across multiple groups. #
+# - V1 e2e (2/4 GPUs) uses 4 GPUs but is scheduled on 8-GPU machines for stability. See: #
+# https://github.com/vllm-project/vllm/pull/31040 #
+# * [V1 Sample + Logits / V1 Core + KV + Metrics / V1 others (CPU)]: #
+# - Previously a single "V1 others" step, now split to avoid interference. #
 # - Integration test for streaming correctness (requires special branch for __harness__ lib). #
 # * [V1 others (CPU)]: Split the tests to avoid interference #
 # * [PyTorch Compilation Unit Tests]: Run unit tests defined directly under `compile/`, not including subdirectories, which #
@@ -83,9 +84,9 @@
 # run plamo2 model in vLLM. #
 # * [Language Models Test (Extended Generation)]: Install fast path packages for testing against transformers (mamba, conv1d) #
 # and to run plamo2 model in vLLM. #
-# * [Multi-Modal Models (Standard)]: #
+# * [Multi-Modal Models (Standard) 1-4]: #
 # - Do NOT remove `VLLM_WORKER_MULTIPROC_METHOD=spawn` setting as ROCm requires this for certain models to function. #
-# * [Transformers Nightly Models Test]: Whisper needs `VLLM_WORKER_MULTIPROC_METHOD=spawn` to avoid deadlock. #
+# * [Transformers Nightly Models]: Whisper needs `VLLM_WORKER_MULTIPROC_METHOD=spawn` to avoid deadlock. #
 # * [Plugin Tests (2 GPUs)]: #
 # - {`pytest -v -s entrypoints/openai/test_oot_registration.py`}: It needs a clean process #
 # - {`pytest -v -s models/test_oot_registration.py`}: It needs a clean process #
@@ -94,11 +95,11 @@
 # - There is some Tensor Parallelism related processing logic in LoRA that requires multi-GPU testing for validation. #
 # - {`pytest -v -s -x lora/test_gptoss_tp.py`}: Disabled for now because MXFP4 backend on non-cuda platform doesn't support #
 # LoRA yet. #
-# * [Distributed Tests (GPU_TAG)]: Don't test llama model here, it seems hf implementation is buggy. See: #
-# https://github.com/vllm-project/vllm/pull/5689 #
-# * [Distributed Tests (GPU_TAG)]: Some old E2E tests were removed in https://github.com/vllm-project/vllm/pull/33293 in #
-# favor of new tests in fusions_e2e. We avoid replicating the new jobs in #
-# this file as it's deprecated. #
+# * [Distributed Tests (NxGPUs)(HW-TAG)]: Don't test llama model here, it seems hf implementation is buggy. See: #
+# https://github.com/vllm-project/vllm/pull/5689 #
+# * [Distributed Tests (NxGPUs)(HW-TAG)]: Some old E2E tests were removed in https://github.com/vllm-project/vllm/pull/33293 #
+# in favor of new tests in fusions_e2e. We avoid replicating the new jobs in #
+# this file as it's deprecated. #
 # #
 #####################################################################################################################################
diff --git a/tests/models/multimodal/generation/test_common.py b/tests/models/multimodal/generation/test_common.py
index c16efd065e1b..76cbe6e63347 100644
--- a/tests/models/multimodal/generation/test_common.py
+++ b/tests/models/multimodal/generation/test_common.py
@@ -220,7 +220,10 @@
         vllm_runner_kwargs={
             "model_impl": "transformers",
         },
-        marks=[pytest.mark.core_model],
+        marks=[
+            pytest.mark.core_model,
+            *([large_gpu_mark(min_gb=80)] if current_platform.is_rocm() else []),
+        ],
     ),
     "idefics3-transformers": VLMTestInfo(
         models=["HuggingFaceTB/SmolVLM-256M-Instruct"],