Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .buildkite/test_areas/engine.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ steps:
- pytest -v -s v1/engine --ignore v1/engine/test_preprocess_error_handling.py
mirror:
amd:
device: mi325_8
device: mi325_1
depends_on:
- image-build-amd
commands:
Expand Down
51 changes: 51 additions & 0 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1327,6 +1327,57 @@ def wrapper(f: Callable[_P, None]) -> Callable[_P, None]:
return wrapper


def gpu_tier_mark(*, min_gpus: int = 1, max_gpus: int | None = None) -> list:
    """
    Mark a test to only run when the GPU count falls within [min_gpus, max_gpus].

    Returns a list of pytest marks: a ``distributed(num_gpus=...)`` mark when
    the test needs more than one GPU (so CI can route it to multi-GPU
    runners), plus a ``skip`` mark when the current device count falls
    outside the requested range.

    Args:
        min_gpus: Minimum number of GPUs required to run the test.
        max_gpus: Maximum number of GPUs allowed, or ``None`` for no upper
            bound.

    Raises:
        ValueError: If ``max_gpus`` is smaller than ``min_gpus`` — such a
            range can never be satisfied and would silently skip the test
            everywhere.

    Examples:
        @gpu_tier_mark(min_gpus=2)              # only on multi-GPU
        @gpu_tier_mark(max_gpus=1)              # only on single-GPU
        @gpu_tier_mark(min_gpus=2, max_gpus=4)  # 2-4 GPUs only
    """
    if max_gpus is not None and max_gpus < min_gpus:
        raise ValueError(
            f"max_gpus ({max_gpus}) must be >= min_gpus ({min_gpus})")

    # NOTE: the device count is sampled when the decorator is evaluated
    # (collection/import time), not when the test body runs.
    gpu_count = cuda_device_count_stateless()
    marks = []

    if min_gpus > 1:
        marks.append(pytest.mark.distributed(num_gpus=min_gpus))

    reasons = []
    if gpu_count < min_gpus:
        reasons.append(f"Need at least {min_gpus} GPUs (have {gpu_count})")
    if max_gpus is not None and gpu_count > max_gpus:
        reasons.append(f"Need at most {max_gpus} GPUs (have {gpu_count})")

    if reasons:
        # `skip` is the idiomatic form of `skipif(True, ...)` — the
        # condition is already known at mark-construction time.
        marks.append(pytest.mark.skip(reason="; ".join(reasons)))

    return marks


def single_gpu_only(f=None):
    """Skip this test when running in a multi-GPU environment.

    Works both as a bare decorator (``@single_gpu_only``) and as a
    decorator factory (``@single_gpu_only()``).
    """
    marks = gpu_tier_mark(max_gpus=1)

    def decorate(func):
        decorated = func
        # Apply in reverse so marks stack in the same order as if they
        # had been written as decorators above the test.
        for mark in reversed(marks):
            decorated = mark(decorated)
        return decorated

    if f is None:
        return decorate
    return decorate(f)


def multi_gpu_only(*, num_gpus: int = 2):
    """Skip this test when running on fewer than num_gpus GPUs."""
    marks = gpu_tier_mark(min_gpus=num_gpus)

    def decorate(f):
        decorated = f
        # Apply in reverse so marks stack in the same order as if they
        # had been written as decorators above the test.
        for mark in reversed(marks):
            decorated = mark(decorated)
        return decorated

    return decorate


async def completions_with_server_args(
prompts: list[str],
model_name: str,
Expand Down
4 changes: 4 additions & 0 deletions tests/v1/e2e/test_async_scheduling.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import pytest
import torch._dynamo.config as dynamo_config

from tests.utils import large_gpu_mark, single_gpu_only
from vllm import SamplingParams
from vllm.logprobs import Logprob
from vllm.platforms import current_platform
Expand Down Expand Up @@ -36,6 +37,7 @@
)


@single_gpu_only
def test_without_spec_decoding(
sample_json_schema,
monkeypatch: pytest.MonkeyPatch,
Expand Down Expand Up @@ -95,6 +97,8 @@ def test_without_spec_decoding(
run_tests(monkeypatch, MODEL, test_configs, test_sampling_params)


@single_gpu_only
@large_gpu_mark(min_gb=16)
def test_with_spec_decoding(sample_json_schema, monkeypatch: pytest.MonkeyPatch):
"""Test consistency and acceptance rates with some different combos of
preemption, executor, async scheduling, prefill chunking,
Expand Down
Loading