diff --git a/tests/kernels/moe/test_moe.py b/tests/kernels/moe/test_moe.py index abf3e3667a75..d68310060386 100644 --- a/tests/kernels/moe/test_moe.py +++ b/tests/kernels/moe/test_moe.py @@ -286,6 +286,7 @@ def test_mixtral_moe(dtype: torch.dtype, padding: bool, use_rocm_aiter: bool, atol=mixtral_moe_tol[dtype]) +@pytest.mark.flaky(reruns=2) @pytest.mark.parametrize("m", [1, 123, 666]) @pytest.mark.parametrize("n", [128, 1024]) @pytest.mark.parametrize("k", [256, 2048]) diff --git a/tests/tensorizer_loader/conftest.py b/tests/tensorizer_loader/conftest.py index a88ae8cda73d..7efef163d2b9 100644 --- a/tests/tensorizer_loader/conftest.py +++ b/tests/tensorizer_loader/conftest.py @@ -1,12 +1,5 @@ # SPDX-License-Identifier: Apache-2.0 - -import functools -import gc -from typing import Callable, TypeVar - import pytest -import torch -from typing_extensions import ParamSpec from vllm.distributed import cleanup_dist_env_and_memory from vllm.model_executor.model_loader.tensorizer import TensorizerConfig @@ -25,32 +18,6 @@ def cleanup(): cleanup_dist_env_and_memory(shutdown_ray=True) -_P = ParamSpec("_P") -_R = TypeVar("_R") - - -def retry_until_skip(n: int): - - def decorator_retry(func: Callable[_P, _R]) -> Callable[_P, _R]: - - @functools.wraps(func) - def wrapper_retry(*args: _P.args, **kwargs: _P.kwargs) -> _R: - for i in range(n): - try: - return func(*args, **kwargs) - except AssertionError: - gc.collect() - torch.cuda.empty_cache() - if i == n - 1: - pytest.skip(f"Skipping test after {n} attempts.") - - raise AssertionError("Code should not be reached") - - return wrapper_retry - - return decorator_retry - - @pytest.fixture(autouse=True) def tensorizer_config(): config = TensorizerConfig(tensorizer_uri="vllm") diff --git a/tests/tensorizer_loader/test_tensorizer.py b/tests/tensorizer_loader/test_tensorizer.py index 5b9661bf6b05..7136dd44de03 100644 --- a/tests/tensorizer_loader/test_tensorizer.py +++ b/tests/tensorizer_loader/test_tensorizer.py @@ -28,7 +28,6 @@ from vllm.utils import PlaceholderModule, import_from_path from ..utils import VLLM_PATH, RemoteOpenAIServer -from .conftest import retry_until_skip try: from tensorizer import EncryptionParams @@ -325,7 +324,7 @@ def test_deserialized_encrypted_vllm_model_with_tp_has_same_outputs( assert outputs == deserialized_outputs -@retry_until_skip(3) +@pytest.mark.flaky(reruns=3) def test_vllm_tensorized_model_has_same_outputs(vllm_runner, tmp_path): gc.collect() torch.cuda.empty_cache()