Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions tests/models/language/pooling/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Pytest configuration for vLLM language generation tests."""

import warnings

import torch

from vllm.platforms import current_platform


def pytest_sessionstart(session):
    """Configure ROCm-specific settings before test session starts."""
    if current_platform.is_rocm():
        # Disable Flash/MemEfficient SDP on ROCm to avoid HF Transformers
        # accuracy issues: https://github.com/vllm-project/vllm/issues/30167
        # TODO: Remove once ROCm SDP accuracy issues are resolved on HuggingFace
        torch.backends.cuda.enable_flash_sdp(False)
        torch.backends.cuda.enable_mem_efficient_sdp(False)
        torch.backends.cuda.enable_math_sdp(True)
        # Raise float32 matmul precision so HF reference outputs are stable.
        torch.set_float32_matmul_precision("high")
        warnings.warn(
            "ROCm: Disabled flash_sdp and mem_efficient_sdp, enabled math_sdp "
            "to avoid HuggingFace Transformers accuracy issues",
            UserWarning,
            stacklevel=1,
        )
Comment on lines +1 to +29
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Using pytest_sessionstart to modify global state like torch settings can have unintended side effects on other tests that run in the same session, as these settings are not reverted. This can lead to slower execution or unexpected behavior in unrelated tests.

A more robust and idiomatic pytest approach is to use a fixture with autouse=True and an appropriate scope (e.g., module). This ensures that the settings are applied only for the relevant tests and, crucially, that the original settings are restored after the tests in the module have completed, preventing any impact on other parts of the test suite.

I've suggested a refactoring to use a module-scoped autouse fixture which encapsulates the setup and teardown logic cleanly.

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Pytest configuration for vLLM language pooling tests."""

import warnings

import pytest
import torch

from vllm.platforms import current_platform


@pytest.fixture(scope="module", autouse=True)
def rocm_precision_workaround():
    """Workaround for numerical precision issues on ROCm for pooling tests.

    On ROCm, disables the Flash and MemEfficient SDP backends and forces the
    math SDP backend for the duration of the module, restoring the original
    settings afterwards. On all other platforms it is a no-op.
    """
    if not current_platform.is_rocm():
        yield
        return

    # Save original settings so they can be restored after the module's tests.
    orig_flash = torch.backends.cuda.flash_sdp_enabled()
    orig_mem_eff = torch.backends.cuda.mem_efficient_sdp_enabled()
    orig_math = torch.backends.cuda.math_sdp_enabled()
    orig_matmul_precision = torch.get_float32_matmul_precision()

    try:
        # Disable Flash/MemEfficient SDP on ROCm to avoid HF Transformers
        # accuracy issues: https://github.com/vllm-project/vllm/issues/30167
        # TODO: Remove once ROCm SDP accuracy issues are resolved on HuggingFace
        torch.backends.cuda.enable_flash_sdp(False)
        torch.backends.cuda.enable_mem_efficient_sdp(False)
        torch.backends.cuda.enable_math_sdp(True)
        # Use "high" to match the original session-level workaround; "highest"
        # would silently change matmul precision beyond what the fix intended.
        torch.set_float32_matmul_precision("high")
        warnings.warn(
            "ROCm: Disabled flash_sdp and mem_efficient_sdp, enabled math_sdp "
            "to avoid HuggingFace Transformers accuracy issues for pooling tests.",
            UserWarning,
            stacklevel=2,
        )
        yield
    finally:
        # Restore original settings
        torch.backends.cuda.enable_flash_sdp(orig_flash)
        torch.backends.cuda.enable_mem_efficient_sdp(orig_mem_eff)
        torch.backends.cuda.enable_math_sdp(orig_math)
        torch.set_float32_matmul_precision(orig_matmul_precision)

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is a bit too overengineered, and might not even be functional. The problem is in one specific test inside the Language Models Test (Extended Pooling) group.