From f0c14f7e800628f6197a2b8c4731dfa5103aa4e9 Mon Sep 17 00:00:00 2001 From: yewentao256 Date: Wed, 19 Nov 2025 13:50:10 -0800 Subject: [PATCH 1/3] fix torch dynamo warning Signed-off-by: yewentao256 --- tests/v1/determinism/conftest.py | 9 ++------- tests/v1/determinism/test_batch_invariance.py | 6 +++--- vllm/model_executor/layers/batch_invariant.py | 20 ++++++++++--------- 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/tests/v1/determinism/conftest.py b/tests/v1/determinism/conftest.py index 3c2136e00584..be1a46160bc5 100644 --- a/tests/v1/determinism/conftest.py +++ b/tests/v1/determinism/conftest.py @@ -1,11 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import pytest +import os - -@pytest.fixture(autouse=True) -def enable_batch_invariant_mode(monkeypatch: pytest.MonkeyPatch): - """Automatically enable batch invariant kernel overrides for all tests.""" - monkeypatch.setenv("VLLM_BATCH_INVARIANT", "1") - yield +os.environ["VLLM_BATCH_INVARIANT"] = "1" diff --git a/tests/v1/determinism/test_batch_invariance.py b/tests/v1/determinism/test_batch_invariance.py index d4e88891512c..2a1b996aed61 100644 --- a/tests/v1/determinism/test_batch_invariance.py +++ b/tests/v1/determinism/test_batch_invariance.py @@ -8,6 +8,7 @@ import torch from utils import _extract_step_logprobs, _random_prompt, skip_unsupported +import vllm.model_executor.layers.batch_invariant as batch_invariant from vllm import LLM, SamplingParams from vllm.platforms import current_platform @@ -454,13 +455,12 @@ def test_logprobs_without_batch_invariance_should_fail( The test will PASS if we detect differences (proving batch invariance matters). The test will FAIL if everything matches (suggesting batch invariance isn't needed). """ - from vllm.model_executor.layers.batch_invariant import vllm_is_batch_invariant - - vllm_is_batch_invariant.cache_clear() monkeypatch.setenv("VLLM_ATTENTION_BACKEND", backend) # CRITICAL: Disable batch invariance for this test monkeypatch.setenv("VLLM_BATCH_INVARIANT", "0") + # refresh cached value + batch_invariant.VLLM_BATCH_INVARIANT = batch_invariant._read_vllm_batch_invariant() seed = int(os.getenv("VLLM_TEST_SEED", "12345")) random.seed(seed) diff --git a/vllm/model_executor/layers/batch_invariant.py b/vllm/model_executor/layers/batch_invariant.py index 5dbeb2917434..69fa6bdffd43 100644 --- a/vllm/model_executor/layers/batch_invariant.py +++ b/vllm/model_executor/layers/batch_invariant.py @@ -2,7 +2,6 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import os from collections.abc import Callable -from functools import cache from typing import Any import torch @@ -785,16 +784,19 @@ def enable_batch_invariant_mode(): torch.backends.cuda.preferred_blas_library(backend="cublaslt") -@cache -def vllm_is_batch_invariant(): - env_key = "VLLM_BATCH_INVARIANT" - is_overridden = False - val = os.getenv(env_key, "0") +def _read_vllm_batch_invariant() -> bool: + val = os.getenv("VLLM_BATCH_INVARIANT", "0") try: - is_overridden = int(val) != 0 + return int(val) != 0 except ValueError: - is_overridden = False - return is_overridden + return False + + +VLLM_BATCH_INVARIANT: bool = _read_vllm_batch_invariant() + + +def vllm_is_batch_invariant() -> bool: + return VLLM_BATCH_INVARIANT def override_envs_for_invariance(): From d0723c7a254b5bec67bb5cee55b260536025b27e Mon Sep 17 00:00:00 2001 From: yewentao256 Date: Wed, 19 Nov 2025 14:04:30 -0800 Subject: [PATCH 2/3] address comments Signed-off-by: yewentao256 --- tests/v1/determinism/conftest.py | 9 +++++++-- tests/v1/determinism/test_batch_invariance.py | 5 +---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/v1/determinism/conftest.py b/tests/v1/determinism/conftest.py index be1a46160bc5..a4df0ff0c122 100644 --- a/tests/v1/determinism/conftest.py +++ b/tests/v1/determinism/conftest.py @@ -1,6 +1,11 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import pytest -import os +import vllm.model_executor.layers.batch_invariant as batch_invariant -os.environ["VLLM_BATCH_INVARIANT"] = "1" + +@pytest.fixture(autouse=True) +def enable_batch_invariant_mode(monkeypatch: pytest.MonkeyPatch): + """Automatically enable batch invariant kernel overrides for all tests.""" + monkeypatch.setattr(batch_invariant, "VLLM_BATCH_INVARIANT", True) diff --git a/tests/v1/determinism/test_batch_invariance.py b/tests/v1/determinism/test_batch_invariance.py index 2a1b996aed61..69e4a6949645 100644 --- a/tests/v1/determinism/test_batch_invariance.py +++ b/tests/v1/determinism/test_batch_invariance.py @@ -458,10 +458,7 @@ def test_logprobs_without_batch_invariance_should_fail( monkeypatch.setenv("VLLM_ATTENTION_BACKEND", backend) # CRITICAL: Disable batch invariance for this test - monkeypatch.setenv("VLLM_BATCH_INVARIANT", "0") - # refresh cached value - batch_invariant.VLLM_BATCH_INVARIANT = batch_invariant._read_vllm_batch_invariant() - + monkeypatch.setattr(batch_invariant, "VLLM_BATCH_INVARIANT", False) seed = int(os.getenv("VLLM_TEST_SEED", "12345")) random.seed(seed) model_name = resolve_model_name(backend) From ee3278a490114cc27ae5d26a8b202c12283da685 Mon Sep 17 00:00:00 2001 From: yewentao256 Date: Wed, 19 Nov 2025 14:35:40 -0800 Subject: [PATCH 3/3] fix test Signed-off-by: yewentao256 --- tests/v1/determinism/conftest.py | 1 + tests/v1/determinism/test_batch_invariance.py | 28 +++++-------------- .../test_online_batch_invariance.py | 12 ++++++-- tests/v1/determinism/utils.py | 20 +++++++++++++ 4 files changed, 37 insertions(+), 24 deletions(-) diff --git a/tests/v1/determinism/conftest.py b/tests/v1/determinism/conftest.py index a4df0ff0c122..bde02bbd0d5c 100644 --- a/tests/v1/determinism/conftest.py +++ b/tests/v1/determinism/conftest.py @@ -9,3 +9,4 @@ def enable_batch_invariant_mode(monkeypatch: pytest.MonkeyPatch): """Automatically enable batch invariant kernel overrides for all tests.""" monkeypatch.setattr(batch_invariant, "VLLM_BATCH_INVARIANT", True) + monkeypatch.setenv("VLLM_BATCH_INVARIANT", "1") diff --git a/tests/v1/determinism/test_batch_invariance.py b/tests/v1/determinism/test_batch_invariance.py index 69e4a6949645..74ae5e182da7 100644 --- a/tests/v1/determinism/test_batch_invariance.py +++ b/tests/v1/determinism/test_batch_invariance.py @@ -6,30 +6,16 @@ import pytest import torch -from utils import _extract_step_logprobs, _random_prompt, skip_unsupported +from utils import ( + BACKENDS, + _extract_step_logprobs, + _random_prompt, + resolve_model_name, + skip_unsupported, +) import vllm.model_executor.layers.batch_invariant as batch_invariant from vllm import LLM, SamplingParams -from vllm.platforms import current_platform - -BACKENDS: list[str] = [ - "FLASH_ATTN", - "FLASHINFER", -] - -if current_platform.is_cuda() and current_platform.is_device_capability(90): - BACKENDS.append("FLASH_ATTN_MLA") - -DEFAULT_MODEL = "Qwen/Qwen3-1.7B" -MLA_MODEL = "deepseek-ai/DeepSeek-V2-Lite-Chat" - - -def resolve_model_name(backend: str) -> str: - """Resolve the model name for the given backend, respecting env overrides.""" - model = os.getenv("VLLM_TEST_MODEL", DEFAULT_MODEL) - if backend.endswith("MLA") and model == DEFAULT_MODEL: - return MLA_MODEL - return model @skip_unsupported diff --git a/tests/v1/determinism/test_online_batch_invariance.py b/tests/v1/determinism/test_online_batch_invariance.py index 23f47863dd23..d74b435797f8 100644 --- a/tests/v1/determinism/test_online_batch_invariance.py +++ b/tests/v1/determinism/test_online_batch_invariance.py @@ -16,7 +16,8 @@ from typing import Any import openai -from utils import _random_prompt, skip_unsupported +import pytest +from utils import BACKENDS, _random_prompt, resolve_model_name, skip_unsupported from tests.utils import RemoteOpenAIServer @@ -133,9 +134,14 @@ def _compare_bs1_vs_bsn_single_process( @skip_unsupported -def test_logprobs_bitwise_batch_invariance_bs1_vs_bsN(): +@pytest.mark.parametrize("backend", BACKENDS) +def test_logprobs_bitwise_batch_invariance_bs1_vs_bsN( + backend: str, monkeypatch: pytest.MonkeyPatch +) -> None: random.seed(int(os.getenv("VLLM_TEST_SEED", "12345"))) - model_name = os.getenv("VLLM_TEST_MODEL", "Qwen/Qwen3-1.7B") + # Override backend for this test (and the RemoteOpenAIServer child process). + monkeypatch.setenv("VLLM_ATTENTION_BACKEND", backend) + model_name = resolve_model_name(backend) prompts_all = [_random_prompt(10, 50) for _ in range(32)] sp_kwargs: dict[str, Any] = { diff --git a/tests/v1/determinism/utils.py b/tests/v1/determinism/utils.py index 5141837faea0..7ee442551e2c 100644 --- a/tests/v1/determinism/utils.py +++ b/tests/v1/determinism/utils.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import os import random import pytest @@ -12,6 +13,25 @@ reason="Requires CUDA and >= Hopper (SM90)", ) +BACKENDS: list[str] = [ + "FLASH_ATTN", + "FLASHINFER", +] + +if current_platform.is_cuda() and current_platform.is_device_capability(90): + BACKENDS.append("FLASH_ATTN_MLA") + +DEFAULT_MODEL = "Qwen/Qwen3-1.7B" +MLA_MODEL = "deepseek-ai/DeepSeek-V2-Lite-Chat" + + +def resolve_model_name(backend: str) -> str: + """Resolve the model name for the given backend.""" + model = os.getenv("VLLM_TEST_MODEL", DEFAULT_MODEL) + if backend.endswith("MLA") and model == DEFAULT_MODEL: + return MLA_MODEL + return model + def _random_prompt(min_words: int = 1024, max_words: int = 1024 * 2) -> str: # Generate more realistic prompts that will actually produce varied tokens