From f0c14f7e800628f6197a2b8c4731dfa5103aa4e9 Mon Sep 17 00:00:00 2001
From: yewentao256 <zhyanwentao@126.com>
Date: Wed, 19 Nov 2025 13:50:10 -0800
Subject: [PATCH 1/3] fix torch dynamo warning

Signed-off-by: yewentao256 <zhyanwentao@126.com>
---
 tests/v1/determinism/conftest.py              |  9 ++-------
 tests/v1/determinism/test_batch_invariance.py |  6 +++---
 vllm/model_executor/layers/batch_invariant.py | 20 ++++++++++---------
 3 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/tests/v1/determinism/conftest.py b/tests/v1/determinism/conftest.py
index 3c2136e00584..be1a46160bc5 100644
--- a/tests/v1/determinism/conftest.py
+++ b/tests/v1/determinism/conftest.py
@@ -1,11 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-import pytest
+import os
 
-
-@pytest.fixture(autouse=True)
-def enable_batch_invariant_mode(monkeypatch: pytest.MonkeyPatch):
-    """Automatically enable batch invariant kernel overrides for all tests."""
-    monkeypatch.setenv("VLLM_BATCH_INVARIANT", "1")
-    yield
+os.environ["VLLM_BATCH_INVARIANT"] = "1"
diff --git a/tests/v1/determinism/test_batch_invariance.py b/tests/v1/determinism/test_batch_invariance.py
index d4e88891512c..2a1b996aed61 100644
--- a/tests/v1/determinism/test_batch_invariance.py
+++ b/tests/v1/determinism/test_batch_invariance.py
@@ -8,6 +8,7 @@
 import torch
 from utils import _extract_step_logprobs, _random_prompt, skip_unsupported
 
+import vllm.model_executor.layers.batch_invariant as batch_invariant
 from vllm import LLM, SamplingParams
 from vllm.platforms import current_platform
 
@@ -454,13 +455,12 @@ def test_logprobs_without_batch_invariance_should_fail(
     The test will PASS if we detect differences (proving batch invariance matters).
     The test will FAIL if everything matches (suggesting batch invariance isn't needed).
     """
-    from vllm.model_executor.layers.batch_invariant import vllm_is_batch_invariant
-
-    vllm_is_batch_invariant.cache_clear()
     monkeypatch.setenv("VLLM_ATTENTION_BACKEND", backend)
 
     # CRITICAL: Disable batch invariance for this test
     monkeypatch.setenv("VLLM_BATCH_INVARIANT", "0")
+    # refresh cached value
+    batch_invariant.VLLM_BATCH_INVARIANT = batch_invariant._read_vllm_batch_invariant()
 
     seed = int(os.getenv("VLLM_TEST_SEED", "12345"))
     random.seed(seed)
diff --git a/vllm/model_executor/layers/batch_invariant.py b/vllm/model_executor/layers/batch_invariant.py
index 5dbeb2917434..69fa6bdffd43 100644
--- a/vllm/model_executor/layers/batch_invariant.py
+++ b/vllm/model_executor/layers/batch_invariant.py
@@ -2,7 +2,6 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import os
 from collections.abc import Callable
-from functools import cache
 from typing import Any
 
 import torch
@@ -785,16 +784,19 @@ def enable_batch_invariant_mode():
     torch.backends.cuda.preferred_blas_library(backend="cublaslt")
 
 
-@cache
-def vllm_is_batch_invariant():
-    env_key = "VLLM_BATCH_INVARIANT"
-    is_overridden = False
-    val = os.getenv(env_key, "0")
+def _read_vllm_batch_invariant() -> bool:
+    val = os.getenv("VLLM_BATCH_INVARIANT", "0")
     try:
-        is_overridden = int(val) != 0
+        return int(val) != 0
     except ValueError:
-        is_overridden = False
-    return is_overridden
+        return False
+
+
+VLLM_BATCH_INVARIANT: bool = _read_vllm_batch_invariant()
+
+
+def vllm_is_batch_invariant() -> bool:
+    return VLLM_BATCH_INVARIANT
 
 
 def override_envs_for_invariance():

From d0723c7a254b5bec67bb5cee55b260536025b27e Mon Sep 17 00:00:00 2001
From: yewentao256 <zhyanwentao@126.com>
Date: Wed, 19 Nov 2025 14:04:30 -0800
Subject: [PATCH 2/3] address review comments: use monkeypatch.setattr in tests

Signed-off-by: yewentao256 <zhyanwentao@126.com>
---
 tests/v1/determinism/conftest.py              | 9 +++++++--
 tests/v1/determinism/test_batch_invariance.py | 5 +----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/tests/v1/determinism/conftest.py b/tests/v1/determinism/conftest.py
index be1a46160bc5..a4df0ff0c122 100644
--- a/tests/v1/determinism/conftest.py
+++ b/tests/v1/determinism/conftest.py
@@ -1,6 +1,11 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import pytest
 
-import os
+import vllm.model_executor.layers.batch_invariant as batch_invariant
 
-os.environ["VLLM_BATCH_INVARIANT"] = "1"
+
+@pytest.fixture(autouse=True)
+def enable_batch_invariant_mode(monkeypatch: pytest.MonkeyPatch):
+    """Automatically enable batch invariant kernel overrides for all tests."""
+    monkeypatch.setattr(batch_invariant, "VLLM_BATCH_INVARIANT", True)
diff --git a/tests/v1/determinism/test_batch_invariance.py b/tests/v1/determinism/test_batch_invariance.py
index 2a1b996aed61..69e4a6949645 100644
--- a/tests/v1/determinism/test_batch_invariance.py
+++ b/tests/v1/determinism/test_batch_invariance.py
@@ -458,10 +458,7 @@ def test_logprobs_without_batch_invariance_should_fail(
     monkeypatch.setenv("VLLM_ATTENTION_BACKEND", backend)
 
     # CRITICAL: Disable batch invariance for this test
-    monkeypatch.setenv("VLLM_BATCH_INVARIANT", "0")
-    # refresh cached value
-    batch_invariant.VLLM_BATCH_INVARIANT = batch_invariant._read_vllm_batch_invariant()
-
+    monkeypatch.setattr(batch_invariant, "VLLM_BATCH_INVARIANT", False)
     seed = int(os.getenv("VLLM_TEST_SEED", "12345"))
     random.seed(seed)
     model_name = resolve_model_name(backend)

From ee3278a490114cc27ae5d26a8b202c12283da685 Mon Sep 17 00:00:00 2001
From: yewentao256 <zhyanwentao@126.com>
Date: Wed, 19 Nov 2025 14:35:40 -0800
Subject: [PATCH 3/3] fix test: share backend helpers, parametrize online test

Signed-off-by: yewentao256 <zhyanwentao@126.com>
---
 tests/v1/determinism/conftest.py              |  1 +
 tests/v1/determinism/test_batch_invariance.py | 28 +++++--------------
 .../test_online_batch_invariance.py           | 12 ++++++--
 tests/v1/determinism/utils.py                 | 20 +++++++++++++
 4 files changed, 37 insertions(+), 24 deletions(-)

diff --git a/tests/v1/determinism/conftest.py b/tests/v1/determinism/conftest.py
index a4df0ff0c122..bde02bbd0d5c 100644
--- a/tests/v1/determinism/conftest.py
+++ b/tests/v1/determinism/conftest.py
@@ -9,3 +9,4 @@
 def enable_batch_invariant_mode(monkeypatch: pytest.MonkeyPatch):
     """Automatically enable batch invariant kernel overrides for all tests."""
     monkeypatch.setattr(batch_invariant, "VLLM_BATCH_INVARIANT", True)
+    monkeypatch.setenv("VLLM_BATCH_INVARIANT", "1")
diff --git a/tests/v1/determinism/test_batch_invariance.py b/tests/v1/determinism/test_batch_invariance.py
index 69e4a6949645..74ae5e182da7 100644
--- a/tests/v1/determinism/test_batch_invariance.py
+++ b/tests/v1/determinism/test_batch_invariance.py
@@ -6,30 +6,16 @@
 
 import pytest
 import torch
-from utils import _extract_step_logprobs, _random_prompt, skip_unsupported
+from utils import (
+    BACKENDS,
+    _extract_step_logprobs,
+    _random_prompt,
+    resolve_model_name,
+    skip_unsupported,
+)
 
 import vllm.model_executor.layers.batch_invariant as batch_invariant
 from vllm import LLM, SamplingParams
-from vllm.platforms import current_platform
-
-BACKENDS: list[str] = [
-    "FLASH_ATTN",
-    "FLASHINFER",
-]
-
-if current_platform.is_cuda() and current_platform.is_device_capability(90):
-    BACKENDS.append("FLASH_ATTN_MLA")
-
-DEFAULT_MODEL = "Qwen/Qwen3-1.7B"
-MLA_MODEL = "deepseek-ai/DeepSeek-V2-Lite-Chat"
-
-
-def resolve_model_name(backend: str) -> str:
-    """Resolve the model name for the given backend, respecting env overrides."""
-    model = os.getenv("VLLM_TEST_MODEL", DEFAULT_MODEL)
-    if backend.endswith("MLA") and model == DEFAULT_MODEL:
-        return MLA_MODEL
-    return model
 
 
 @skip_unsupported
diff --git a/tests/v1/determinism/test_online_batch_invariance.py b/tests/v1/determinism/test_online_batch_invariance.py
index 23f47863dd23..d74b435797f8 100644
--- a/tests/v1/determinism/test_online_batch_invariance.py
+++ b/tests/v1/determinism/test_online_batch_invariance.py
@@ -16,7 +16,8 @@
 from typing import Any
 
 import openai
-from utils import _random_prompt, skip_unsupported
+import pytest
+from utils import BACKENDS, _random_prompt, resolve_model_name, skip_unsupported
 
 from tests.utils import RemoteOpenAIServer
 
@@ -133,9 +134,14 @@ def _compare_bs1_vs_bsn_single_process(
 
 
 @skip_unsupported
-def test_logprobs_bitwise_batch_invariance_bs1_vs_bsN():
+@pytest.mark.parametrize("backend", BACKENDS)
+def test_logprobs_bitwise_batch_invariance_bs1_vs_bsN(
+    backend: str, monkeypatch: pytest.MonkeyPatch
+) -> None:
     random.seed(int(os.getenv("VLLM_TEST_SEED", "12345")))
-    model_name = os.getenv("VLLM_TEST_MODEL", "Qwen/Qwen3-1.7B")
+    # Override backend for this test (and the RemoteOpenAIServer child process).
+    monkeypatch.setenv("VLLM_ATTENTION_BACKEND", backend)
+    model_name = resolve_model_name(backend)
     prompts_all = [_random_prompt(10, 50) for _ in range(32)]
 
     sp_kwargs: dict[str, Any] = {
diff --git a/tests/v1/determinism/utils.py b/tests/v1/determinism/utils.py
index 5141837faea0..7ee442551e2c 100644
--- a/tests/v1/determinism/utils.py
+++ b/tests/v1/determinism/utils.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import os
 import random
 
 import pytest
@@ -12,6 +13,25 @@
     reason="Requires CUDA and >= Hopper (SM90)",
 )
 
+BACKENDS: list[str] = [
+    "FLASH_ATTN",
+    "FLASHINFER",
+]
+
+if current_platform.is_cuda() and current_platform.is_device_capability(90):
+    BACKENDS.append("FLASH_ATTN_MLA")
+
+DEFAULT_MODEL = "Qwen/Qwen3-1.7B"
+MLA_MODEL = "deepseek-ai/DeepSeek-V2-Lite-Chat"
+
+
+def resolve_model_name(backend: str) -> str:
+    """Resolve the model name for the given backend."""
+    model = os.getenv("VLLM_TEST_MODEL", DEFAULT_MODEL)
+    if backend.endswith("MLA") and model == DEFAULT_MODEL:
+        return MLA_MODEL
+    return model
+
 
 def _random_prompt(min_words: int = 1024, max_words: int = 1024 * 2) -> str:
     # Generate more realistic prompts that will actually produce varied tokens