From c8bab2d036c0849942b463e849332970fa18dc07 Mon Sep 17 00:00:00 2001
From: Matthew Bonanni <mbonanni@redhat.com>
Date: Mon, 30 Mar 2026 15:56:07 -0400
Subject: [PATCH 1/2] Disable FlashAttention AOT schedule when VLLM_BATCH_INVARIANT is set

Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
---
 vllm/v1/attention/backends/flash_attn.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/vllm/v1/attention/backends/flash_attn.py b/vllm/v1/attention/backends/flash_attn.py
index 245995be2642..f36863376dec 100755
--- a/vllm/v1/attention/backends/flash_attn.py
+++ b/vllm/v1/attention/backends/flash_attn.py
@@ -369,8 +369,11 @@ def build(
         slot_mapping = common_attn_metadata.slot_mapping
         causal = common_attn_metadata.causal
 
-        # the overhead of the aot schedule is not worth it for spec-decode
-        aot_schedule = self.aot_schedule and not fast_build
+        # Disable AOT schedule for spec-decode proposer (not worth the overhead)
+        # and for batch invariance (schedule varies with max_seqlen_q/k).
+        aot_schedule = (
+            self.aot_schedule and not fast_build and not envs.VLLM_BATCH_INVARIANT
+        )
 
         if self.aot_sliding_window is None:
             self.aot_sliding_window = (-1, -1)

From 25d6f64d5e18072ba37f3b4745f2cb35727de98c Mon Sep 17 00:00:00 2001
From: Matthew Bonanni <mbonanni@redhat.com>
Date: Mon, 30 Mar 2026 16:54:24 -0400
Subject: [PATCH 2/2] Restore original num_expected_tokens

Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
---
 tests/v1/distributed/test_eagle_dp.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/v1/distributed/test_eagle_dp.py b/tests/v1/distributed/test_eagle_dp.py
index e20893b63632..7b6731788ef3 100644
--- a/tests/v1/distributed/test_eagle_dp.py
+++ b/tests/v1/distributed/test_eagle_dp.py
@@ -69,9 +69,7 @@ async def test_run_eagle_dp(monkeypatch: pytest.MonkeyPatch, attn_backend: str):
     )
 
     prompt = "This is a test of data parallel with eagle"
-    # This test might be flaky, see
-    # https://github.com/vllm-project/vllm/issues/31913
-    num_expected_tokens = 20
+    num_expected_tokens = 100
     sampling_params = SamplingParams(
         max_tokens=num_expected_tokens,
         ignore_eos=True,