From 0cd970fd194ee90fd921be7ca9878f1f3991e161 Mon Sep 17 00:00:00 2001
From: Matthew Bonanni <mbonanni@redhat.com>
Date: Sun, 8 Mar 2026 18:26:24 +0000
Subject: [PATCH 1/2] Clear stale cudagraph keys in initialize_cudagraph_keys

Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
---
 vllm/v1/cudagraph_dispatcher.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/vllm/v1/cudagraph_dispatcher.py b/vllm/v1/cudagraph_dispatcher.py
index 701c97d6de42..200cf0f05e87 100644
--- a/vllm/v1/cudagraph_dispatcher.py
+++ b/vllm/v1/cudagraph_dispatcher.py
@@ -166,6 +166,11 @@ def initialize_cudagraph_keys(
         # get the correct cudagraph mode after backend support is resolved.
         self.cudagraph_mode = cudagraph_mode
 
+        # Clear any stale keys from previous initialization (e.g. from
+        # CUDA graph memory profiling which uses a temporary KV cache).
+        for key_set in self.cudagraph_keys.values():
+            key_set.clear()
+
         # Early exit if cudagraphs are disabled
         if cudagraph_mode == CUDAGraphMode.NONE:
             self.keys_initialized = True

From a4f7d2f87a209bb19cf6e1fa9b9a32b12db20844 Mon Sep 17 00:00:00 2001
From: Matthew Bonanni <mbonanni@redhat.com>
Date: Sun, 8 Mar 2026 18:34:22 +0000
Subject: [PATCH 2/2] Move key clearing into profile_cudagraph_memory cleanup

Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
---
 vllm/v1/cudagraph_dispatcher.py    | 5 -----
 vllm/v1/worker/gpu_model_runner.py | 3 +++
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/vllm/v1/cudagraph_dispatcher.py b/vllm/v1/cudagraph_dispatcher.py
index 200cf0f05e87..701c97d6de42 100644
--- a/vllm/v1/cudagraph_dispatcher.py
+++ b/vllm/v1/cudagraph_dispatcher.py
@@ -166,11 +166,6 @@ def initialize_cudagraph_keys(
         # get the correct cudagraph mode after backend support is resolved.
         self.cudagraph_mode = cudagraph_mode
 
-        # Clear any stale keys from previous initialization (e.g. from
-        # CUDA graph memory profiling which uses a temporary KV cache).
-        for key_set in self.cudagraph_keys.values():
-            key_set.clear()
-
         # Early exit if cudagraphs are disabled
         if cudagraph_mode == CUDAGraphMode.NONE:
             self.keys_initialized = True
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index 08dbd614fdcf..d11c8ad19303 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -5638,6 +5638,9 @@ def profile_cudagraph_memory(self) -> int:
         for instance in list(CUDAGraphWrapper._all_instances):
             if id(instance) in original_pools:
                 instance.graph_pool = original_pools[id(instance)]
+        for key_set in self.cudagraph_dispatcher.cudagraph_keys.values():
+            key_set.clear()
+        self.cudagraph_dispatcher.keys_initialized = False
         self.maybe_remove_all_loras(self.lora_config)
         self._cleanup_profiling_kv_cache()
         compilation_counter.num_cudagraph_captured = saved_num_cudagraph_captured