From 2dd0759df4a18f20de41b7d21789895ff4ed2d05 Mon Sep 17 00:00:00 2001
From: Stanislaw Wozniak <stw@zurich.ibm.com>
Date: Wed, 6 May 2026 03:40:18 -0400
Subject: [PATCH 1/3] Optimized LRU logic

Signed-off-by: Stanislaw Wozniak <stw@zurich.ibm.com>
---
 vllm/v1/core/block_pool.py     | 20 ++++++++++++++------
 vllm/v1/core/kv_cache_utils.py | 27 +++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 6 deletions(-)
diff --git a/vllm/v1/core/block_pool.py b/vllm/v1/core/block_pool.py
index 9097079ef33a..74458390502d 100644
--- a/vllm/v1/core/block_pool.py
+++ b/vllm/v1/core/block_pool.py
@@ -413,13 +413,21 @@ def free_blocks(self, ordered_blocks: Iterable[KVCacheBlock]) -> None:
             ordered_blocks: A list of blocks to free ordered by their eviction
                 priority.
         """
-        # Materialize the iterable to allow multiple passes.
-        blocks_list = list(ordered_blocks)
-        for block in blocks_list:
+        # Identify blocks with hash (LRU cache) and blocks without hash (will never match in APC)
+        blocks_with_hash = []
+        blocks_without_hash = []
+        for block in ordered_blocks:
             block.ref_cnt -= 1
-        self.free_block_queue.append_n(
-            [block for block in blocks_list if block.ref_cnt == 0 and not block.is_null]
-        )
+            if block.ref_cnt == 0 and not block.is_null:
+                if block.block_hash is None:
+                    blocks_without_hash.append(block)                    
+                else:
+                    blocks_with_hash.append(block)
+        
+        # Allow immediate reallocation of blocks without hash
+        self.free_block_queue.prepend_n(blocks_without_hash)
+        # Append to LRU queue blocks for potential reuse
+        self.free_block_queue.append_n(blocks_with_hash)
 
     def evict_blocks(self, block_ids: set[int]) -> None:
         """evict blocks from the prefix cache by their block IDs.
diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py
index b57e10b67faa..79b4eefbb3bc 100644
--- a/vllm/v1/core/kv_cache_utils.py
+++ b/vllm/v1/core/kv_cache_utils.py
@@ -349,6 +349,33 @@ def append_n(self, blocks: list[KVCacheBlock]) -> None:
 
         self.num_free_blocks += len(blocks)
 
+    def prepend_n(self, blocks: list[KVCacheBlock]) -> None:
+        """Put a list of blocks at the head of the free list (for immediate reuse).
+
+        Args:
+            blocks: The blocks to prepend.
+        """
+        if len(blocks) == 0:
+            return
+
+        first_block = self.fake_free_list_head.next_free_block
+        assert first_block is not None, (
+            "next_free_block of fake_free_list_head should always exist"
+        )
+        
+        # Add inter-connections between consecutive blocks
+        prev_block = self.fake_free_list_head
+        for block in blocks:
+            block.prev_free_block = prev_block
+            prev_block.next_free_block = block
+            prev_block = block
+
+        # Connect the last block of <blocks> to the original first block
+        prev_block.next_free_block = first_block
+        first_block.prev_free_block = prev_block
+
+        self.num_free_blocks += len(blocks)
+
     def get_all_free_blocks(self) -> list[KVCacheBlock]:
         """Get all free blocks in the free list. Mainly used for testing.
 

From 1c3ca429de9819deccf380c637cbb63da0b8bffd Mon Sep 17 00:00:00 2001
From: Stanislaw Wozniak <stw@zurich.ibm.com>
Date: Thu, 14 May 2026 12:22:14 -0400
Subject: [PATCH 2/3] Pre-commit fixes

Signed-off-by: Stanislaw Wozniak <stw@zurich.ibm.com>
---
 vllm/v1/core/block_pool.py     | 6 +++---
 vllm/v1/core/kv_cache_utils.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/vllm/v1/core/block_pool.py b/vllm/v1/core/block_pool.py
index 74458390502d..c45f5b3765f5 100644
--- a/vllm/v1/core/block_pool.py
+++ b/vllm/v1/core/block_pool.py
@@ -413,17 +413,17 @@ def free_blocks(self, ordered_blocks: Iterable[KVCacheBlock]) -> None:
             ordered_blocks: A list of blocks to free ordered by their eviction
                 priority.
         """
-        # Identify blocks with hash (LRU cache) and blocks without hash (will never match in APC)
+        # Identify blocks with hash (LRU cache) and without it (will never match in APC)
         blocks_with_hash = []
         blocks_without_hash = []
         for block in ordered_blocks:
             block.ref_cnt -= 1
             if block.ref_cnt == 0 and not block.is_null:
                 if block.block_hash is None:
-                    blocks_without_hash.append(block)                    
+                    blocks_without_hash.append(block)
                 else:
                     blocks_with_hash.append(block)
-        
+
         # Allow immediate reallocation of blocks without hash
         self.free_block_queue.prepend_n(blocks_without_hash)
         # Append to LRU queue blocks for potential reuse
diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py
index 79b4eefbb3bc..9b480ff988e7 100644
--- a/vllm/v1/core/kv_cache_utils.py
+++ b/vllm/v1/core/kv_cache_utils.py
@@ -362,7 +362,7 @@ def prepend_n(self, blocks: list[KVCacheBlock]) -> None:
         assert first_block is not None, (
             "next_free_block of fake_free_list_head should always exist"
         )
-        
+
         # Add inter-connections between consecutive blocks
         prev_block = self.fake_free_list_head
         for block in blocks:

From 28c3805363e951dc6745545a5de5930f75314a28 Mon Sep 17 00:00:00 2001
From: Stanislaw Wozniak <stw@zurich.ibm.com>
Date: Wed, 10 Jun 2026 11:39:09 -0400
Subject: [PATCH 3/3] Comments cleanup

Signed-off-by: Stanislaw Wozniak <stw@zurich.ibm.com>
---
 vllm/v1/core/block_pool.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/vllm/v1/core/block_pool.py b/vllm/v1/core/block_pool.py
index 52af8fabcae3..7fb5dbd1e75e 100644
--- a/vllm/v1/core/block_pool.py
+++ b/vllm/v1/core/block_pool.py
@@ -439,13 +439,12 @@ def free_blocks(
                 else:
                     blocks_with_hash.append(block)
 
-        # Append to LRU queue based on priority passed via parameter
         if prepend:
             self.free_block_queue.prepend_n(blocks_with_hash)
         else:
             self.free_block_queue.append_n(blocks_with_hash)
 
-        # Always allow immediate reallocation of blocks without hash
+        # Blocks without hash are evicted first: prepend them last, at the head
         self.free_block_queue.prepend_n(blocks_without_hash)
 
     def evict_blocks(self, block_ids: set[int]) -> None: