vllm-project · s3woz · May 6, 2026 · May 14, 2026 · Jun 2, 2026 · Jun 10, 2026
@@ -428,17 +428,24 @@ def free_blocks(
             prepend: Whether to put newly-free blocks at the front of the free
                 queue to be prioritized for reuse.
         """
-        # Materialize the iterable to allow multiple passes.
-        blocks_list = list(ordered_blocks)
-        for block in blocks_list:
+        # Identify blocks with hash (LRU cache) and without it (will never match in APC)
+        blocks_with_hash = []
+        blocks_without_hash = []
+        for block in ordered_blocks:
             block.ref_cnt -= 1
-        freed_blocks = [
-            block for block in blocks_list if block.ref_cnt == 0 and not block.is_null
-        ]
+            if block.ref_cnt == 0 and not block.is_null:
+                if block.block_hash is None:
+                    blocks_without_hash.append(block)
+                else:
+                    blocks_with_hash.append(block)
+
         if prepend:
-            self.free_block_queue.prepend_n(freed_blocks)
+            self.free_block_queue.prepend_n(blocks_with_hash)
         else:
-            self.free_block_queue.append_n(freed_blocks)
+            self.free_block_queue.append_n(blocks_with_hash)
+
+        # Blocks without a hash are evicted first: prepend them last, to the queue front
+        self.free_block_queue.prepend_n(blocks_without_hash)
 
     def evict_blocks(self, block_ids: set[int]) -> None:
         """evict blocks from the prefix cache by their block IDs.