Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions vllm/v1/core/block_pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,13 +413,21 @@
ordered_blocks: A list of blocks to free ordered by their eviction
priority.
"""
# Materialize the iterable to allow multiple passes.
blocks_list = list(ordered_blocks)
for block in blocks_list:
# Identify blocks with hash (LRU cache) and blocks without hash (will never match in APC)

Check failure on line 416 in vllm/v1/core/block_pool.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

vllm/v1/core/block_pool.py:416:89: E501 Line too long (97 > 88)

Check failure on line 416 in vllm/v1/core/block_pool.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

vllm/v1/core/block_pool.py:416:89: E501 Line too long (97 > 88)
blocks_with_hash = []
blocks_without_hash = []
for block in ordered_blocks:
block.ref_cnt -= 1
self.free_block_queue.append_n(
[block for block in blocks_list if block.ref_cnt == 0 and not block.is_null]
)
if block.ref_cnt == 0 and not block.is_null:
if block.block_hash is None:
blocks_without_hash.append(block)
else:
blocks_with_hash.append(block)

# Allow immediate reallocation of blocks without hash
self.free_block_queue.prepend_n(blocks_without_hash)
# Append to LRU queue blocks for potential reuse
self.free_block_queue.append_n(blocks_with_hash)

def evict_blocks(self, block_ids: set[int]) -> None:
"""evict blocks from the prefix cache by their block IDs.
Expand Down
27 changes: 27 additions & 0 deletions vllm/v1/core/kv_cache_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,33 @@ def append_n(self, blocks: list[KVCacheBlock]) -> None:

self.num_free_blocks += len(blocks)

def prepend_n(self, blocks: list[KVCacheBlock]) -> None:
    """Splice a list of blocks in at the head of the free list.

    The blocks keep their relative order: ``blocks[0]`` ends up directly
    after the fake head, and the block that used to be first now follows
    ``blocks[-1]``. An empty list leaves the queue untouched.

    Args:
        blocks: The blocks to prepend.
    """
    num_new = len(blocks)
    if num_new == 0:
        return

    head = self.fake_free_list_head
    # Remember the current first entry before any link is rewritten; it is
    # re-attached behind the new blocks at the end.
    old_first = head.next_free_block
    assert old_first is not None, (
        "next_free_block of fake_free_list_head should always exist"
    )

    # Walk the new blocks, chaining each one behind the previous node
    # (starting from the fake head).
    chain_end = head
    for current in blocks:
        chain_end.next_free_block = current
        current.prev_free_block = chain_end
        chain_end = current

    # Close the gap: the last new block now precedes the former first entry.
    old_first.prev_free_block = chain_end
    chain_end.next_free_block = old_first

    self.num_free_blocks += num_new

def get_all_free_blocks(self) -> list[KVCacheBlock]:
"""Get all free blocks in the free list. Mainly used for testing.

Expand Down
Loading