Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions vllm/distributed/kv_transfer/kv_connector/v1/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
PromMetricT,
)
from vllm.forward_context import ForwardContext
from vllm.v1.core.block_pool import BlockPool
from vllm.v1.core.kv_cache_manager import KVCacheBlocks
from vllm.v1.kv_cache_interface import KVCacheConfig
from vllm.v1.request import Request
Expand Down Expand Up @@ -446,6 +447,16 @@ def build_connector_worker_meta(self) -> KVConnectorWorkerMetadata | None:
# Scheduler-side methods
# ==============================

def bind_gpu_block_pool(self, gpu_block_pool: "BlockPool") -> None:
    """Hand the GPU block pool to this connector.

    A connector can keep the pool to track per-GPU block status, for
    example to increment/decrement block ref counts or to iterate over
    the prefix cache blocks.

    This default implementation intentionally does nothing.

    Args:
        gpu_block_pool: the GPU block pool.
    """
Comment thread
ivanium marked this conversation as resolved.

@abstractmethod
def get_num_new_matched_tokens(
self,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
if TYPE_CHECKING:
from vllm.distributed.kv_events import KVCacheEvent
from vllm.forward_context import ForwardContext
from vllm.v1.core.block_pool import BlockPool
from vllm.v1.core.kv_cache_manager import KVCacheBlocks
from vllm.v1.kv_cache_interface import KVCacheConfig
from vllm.v1.request import Request
Expand Down Expand Up @@ -219,6 +220,10 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
for c in self._connectors:
c.register_kv_caches(kv_caches)

def bind_gpu_block_pool(self, gpu_block_pool: "BlockPool") -> None:
    """Pass the GPU block pool on to every wrapped connector.

    Each entry of ``self._connectors`` receives the same pool object,
    visited in list order.

    Args:
        gpu_block_pool: the GPU block pool to bind.
    """
    for connector in self._connectors:
        connector.bind_gpu_block_pool(gpu_block_pool)

# We must override the base class method here because we need to bind
# the metadata to each connector in the order of the connectors in the
# MultiKVConnectorMetadata.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,6 @@ def build_connector_worker_meta(self):

# --- Scheduler-side methods ---

# NOTE: New API only for SimpleCPUOffloadConnector.
def bind_gpu_block_pool(self, gpu_block_pool: "BlockPool") -> None:
    """Forward the GPU block pool to the scheduler manager, if one is set.

    When ``self.scheduler_manager`` is ``None`` the pool is not forwarded.

    Args:
        gpu_block_pool: the GPU block pool to bind.
    """
    manager = self.scheduler_manager
    if manager is not None:
        manager.bind_gpu_block_pool(gpu_block_pool)
Expand Down
4 changes: 1 addition & 3 deletions vllm/v1/core/sched/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,9 +236,7 @@ def __init__(
)
# Bind GPU block pool to the KV connector. This must happen after
# kv_cache_manager is constructed so block_pool is available.
if self.connector is not None and hasattr(
self.connector, "bind_gpu_block_pool"
):
if self.connector is not None:
self.connector.bind_gpu_block_pool(self.kv_cache_manager.block_pool)

self.use_pp = self.parallel_config.pipeline_parallel_size > 1
Expand Down
Loading