diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_connector.py index 376215e06660..cbc0ac926bca 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_connector.py @@ -24,6 +24,7 @@ if TYPE_CHECKING: from vllm.forward_context import ForwardContext + from vllm.v1.attention.backend import AttentionBackend from vllm.v1.core.kv_cache_manager import KVCacheBlocks from vllm.v1.kv_cache_interface import KVCacheConfig from vllm.v1.request import Request @@ -70,6 +71,14 @@ def __repr__(self) -> str: class LMCacheConnectorV1(KVConnectorBase_V1): + @property + def prefer_cross_layer_blocks(self) -> bool: + extra_config = self._kv_transfer_config.kv_connector_extra_config + val = extra_config.get("enable_cross_layers_blocks", False) + if isinstance(val, str): + return val.lower() in ("true", "1", "yes") + return bool(val) + def __init__( self, vllm_config: "VllmConfig", @@ -123,6 +132,26 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]): "please check and use the latest version" ) + def register_cross_layers_kv_cache( + self, + cross_layers_kv_cache: torch.Tensor, + cross_layers_attn_backend: type["AttentionBackend"], + ): + """ + Register the cross-layers KV cache with the LMCache engine. + + Args: + cross_layers_kv_cache: kv cache tensor covering all layers + cross_layers_attn_backend: attention backend for the cache + """ + if hasattr(self._lmcache_engine, "register_cross_layers_kv_cache"): + self._lmcache_engine.register_cross_layers_kv_cache(cross_layers_kv_cache) + else: + logger.warning( + "LMCache engine does not support register_cross_layers_kv_cache, " + "please check and use the latest version" + ) + def start_load_kv(self, forward_context: "ForwardContext", **kwargs: Any) -> None: """ Start loading the KV cache from the connector to vLLM's paged