vllm-project · Shaoting-Feng · Jan 30, 2026 · Jan 30, 2026 · Jan 30, 2026 · gemini-code-assist
@@ -24,6 +24,7 @@
 
 if TYPE_CHECKING:
     from vllm.forward_context import ForwardContext
+    from vllm.v1.attention.backend import AttentionBackend
     from vllm.v1.core.kv_cache_manager import KVCacheBlocks
     from vllm.v1.kv_cache_interface import KVCacheConfig
     from vllm.v1.request import Request
@@ -70,6 +71,14 @@ def __repr__(self) -> str:
 
 
 class LMCacheConnectorV1(KVConnectorBase_V1):
+    @property
+    def prefer_cross_layer_blocks(self) -> bool:
+        """
+        Whether the connector asks for cross-layer KV cache blocks.
+
+        The flag is read from the ``enable_cross_layers_blocks`` key of
+        ``kv_connector_extra_config`` and defaults to ``False`` when the
+        key is absent. A string value counts as enabled only when it
+        equals "true", "1" or "yes" (case-insensitive); any other type
+        is converted with ``bool()``.
+        """
+        flag = self._kv_transfer_config.kv_connector_extra_config.get(
+            "enable_cross_layers_blocks", False
+        )
+        # Non-string values (bool, int, None, ...) follow plain truthiness.
+        if not isinstance(flag, str):
+            return bool(flag)
+        # Strings need explicit parsing: bool("false") would be True.
+        return flag.lower() in ("true", "1", "yes")
+
     def __init__(
         self,
         vllm_config: "VllmConfig",
@@ -123,6 +132,26 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
                 "please check and use the latest version"
             )
 
+    def register_cross_layers_kv_cache(
+        self,
+        cross_layers_kv_cache: torch.Tensor,
+        cross_layers_attn_backend: type["AttentionBackend"],
+    ):
+        """
+        Hand the cross-layer KV cache tensor over to the LMCache engine.
+
+        The tensor is forwarded only when the engine exposes a
+        ``register_cross_layers_kv_cache`` attribute; otherwise a warning
+        is logged and nothing is registered.
+
+        Args:
+            cross_layers_kv_cache: kv cache of all layers
+            cross_layers_attn_backend: attention backend class; accepted
+                but not used by this method.
+        """
+        engine = self._lmcache_engine
+        # Engines lacking the hook get a warning instead of an AttributeError.
+        if not hasattr(engine, "register_cross_layers_kv_cache"):
+            logger.warning(
+                "LMCache engine does not support register_cross_layers_kv_cache, "
+                "please check and use the latest version"
+            )
+            return
+        engine.register_cross_layers_kv_cache(cross_layers_kv_cache)
+
     def start_load_kv(self, forward_context: "ForwardContext", **kwargs: Any) -> None:
         """
         Start loading the KV cache from the connector to vLLM's paged