Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

if TYPE_CHECKING:
from vllm.forward_context import ForwardContext
from vllm.v1.attention.backend import AttentionBackend
from vllm.v1.core.kv_cache_manager import KVCacheBlocks
from vllm.v1.kv_cache_interface import KVCacheConfig
from vllm.v1.request import Request
Expand Down Expand Up @@ -70,6 +71,14 @@ def __repr__(self) -> str:


class LMCacheConnectorV1(KVConnectorBase_V1):
@property
def prefer_cross_layer_blocks(self) -> bool:
    """Report whether cross-layer KV cache blocks are enabled in the config.

    Looks up ``enable_cross_layers_blocks`` in the connector's
    ``kv_connector_extra_config``; a missing key means ``False``.

    Returns:
        For a string value, ``True`` only when it equals ``"true"``,
        ``"1"`` or ``"yes"`` ignoring case. For any other value, the
        result of ``bool(value)``.
    """
    flag = self._kv_transfer_config.kv_connector_extra_config.get(
        "enable_cross_layers_blocks", False
    )
    # Non-string settings (bool, int, ...) follow ordinary truthiness.
    if not isinstance(flag, str):
        return bool(flag)
    # Strings need explicit parsing: bool("false") would be True.
    return flag.lower() in ("true", "1", "yes")

def __init__(
self,
vllm_config: "VllmConfig",
Expand Down Expand Up @@ -123,6 +132,26 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
"please check and use the latest version"
)

def register_cross_layers_kv_cache(
self,
cross_layers_kv_cache: torch.Tensor,
cross_layers_attn_backend: type["AttentionBackend"],
):
"""
Register the cross-layer KV cache with the LMCache engine.

The tensor is handed to the engine's
``register_cross_layers_kv_cache`` method when the engine exposes one.

Args:
cross_layers_kv_cache: kv cache of all layers
cross_layers_attn_backend: attention backend class supplied by the
caller. It is accepted as part of this method's signature but is
not forwarded; only the tensor is passed to the LMCache engine.
"""
# Feature-detect the engine method rather than assuming it exists, so an
# LMCache engine without cross-layer support does not raise AttributeError.
if hasattr(self._lmcache_engine, "register_cross_layers_kv_cache"):
self._lmcache_engine.register_cross_layers_kv_cache(cross_layers_kv_cache)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The cross_layers_attn_backend parameter is unused in this method call. The base class KVConnectorBase_V1 includes this parameter in its register_cross_layers_kv_cache signature, suggesting it's intended to be used. It should be passed to the underlying _lmcache_engine's method to ensure correct functionality, assuming the engine's method expects it.

            self._lmcache_engine.register_cross_layers_kv_cache(cross_layers_kv_cache, cross_layers_attn_backend)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But lmcache engine doesn't need it.

else:
logger.warning(
"LMCache engine does not support register_cross_layers_kv_cache, "
"please check and use the latest version"
)

def start_load_kv(self, forward_context: "ForwardContext", **kwargs: Any) -> None:
"""
Start loading the KV cache from the connector to vLLM's paged
Expand Down