From d8e8ac794c118165e73351cc97cf81b3da43ee02 Mon Sep 17 00:00:00 2001 From: zjy0516 Date: Sun, 17 May 2026 15:23:57 +0000 Subject: [PATCH 1/2] init Signed-off-by: zjy0516 --- .../kv_transfer/kv_connector/v1/nixl/worker.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl/worker.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl/worker.py index ea8b46c28f9c..e7b42c2be43d 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl/worker.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl/worker.py @@ -906,11 +906,16 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]): else: self.block_len_per_layer.append(physical_page_size) - if cache.shape[0] != num_blocks: + expected_first_dim = ( + self._logical_num_blocks + if self.vllm_config.use_v2_model_runner + else self.num_blocks + ) + if cache.shape[0] != expected_first_dim: raise AssertionError( "All kv cache tensors must have the same number of " f"blocks; layer={layer_name}, " - f"expected_num_blocks={num_blocks}, " + f"expected_num_blocks={expected_first_dim}, " f"cache_shape={tuple(cache.shape)}, " f"cache_stride={tuple(cache.stride())}, " f"layer_spec={type(layer_spec).__name__}, " @@ -919,7 +924,8 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]): f"{[backend.get_name() for backend in self.attn_backends]}, " f"kv_cache_layout={self.kv_cache_layout}, " "blocks_first=" - f"{self.transfer_topo.is_kv_layout_blocks_first}" + f"{self.transfer_topo.is_kv_layout_blocks_first}, " + f"use_v2_model_runner={self.vllm_config.use_v2_model_runner}" ) if not self.use_mla: From 299be9139cae5d135596684def0a328287a829dc Mon Sep 17 00:00:00 2001 From: zjy0516 Date: Mon, 18 May 2026 01:58:55 +0000 Subject: [PATCH 2/2] update Signed-off-by: zjy0516 --- .../kv_connector/v1/nixl/worker.py | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl/worker.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl/worker.py index e7b42c2be43d..1b561eb81e4b 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl/worker.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl/worker.py @@ -464,11 +464,14 @@ def _sync_block_size_with_kernel(self) -> None: kernel_block_size, ) assert self.block_size > kernel_block_size - self._physical_blocks_per_logical_kv_block = ( - self.block_size // kernel_block_size - ) - self.block_size = kernel_block_size - self.num_blocks *= self._physical_blocks_per_logical_kv_block + if self.vllm_config.use_v2_model_runner: + self._physical_blocks_per_logical_kv_block = 1 + else: + self._physical_blocks_per_logical_kv_block = ( + self.block_size // kernel_block_size + ) + self.block_size = kernel_block_size + self.num_blocks *= self._physical_blocks_per_logical_kv_block def _nixl_handshake( self, @@ -906,16 +909,11 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]): else: self.block_len_per_layer.append(physical_page_size) - expected_first_dim = ( - self._logical_num_blocks - if self.vllm_config.use_v2_model_runner - else self.num_blocks - ) - if cache.shape[0] != expected_first_dim: + if cache.shape[0] != num_blocks: raise AssertionError( "All kv cache tensors must have the same number of " f"blocks; layer={layer_name}, " - f"expected_num_blocks={expected_first_dim}, " + f"expected_num_blocks={num_blocks}, " f"cache_shape={tuple(cache.shape)}, " f"cache_stride={tuple(cache.stride())}, " f"layer_spec={type(layer_spec).__name__}, " @@ -924,8 +922,7 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]): f"{[backend.get_name() for backend in self.attn_backends]}, " f"kv_cache_layout={self.kv_cache_layout}, " "blocks_first=" - f"{self.transfer_topo.is_kv_layout_blocks_first}, " - f"use_v2_model_runner={self.vllm_config.use_v2_model_runner}" + f"{self.transfer_topo.is_kv_layout_blocks_first}" ) if not self.use_mla: