diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/decode_bench_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/decode_bench_connector.py
index 6e9e757ffbd5..9a39ec658fff 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/decode_bench_connector.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/decode_bench_connector.py
@@ -40,7 +40,10 @@
     KVConnectorBase_V1,
     KVConnectorRole,
 )
-from vllm.distributed.kv_transfer.kv_connector.v1.base import KVConnectorMetadata
+from vllm.distributed.kv_transfer.kv_connector.v1.base import (
+    KVConnectorMetadata,
+    SupportsHMA,
+)
 from vllm.logger import init_logger
 from vllm.utils.math_utils import cdiv
 from vllm.v1.attention.backend import AttentionMetadata
@@ -71,7 +74,7 @@ class DecodeBenchConnectorMetadata(KVConnectorMetadata):
     reqs_to_fill: dict[str, tuple[tuple[list[int], ...], int]]
 
 
-class DecodeBenchConnector(KVConnectorBase_V1):
+class DecodeBenchConnector(KVConnectorBase_V1, SupportsHMA):
     """
     A KV Connector for decode instance performance testing.
 
@@ -164,6 +167,17 @@ def request_finished(
         self.connector_scheduler.request_finished(request)
         return False, None
 
+    def request_finished_all_groups(
+        self,
+        request: "Request",
+        block_ids: tuple[list[int], ...],
+    ) -> tuple[bool, dict[str, Any] | None]:
+        # HMA-enabled path: same cleanup as the single-group variant;
+        # block_ids is unused since no external per-block state is owned.
+        assert self.connector_scheduler is not None
+        self.connector_scheduler.request_finished(request)
+        return False, None
+
 
 class DecodeBenchConnectorScheduler:
     """Scheduler-side implementation for DecodeBenchConnector."""