@@ -38,8 +38,11 @@ def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole):
3838 temp_config .kv_transfer_config = KVTransferConfig (** ktc )
3939 self ._connectors .append (
4040 KVConnectorFactory .create_connector_v1 (temp_config , role ))
41+
42+ # A mapping from request id to the connector that is assigned to it.
43+ self ._requests_to_connector : dict [str , KVConnectorBase_V1 ] = {}
4144
42- # We are overriding the base class method here because we need to bind
45+ # We must override the base class method here because we need to bind
4346 # the metadata to each connector in the order of the connectors in the
4447 # MultiKVConnectorMetadata.
4548 def bind_connector_metadata (
@@ -81,14 +84,24 @@ def get_num_new_matched_tokens(
8184 request : "Request" ,
8285 num_computed_tokens : int ,
8386 ) -> int :
84- return max (
85- c .get_num_new_matched_tokens (request , num_computed_tokens )
86- for c in self ._connectors )
87+ for c in self ._connectors :
88+ toks = c .get_num_new_matched_tokens (request , num_computed_tokens )
89+ # The first connector that has new matched tokens will be assigned
90+ # to this request.
91+ if toks > 0 :
92+ self ._requests_to_connector [request .req_id ] = c
93+ return toks
94+ return 0
95+
8796
def update_state_after_alloc(self, request: "Request",
                             num_external_tokens: int) -> None:
    """Forward the post-allocation update to the request's connector.

    Only the connector recorded for this request in
    ``self._requests_to_connector`` (keyed by ``request.req_id``) is
    notified. A request with no recorded connector is ignored.

    Args:
        request: The request whose blocks were just allocated.
        num_external_tokens: Passed through unchanged to the assigned
            connector's ``update_state_after_alloc``.
    """
    # Single lookup instead of a membership test followed by indexing.
    # A request is assumed to be assigned to at most one connector.
    assigned = self._requests_to_connector.get(request.req_id)
    if assigned is None:
        # No connector was assigned to this request: nothing to update.
        return
    assigned.update_state_after_alloc(request, num_external_tokens)
92105
93106 def build_connector_meta (
94107 self ,
0 commit comments