@@ -254,6 +254,32 @@ async def configure_ports_with_etcd(config: Config, etcd_client):
254254 set_side_channel_host_and_port (base_side_channel_port )
255255
256256
def create_kv_events_config(config: Config) -> Optional[KVEventsConfig]:
    """Create the default KVEventsConfig for prefix caching, if one is needed.

    Args:
        config: Worker configuration. Reads
            ``engine_args.enable_prefix_caching``,
            ``engine_args.kv_events_config``,
            ``engine_args.data_parallel_rank`` and ``kv_port``.

    Returns:
        A ``KVEventsConfig`` using the ``zmq`` publisher when prefix caching
        is enabled and the user did not supply their own config. ``None``
        when prefix caching is disabled or a user-provided config is
        present, so that no default is produced.

    Raises:
        ValueError: If a default config is required but ``config.kv_port``
            has not been set.
    """
    engine_args = config.engine_args

    # If prefix caching is not enabled, no events config is needed.
    if not engine_args.enable_prefix_caching:
        return None

    # If the user provided their own config, keep it. The ``None`` default
    # treats engine args without this field the same as "not provided"
    # instead of raising AttributeError.
    if getattr(engine_args, "kv_events_config", None):
        logger.info("Using user-provided kv_events_config")
        return None

    # Validate before announcing creation so the log never claims a config
    # was created when we are about to fail.
    if config.kv_port is None:
        raise ValueError(
            "config.kv_port is not set; call configure_ports_with_etcd(...) before overwrite_args "
            "or provide --kv-events-config to supply an explicit endpoint."
        )

    logger.info("Creating Dynamo default kv_events_config for prefix caching")
    dp_rank = engine_args.data_parallel_rank or 0
    # vLLM will iterate dp_rank for us, so we need to subtract it out.
    # TODO: fix in vLLM
    port = config.kv_port - dp_rank
    return KVEventsConfig(
        enable_kv_cache_events=True,
        publisher="zmq",
        endpoint=f"tcp://*:{port}",
    )
281+
282+
257283def create_kv_transfer_config (config : Config ) -> Optional [KVTransferConfig ]:
258284 """Create KVTransferConfig based on user config or connector list.
259285
@@ -313,24 +339,16 @@ def overwrite_args(config):
313339 # a NoneType error when the processor accesses the tokenizer.
314340 "skip_tokenizer_init" : False ,
315341 "disable_log_requests" : True ,
316- # KV routing relies on logging KV metrics
317342 "disable_log_stats" : False ,
318343 }
319344
320- kv_config = create_kv_transfer_config (config )
321- if kv_config :
322- defaults ["kv_transfer_config" ] = kv_config
345+ kv_transfer_config = create_kv_transfer_config (config )
346+ if kv_transfer_config :
347+ defaults ["kv_transfer_config" ] = kv_transfer_config
323348
324- if config .engine_args .enable_prefix_caching :
325- dp_rank = config .engine_args .data_parallel_rank or 0
326- defaults |= {
327- # Always setting up kv events if enable prefix cache.
328- "kv_events_config" : KVEventsConfig (
329- enable_kv_cache_events = True ,
330- publisher = "zmq" ,
331- endpoint = f"tcp://*:{ config .kv_port - dp_rank } " , # vLLM will iterate dp_rank for us, so we need to subtract it out TODO: fix in vLLM
332- )
333- }
349+ kv_events_config = create_kv_events_config (config )
350+ if kv_events_config :
351+ defaults ["kv_events_config" ] = kv_events_config
334352
335353 logger .debug ("Setting Dynamo defaults for vLLM" )
336354 for key , value in defaults .items ():
0 commit comments