Skip to content

Commit a98cd6e

Browse files
alec-flowers authored and nnshah1 committed
fix: --kv-event-config now respects command line (#2627)
Signed-off-by: nnshah1 <[email protected]>
1 parent 6ed6a3b commit a98cd6e

File tree

1 file changed

+32
-14
lines changed
  • components/backends/vllm/src/dynamo/vllm

1 file changed

+32
-14
lines changed

components/backends/vllm/src/dynamo/vllm/args.py

Lines changed: 32 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -254,6 +254,32 @@ async def configure_ports_with_etcd(config: Config, etcd_client):
254254
set_side_channel_host_and_port(base_side_channel_port)
255255

256256

257+
def create_kv_events_config(config: Config) -> Optional[KVEventsConfig]:
    """Build Dynamo's default KVEventsConfig when prefix caching needs one.

    Args:
        config: Dynamo vLLM configuration. This function reads
            ``config.engine_args`` (``enable_prefix_caching``,
            ``kv_events_config``, ``data_parallel_rank``) and
            ``config.kv_port``.

    Returns:
        A ``KVEventsConfig`` that publishes over ZMQ, or ``None`` when no
        default should be installed. ``None`` is returned both when prefix
        caching is disabled and when the user already supplied a
        ``kv_events_config`` on the engine args; in the latter case the
        user's value stays in place because nothing is returned to replace it.

    Raises:
        ValueError: If a default config is required but ``config.kv_port``
            is ``None``.
    """
    engine_args = config.engine_args

    # If prefix caching is not enabled, no events config is needed.
    if not engine_args.enable_prefix_caching:
        return None

    # A user-provided config takes precedence: return None so that no Dynamo
    # default is produced for it. The explicit ``None`` default makes a
    # missing attribute behave like "not provided" instead of raising
    # AttributeError.
    if getattr(engine_args, "kv_events_config", None):
        logger.info("Using user-provided kv_events_config")
        return None

    # Create the default events config for prefix caching.
    logger.info("Creating Dynamo default kv_events_config for prefix caching")
    if config.kv_port is None:
        # NOTE(review): the flag name in this message may not match the actual
        # CLI spelling (possibly ``--kv-events-config``) - confirm before
        # changing the text.
        raise ValueError(
            "config.kv_port is not set; call configure_ports_with_etcd(...) before overwrite_args "
            "or provide --kv-event-config to supply an explicit endpoint."
        )

    # data_parallel_rank may be None; treat that as rank 0.
    dp_rank = engine_args.data_parallel_rank or 0
    return KVEventsConfig(
        enable_kv_cache_events=True,
        publisher="zmq",
        endpoint=f"tcp://*:{config.kv_port - dp_rank}",  # vLLM will iterate dp_rank for us, so we need to subtract it out TODO: fix in vLLM
    )
281+
282+
257283
def create_kv_transfer_config(config: Config) -> Optional[KVTransferConfig]:
258284
"""Create KVTransferConfig based on user config or connector list.
259285
@@ -313,24 +339,16 @@ def overwrite_args(config):
313339
# a NoneType error when the processor accesses the tokenizer.
314340
"skip_tokenizer_init": False,
315341
"disable_log_requests": True,
316-
# KV routing relies on logging KV metrics
317342
"disable_log_stats": False,
318343
}
319344

320-
kv_config = create_kv_transfer_config(config)
321-
if kv_config:
322-
defaults["kv_transfer_config"] = kv_config
345+
kv_transfer_config = create_kv_transfer_config(config)
346+
if kv_transfer_config:
347+
defaults["kv_transfer_config"] = kv_transfer_config
323348

324-
if config.engine_args.enable_prefix_caching:
325-
dp_rank = config.engine_args.data_parallel_rank or 0
326-
defaults |= {
327-
# Always setting up kv events if enable prefix cache.
328-
"kv_events_config": KVEventsConfig(
329-
enable_kv_cache_events=True,
330-
publisher="zmq",
331-
endpoint=f"tcp://*:{config.kv_port - dp_rank}", # vLLM will iterate dp_rank for us, so we need to subtract it out TODO: fix in vLLM
332-
)
333-
}
349+
kv_events_config = create_kv_events_config(config)
350+
if kv_events_config:
351+
defaults["kv_events_config"] = kv_events_config
334352

335353
logger.debug("Setting Dynamo defaults for vLLM")
336354
for key, value in defaults.items():

0 commit comments

Comments
 (0)