From 67e1c052d44b88b2ed90343f527d094ab0b463ac Mon Sep 17 00:00:00 2001 From: alec-flowers Date: Thu, 21 Aug 2025 15:16:32 -0700 Subject: [PATCH 1/2] respect command line --- .../backends/vllm/src/dynamo/vllm/args.py | 41 ++++++++++++------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/components/backends/vllm/src/dynamo/vllm/args.py b/components/backends/vllm/src/dynamo/vllm/args.py index 293275f046..9df08928da 100644 --- a/components/backends/vllm/src/dynamo/vllm/args.py +++ b/components/backends/vllm/src/dynamo/vllm/args.py @@ -254,6 +254,27 @@ async def configure_ports_with_etcd(config: Config, etcd_client): set_side_channel_host_and_port(base_side_channel_port) +def create_kv_events_config(config: Config) -> Optional[KVEventsConfig]: + """Create KVEventsConfig for prefix caching if needed.""" + # If prefix caching is not enabled, no events config needed + if not config.engine_args.enable_prefix_caching: + return None + + # If user provided their own config, use that + if config.engine_args.kv_events_config: + logger.info("Using user-provided kv_events_config") + return None + + # Create default events config for prefix caching + logger.info("Creating Dynamo default kv_events_config for prefix caching") + dp_rank = config.engine_args.data_parallel_rank or 0 + return KVEventsConfig( + enable_kv_cache_events=True, + publisher="zmq", + endpoint=f"tcp://*:{config.kv_port - dp_rank}", # vLLM will iterate dp_rank for us, so we need to subtract it out TODO: fix in vLLM + ) + + def create_kv_transfer_config(config: Config) -> Optional[KVTransferConfig]: """Create KVTransferConfig based on user config or connector list. @@ -313,24 +334,16 @@ def overwrite_args(config): # a NoneType error when the processor accesses the tokenizer. "skip_tokenizer_init": False, "disable_log_requests": True, - # KV routing relies on logging KV metrics "disable_log_stats": False, } - kv_config = create_kv_transfer_config(config) - if kv_config: - defaults["kv_transfer_config"] = kv_config + kv_transfer_config = create_kv_transfer_config(config) + if kv_transfer_config: + defaults["kv_transfer_config"] = kv_transfer_config - if config.engine_args.enable_prefix_caching: - dp_rank = config.engine_args.data_parallel_rank or 0 - defaults |= { - # Always setting up kv events if enable prefix cache. - "kv_events_config": KVEventsConfig( - enable_kv_cache_events=True, - publisher="zmq", - endpoint=f"tcp://*:{config.kv_port - dp_rank}", # vLLM will iterate dp_rank for us, so we need to subtract it out TODO: fix in vLLM - ) - } + kv_events_config = create_kv_events_config(config) + if kv_events_config: + defaults["kv_events_config"] = kv_events_config logger.debug("Setting Dynamo defaults for vLLM") for key, value in defaults.items(): From 1949a4fe9a5303220ee0b97dd34771659f0ce34b Mon Sep 17 00:00:00 2001 From: Alec <35311602+alec-flowers@users.noreply.github.com> Date: Thu, 21 Aug 2025 16:25:27 -0700 Subject: [PATCH 2/2] Update components/backends/vllm/src/dynamo/vllm/args.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> Signed-off-by: Alec <35311602+alec-flowers@users.noreply.github.com> --- components/backends/vllm/src/dynamo/vllm/args.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/components/backends/vllm/src/dynamo/vllm/args.py b/components/backends/vllm/src/dynamo/vllm/args.py index 9df08928da..d0f0c9b341 100644 --- a/components/backends/vllm/src/dynamo/vllm/args.py +++ b/components/backends/vllm/src/dynamo/vllm/args.py @@ -261,12 +261,17 @@ def create_kv_events_config(config: Config) -> Optional[KVEventsConfig]: return None # If user provided their own config, use that - if config.engine_args.kv_events_config: + if getattr(config.engine_args, "kv_events_config"): logger.info("Using user-provided kv_events_config") return None # Create default events config for prefix caching logger.info("Creating Dynamo default kv_events_config for prefix caching") + if config.kv_port is None: + raise ValueError( + "config.kv_port is not set; call configure_ports_with_etcd(...) before overwrite_args " + "or provide --kv-event-config to supply an explicit endpoint." + ) dp_rank = config.engine_args.data_parallel_rank or 0 return KVEventsConfig( enable_kv_cache_events=True,