ai-dynamo · biswapanda · Nov 10, 2025 · Nov 5, 2025 · Nov 5, 2025 · Nov 10, 2025
@@ -326,6 +326,24 @@ def create_kv_events_config(config: Config) -> Optional[KVEventsConfig]:
     if not config.engine_args.enable_prefix_caching:
         return None
 
+    # KV events publishing is broken in vLLM when LoRA is enabled.
+    # The fix is in https://github.com/vllm-project/vllm/pull/27728 but has not been released yet.
+    # Remove the check below once a vLLM release includes the fix.
+    if config.engine_args.enable_lora:
+        if config.engine_args.kv_events_config is None:
+            # No explicit KV events config was provided by the user; disable KV events because LoRA is enabled and the combination is not supported yet.
+            return None
+        else:
+            # The user provided their own KV events config, which will not work while LoRA is enabled.
+            message = (
+                "KV events doesn't work when LoRA is enabled due to upstream vLLM bug. "
+                "Please see https://github.com/vllm-project/vllm/pull/27728."
+                "For now, either disable lora or dont use explicit kv envents config."
+                "Dont set both --kv-events-config and --enable-lora in vllm command line args."
+            )
+            logger.error(message)
+            raise ValueError(message)
+
     # If user provided their own config, use that
     if c := getattr(config.engine_args, "kv_events_config"):
         logger.info(f"Using user-provided kv_events_config {c}")

@@ -153,6 +153,9 @@ def setup_kv_event_publisher(
         logger.info("Skipping KV event publisher setup for decode worker")
         return None
 
+    if config.engine_args.kv_events_config is None:
+        return None
+
     # Get data_parallel_size to create publishers for all dp_ranks
     data_parallel_size = getattr(vllm_config.parallel_config, "data_parallel_size", 1)
     kv_publishers = []