Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions components/src/dynamo/vllm/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,24 @@ def create_kv_events_config(config: Config) -> Optional[KVEventsConfig]:
if not config.engine_args.enable_prefix_caching:
return None

# There is a bug in KV events publishing when LoRA is enabled.
# It is fixed in https://github.com/vllm-project/vllm/pull/27728, but that fix has not been released yet.
# Remove the check below once a vLLM version containing the fix is released.
if config.engine_args.enable_lora:
if config.engine_args.kv_events_config is None:
# No explicit KV events config was provided by the user, so disable KV events: LoRA is enabled and the combination is not supported yet.
return None
else:
# The user provided their own KV events config, which will not work while LoRA is enabled.
message = (
"KV events doesn't work when LoRA is enabled due to upstream vLLM bug. "
"Please see https://github.com/vllm-project/vllm/pull/27728."
"For now, either disable lora or dont use explicit kv envents config."
"Dont set both --kv-events-config and --enable-lora in vllm command line args."
)
logger.error(message)
raise ValueError(message)

# If user provided their own config, use that
if c := getattr(config.engine_args, "kv_events_config"):
logger.info(f"Using user-provided kv_events_config {c}")
Expand Down
3 changes: 3 additions & 0 deletions components/src/dynamo/vllm/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,9 @@ def setup_kv_event_publisher(
logger.info("Skipping KV event publisher setup for decode worker")
return None

if config.engine_args.kv_events_config is None:
return None

# Get data_parallel_size to create publishers for all dp_ranks
data_parallel_size = getattr(vllm_config.parallel_config, "data_parallel_size", 1)
kv_publishers = []
Expand Down
Loading