12 changes: 12 additions & 0 deletions vllm/v1/worker/gpu_model_runner.py
@@ -2532,6 +2532,18 @@ def execute_model(
             return make_empty_encoder_model_runner_output(scheduler_output)
 
         if not num_scheduled_tokens:
+            if (
+                self.parallel_config.distributed_executor_backend
+                == "external_launcher"
+                and self.parallel_config.data_parallel_size > 1
+            ):
+                # Corner case: when both the external launcher and DP are
+                # enabled, num_scheduled_tokens can be 0 while
+                # has_unfinished_requests in the outer loop still returns
+                # True. Before returning early, run a dummy batch so that
+                # coordinate_batch_across_dp is still invoked and the DP
+                # ranks do not go out of sync.
+                self._dummy_run(1)
             if not has_kv_transfer_group():
                 # Return empty ModelRunnerOutput if no work to do.
                 return EMPTY_MODEL_RUNNER_OUTPUT
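
For readers unfamiliar with why the dummy run matters, here is a minimal, hypothetical sketch (not the vLLM implementation) of the failure mode the patch guards against. The function name coordinate_batch_across_dp_sketch and the use of a plain torch.distributed all_reduce are illustrative assumptions standing in for the real coordinate_batch_across_dp logic.

import torch
import torch.distributed as dist


def coordinate_batch_across_dp_sketch(num_scheduled_tokens: int) -> int:
    # Hypothetical stand-in for coordinate_batch_across_dp: DP ranks agree
    # on a value via a collective, so every rank must enter this call.
    # If one rank returns early because it has zero scheduled tokens, the
    # remaining ranks block in the all_reduce indefinitely -- the
    # "out of sync" issue the patch avoids by calling self._dummy_run(1)
    # before the early return.
    t = torch.tensor([num_scheduled_tokens], dtype=torch.int64)
    dist.all_reduce(t, op=dist.ReduceOp.MAX)
    return int(t.item())

A dummy run of batch size 1 is enough here because the goal is only to keep the idle rank participating in the collective; ranks that do have scheduled work proceed with their normal batches.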