diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py index 2f2acdd37d6e..da2982cb3311 100644 --- a/vllm/v1/engine/core.py +++ b/vllm/v1/engine/core.py @@ -1632,7 +1632,11 @@ def add_request(self, request: Request, request_wave: int = 0): if self.has_coordinator and request_wave != self.current_wave: if request_wave > self.current_wave: self.current_wave = request_wave - elif not self.engines_running: + elif ( + not self.engines_running + and self.scheduler.pause_state == PauseState.UNPAUSED + ): + self.engines_running = True # Request received for an already-completed wave, notify # front-end that we need to start the next one. self.output_queue.put_nowait(