diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py index 7fbce2da8587..04af8248eff9 100644 --- a/vllm/v1/engine/core.py +++ b/vllm/v1/engine/core.py @@ -461,6 +461,11 @@ def step_with_batch_queue( with self.log_error_detail(scheduler_output): model_output = future.result() + # Handle None return from execute_model (async scheduling) + if model_output is None: + grammar_output = self.scheduler.get_grammar_bitmask(scheduler_output) + model_output = self.model_executor.sample_tokens(grammar_output) + # Before processing the model output, process any aborts that happened # during the model execution. self._process_aborts_queue()