FIX vllm-project#7592 keeping chunked prefill performance untouched

noooop · Aug 27, 2024 · 408b727 · 408b727
1 parent 98312de
commit 408b727
Showing 1 changed file with 3 additions and 1 deletion.
diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py
@@ -511,7 +511,9 @@ def _schedule_running(
         # to keep all the sequence groups in the RUNNING state.
 
         if enable_chunking:
-            # Once chunked prefill is enabled, the policy is changed to prioritize decode requests.
+            # By default, vLLM scheduler prioritizes prefills.
+            # Once chunked prefill is enabled,
+            # the policy is changed to prioritize decode requests.
             self.running = deque(
                 sorted(
                     self.running,