From e46c1ddaf143a2d98df0de47d2e51bafc8e3ae28 Mon Sep 17 00:00:00 2001 From: Josephasafg Date: Sun, 11 Jan 2026 16:08:02 +0200 Subject: [PATCH 1/2] Changed reorder to prefill and decode logic Signed-off-by: Josephasafg --- vllm/v1/attention/backends/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vllm/v1/attention/backends/utils.py b/vllm/v1/attention/backends/utils.py index 6b94f786a26b..8b798bf3af69 100644 --- a/vllm/v1/attention/backends/utils.py +++ b/vllm/v1/attention/backends/utils.py @@ -1026,9 +1026,9 @@ def reorder_batch_to_split_decodes_and_prefills( num_scheduled_tokens_np = np.array(num_scheduled_tokens) num_computed_tokens_np = input_batch.num_computed_tokens_cpu[:num_reqs] - is_decode = num_scheduled_tokens_np <= decode_threshold - is_extend = (~is_decode) & (num_computed_tokens_np > 0) - is_prefill = (~is_decode) & (num_computed_tokens_np == 0) + is_prefill = num_computed_tokens_np == 0 + is_decode = (num_scheduled_tokens_np <= decode_threshold) & (~is_prefill) + is_extend = (num_scheduled_tokens_np > decode_threshold) & (~is_prefill) # Desired order: decode → extend → prefill req_regions = np.zeros(is_decode.shape, dtype=np.int32) # 0 = decode by default From 56bcd6d0a582d3934e3ca0c5a6d4dfd477980466 Mon Sep 17 00:00:00 2001 From: Josephasafg Date: Sun, 11 Jan 2026 21:04:28 +0200 Subject: [PATCH 2/2] Added two new test cases to test_batch_reordering Signed-off-by: Josephasafg --- tests/v1/attention/test_batch_reordering.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/v1/attention/test_batch_reordering.py b/tests/v1/attention/test_batch_reordering.py index e37219454222..6265e12f9a7d 100644 --- a/tests/v1/attention/test_batch_reordering.py +++ b/tests/v1/attention/test_batch_reordering.py @@ -98,6 +98,27 @@ class ReorderTestCase: expected_order=[0, 1, 6, 8, 4, 3, 2, 7, 5], expected_modified=True, ), + "new_request_single_token_prefill": ReorderTestCase( + requests=[ + (100, 0), + (1, 
0), # New request with only 1 token (STILL prefill) + (50, 100), + (1, 10), + ], + # Only index 3 is a true decode (num_computed_tokens > 0, 1 scheduled token) + expected_order=[3, 2, 0, 1], + expected_modified=True, + ), + "multiple_new_requests_single_token_prefill": ReorderTestCase( + requests=[ + (1, 0), # New prefill (1 token, no computed) + (1, 0), # New prefill (1 token, no computed) + (1, 50), + (200, 0), + ], + expected_order=[2, 1, 0, 3], + expected_modified=True, + ), }