Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions python/sglang/srt/managers/schedule_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -657,14 +657,16 @@ def preempt_to_schedule(self, req: Req, server_args: ServerArgs) -> bool:
Returns True if preemption was committed, and the new request can be scheduled.
"""
# Iterate running requests to find preemptible requests
valid_running_reqs = (r for r in self.running_batch.reqs
if r not in self.preempt_list)
if server_args.schedule_low_priority_values_first:
sorted_running_reqs = sorted(
self.running_batch.reqs,
sorted_valid_running_reqs = sorted(
valid_running_reqs,
key=lambda x: (-x.priority, -x.time_stats.wait_queue_entry_time),
)
else:
sorted_running_reqs = sorted(
self.running_batch.reqs,
sorted_valid_running_reqs = sorted(
valid_running_reqs,
key=lambda x: (x.priority, -x.time_stats.wait_queue_entry_time),
)
preemptible_reqs = []
Expand All @@ -673,9 +675,7 @@ def preempt_to_schedule(self, req: Req, server_args: ServerArgs) -> bool:
+ min(req.sampling_params.max_new_tokens, CLIP_MAX_NEW_TOKENS)
- self.rem_total_tokens
)
for running_req in sorted_running_reqs:
if running_req in self.preempt_list:
continue
for running_req in sorted_valid_running_reqs:
# Priority difference needs to meet the threshold to be preemptible.
priority_diff = req.priority - running_req.priority
if server_args.schedule_low_priority_values_first:
Expand All @@ -685,6 +685,10 @@ def preempt_to_schedule(self, req: Req, server_args: ServerArgs) -> bool:
min_tokens_to_remove -= self._get_running_request_total_token_offset(
running_req
)
if min_tokens_to_remove <= 0:
break
else:
break

# Check max token count limit can be met
if len(preemptible_reqs) == 0 or min_tokens_to_remove > 0:
Expand Down
Loading