Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix sliding window mgr #3068

Merged
merged 1 commit into from
Jan 23, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 9 additions & 29 deletions lmdeploy/pytorch/paging/block_manager/window_block_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,6 @@ def _div_up(x, n):
return (x + n - 1) // n


def _last_block_size(history_len: int, block_size: int):
"""last block size."""
last = history_len % block_size
last = last if last != 0 else block_size
return last


def _num_blocks_to_drop(seq: SchedulerSequence, window_size: int):
"""num blocks to free."""
if seq.history_len <= window_size:
Expand Down Expand Up @@ -47,30 +40,17 @@ def __init__(self, num_gpu_blocks: int, num_cpu_blocks: int, window_size: int):
f'but get window_size = {window_size}')
self.window_size = window_size

@classmethod
def num_required_blocks(cls, obj: SchedulerSequence, prealloc_size: int = 0):
def num_required_blocks(self, obj: SchedulerSequence, prealloc_size: int = 0):
"""get num required blocks."""

def __num_req_seq(seq: SchedulerSequence):
"""get num required seq blocks."""
block_size = seq.block_size
lb_tokens = cls.last_block_size(seq)
lb_remain_tokens = 0
if len(seq.logical_blocks) > 0:
lb_remain_tokens = block_size - lb_tokens
num_input_tokens = seq.num_token_ids + prealloc_size
num_req_tokens = max(0, num_input_tokens - lb_remain_tokens)
return _div_up(num_req_tokens, block_size)

return __num_req_seq(obj)

@classmethod
def last_block_size(cls, seq: SchedulerSequence) -> int:
    """Token count of the sequence's trailing logical block (0 if none)."""
    if len(seq.logical_blocks) > 0:
        return _last_block_size(seq.history_len, seq.block_size)
    # no logical blocks allocated yet
    return 0
# blocks is not enough
if obj.num_history_ids < self.window_size:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the relation between num_history_ids and num_token_ids?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

history (num_history_ids): tokens whose KV cache is already filled.
token (num_token_ids): tokens whose KV cache has not been filled yet.

return super().num_required_blocks(obj, prealloc_size)

# we only keep history less than window_size
num_tokens = self.window_size + obj.num_token_ids + prealloc_size
num_all_blocks = _div_up(num_tokens, obj.block_size)
return max(0, num_all_blocks - len(obj.logical_blocks))

def can_allocate(self, msg: SchedulerSequence, prealloc_size: int = 0):
"""Return if physical block can be allocated for given message."""
Expand Down