Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions vllm/model_executor/models/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,19 @@ def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None:
"to ensure that attention page size is >= mamba page size.",
attn_block_size,
)
scheduler_config = vllm_config.scheduler_config
if (
cache_config.mamba_cache_mode == "align"
and cache_config.block_size > scheduler_config.max_num_batched_tokens
):
old_max_tokens = scheduler_config.max_num_batched_tokens
scheduler_config.max_num_batched_tokens = cache_config.block_size
logger.warning(
"Automatically increased max_num_batched_tokens from %d"
" to %d to accommodate Mamba align mode block_size",
old_max_tokens,
cache_config.block_size,
)

# By default, mamba block size will be set to max_model_len.
# When enabling prefix caching and using align mamba cache
Expand Down
Loading