diff --git a/vllm/lora/layers/fused_moe.py b/vllm/lora/layers/fused_moe.py
index 4d4e053cffd2..e3d9894de81e 100644
--- a/vllm/lora/layers/fused_moe.py
+++ b/vllm/lora/layers/fused_moe.py
@@ -219,7 +219,7 @@ def wrapper(*args, **kwargs):
                 self.max_loras,
                 self.adapter_enabled,
                 expert_map,
-                naive_block_assignment,
+                naive_block_assignment=naive_block_assignment,
             )
             moe_state_dict["sorted_token_ids_lora"] = sorted_token_ids_lora
diff --git a/vllm/lora/punica_wrapper/punica_base.py b/vllm/lora/punica_wrapper/punica_base.py
index fdcf6c0cb124..facbd681a09a 100644
--- a/vllm/lora/punica_wrapper/punica_base.py
+++ b/vllm/lora/punica_wrapper/punica_base.py
@@ -458,6 +458,7 @@ def moe_lora_align_block_size(
     adapter_enabled: torch.Tensor,
     expert_map: torch.Tensor | None = None,
     pad_sorted_ids: bool = False,
+    naive_block_assignment: bool = False,
 ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
     """
     Aligns tokens and experts into block-sized chunks for LoRA-based
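
Note on the first hunk: in the new signature, `naive_block_assignment` is declared after `pad_sorted_ids`, so if the caller kept passing it positionally right after `expert_map`, the value would silently bind to `pad_sorted_ids` instead. The sketch below (a simplified, hypothetical stand-in for `moe_lora_align_block_size`, not the actual vLLM code) demonstrates that misbinding and why the keyword form is the fix:

    # Hypothetical, simplified signature mirroring the parameter order in
    # moe_lora_align_block_size after this diff: pad_sorted_ids precedes
    # naive_block_assignment, both defaulting to False.
    def align_block_size_sketch(
        topk_ids,                    # stand-in for the preceding required args
        expert_map=None,
        pad_sorted_ids=False,
        naive_block_assignment=False,
    ):
        return pad_sorted_ids, naive_block_assignment

    # Positional call: True lands in pad_sorted_ids -- the bug this diff avoids.
    assert align_block_size_sketch([], None, True) == (True, False)

    # Keyword call (as in the updated caller): True reaches
    # naive_block_assignment as intended.
    assert align_block_size_sketch(
        [], None, naive_block_assignment=True
    ) == (False, True)

Passing the flag by keyword also keeps the call site robust if further defaulted parameters are inserted ahead of it later.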