From f9b4a2d41587da0692d32797221df55a02d890a6 Mon Sep 17 00:00:00 2001
From: Vladislav Kruglikov
Date: Mon, 9 Sep 2024 21:20:46 +0300
Subject: [PATCH] [Bugfix] Correct adapter usage for cohere and jamba (#8292)

---
 vllm/model_executor/models/commandr.py | 5 +++--
 vllm/model_executor/models/jamba.py    | 4 +++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/vllm/model_executor/models/commandr.py b/vllm/model_executor/models/commandr.py
index be7f19d15b623..649dc798d22dc 100644
--- a/vllm/model_executor/models/commandr.py
+++ b/vllm/model_executor/models/commandr.py
@@ -47,6 +47,8 @@
 from vllm.model_executor.utils import set_weight_attrs
 from vllm.sequence import IntermediateTensors
 
+from .interfaces import SupportsLoRA
+
 
 @torch.compile
 def layer_norm_func(hidden_states, weight, variance_epsilon):
@@ -292,8 +294,7 @@ def forward(
         return hidden_states
 
 
-class CohereForCausalLM(nn.Module):
-
+class CohereForCausalLM(nn.Module, SupportsLoRA):
     packed_modules_mapping = {
         "qkv_proj": [
             "q_proj",
diff --git a/vllm/model_executor/models/jamba.py b/vllm/model_executor/models/jamba.py
index 73be7ffed0f89..29dd09afac5ad 100644
--- a/vllm/model_executor/models/jamba.py
+++ b/vllm/model_executor/models/jamba.py
@@ -38,6 +38,8 @@
 from vllm.worker.model_runner import (_BATCH_SIZES_TO_CAPTURE,
                                       _get_graph_batch_size)
 
+from .interfaces import SupportsLoRA
+
 KVCache = Tuple[torch.Tensor, torch.Tensor]
 
 
@@ -539,7 +541,7 @@ def forward(
         return hidden_states
 
 
-class JambaForCausalLM(nn.Module, HasInnerState):
+class JambaForCausalLM(nn.Module, HasInnerState, SupportsLoRA):
     packed_modules_mapping = {
         "qkv_proj": [
             "q_proj",
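
Note (not part of the applied patch): vLLM enables LoRA adapters only for model
classes that inherit the SupportsLoRA marker from
vllm/model_executor/models/interfaces.py; declaring packed_modules_mapping and
the other LoRA class attributes alone is not enough, which is the bug this
patch fixes for CohereForCausalLM and JambaForCausalLM. The sketch below is a
minimal illustration of that gating check, assuming the supports_lora() helper
behaves as in this revision's interfaces module; MyLoRAModel and the assert are
illustrative and do not appear anywhere in the patch.

    # Hedged sketch: MyLoRAModel is hypothetical; supports_lora() is the
    # runtime check assumed from vllm/model_executor/models/interfaces.py.
    import torch.nn as nn

    from vllm.model_executor.models.interfaces import (SupportsLoRA,
                                                       supports_lora)

    class MyLoRAModel(nn.Module, SupportsLoRA):
        # Same style of class attribute the patched models already declared;
        # real models also define supported_lora_modules, embedding_modules,
        # and embedding_padding_modules.
        packed_modules_mapping = {
            "qkv_proj": ["q_proj", "k_proj", "v_proj"],
        }

    # The engine gates adapter wiring on this check. Before the patch,
    # CohereForCausalLM and JambaForCausalLM failed it even though they
    # declared the LoRA class attributes, because they never inherited
    # the SupportsLoRA marker.
    assert supports_lora(MyLoRAModel)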