Commit f9a7025

gshtras authored and adobrzyn committed

[Bugfix] config.head_dim is now explicitly set to None (vllm-project#18432)

Signed-off-by: Gregory Shtrasberg <[email protected]>

1 parent 1dc9b66 commit f9a7025

8 files changed (+27, -18 lines)
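The common thread in the eight hunks below is a subtlety of Python's getattr: the default argument is used only when the attribute is missing, not when the attribute exists with the value None. Since the config now carries head_dim explicitly set to None (per the commit title), the old one-liner returned None and let it leak into downstream size arithmetic. A minimal sketch of the pitfall and of the two-step fallback adopted throughout this commit; SimpleNamespace and the concrete numbers are illustrative only, not vLLM code:

    from types import SimpleNamespace

    # A config object that explicitly sets head_dim to None.
    config = SimpleNamespace(head_dim=None, hidden_size=4096, num_attention_heads=32)

    # Old pattern: the getattr default is ignored because the attribute exists.
    old_head_dim = getattr(config, "head_dim",
                           config.hidden_size // config.num_attention_heads)
    assert old_head_dim is None  # bug: None would flow into q_size/kv_size math

    # New pattern from this commit: fall back explicitly when the value is None.
    new_head_dim = getattr(config, "head_dim", None)
    if new_head_dim is None:
        new_head_dim = config.hidden_size // config.num_attention_heads
    assert new_head_dim == 128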

vllm/distributed/kv_transfer/kv_connector/utils.py (3 additions & 2 deletions)

@@ -44,8 +44,9 @@ def get_model_args(self, model_executable: torch.nn.Module):
             head_size = model_config.qk_nope_head_dim + \
                 model_config.qk_rope_head_dim
         else:
-            head_size = getattr(model_config, "head_dim",
-                                int(hidden_size // num_attention_heads))
+            head_size = getattr(model_config, "head_dim", None)
+            if head_size is None:
+                head_size = int(hidden_size // num_attention_heads)

         return num_heads, head_size

vllm/model_executor/models/exaone.py (3 additions & 2 deletions)

@@ -126,8 +126,9 @@ def __init__(
         assert tp_size % self.total_num_kv_heads == 0
         self.num_kv_heads = max(1, self.total_num_kv_heads // tp_size)
         # MistralConfig has an optional head_dim introduced by Mistral-Nemo
-        self.head_dim = getattr(config, "head_dim",
-                                self.hidden_size // self.total_num_heads)
+        self.head_dim = getattr(config, "head_dim", None)
+        if self.head_dim is None:
+            self.head_dim = self.hidden_size // self.total_num_heads
         self.q_size = self.num_heads * self.head_dim
         self.kv_size = self.num_kv_heads * self.head_dim
         self.scaling = self.head_dim**-0.5

vllm/model_executor/models/granite.py (3 additions & 2 deletions)

@@ -121,8 +121,9 @@ def __init__(
         assert tp_size % self.total_num_kv_heads == 0
         self.num_kv_heads = max(1, self.total_num_kv_heads // tp_size)
         # MistralConfig has an optional head_dim introduced by Mistral-Nemo
-        self.head_dim = getattr(config, "head_dim",
-                                self.hidden_size // self.total_num_heads)
+        self.head_dim = getattr(config, "head_dim", None)
+        if self.head_dim is None:
+            self.head_dim = self.hidden_size // self.total_num_heads
         self.q_size = self.num_heads * self.head_dim
         self.kv_size = self.num_kv_heads * self.head_dim
         self.scaling = config.attention_multiplier

vllm/model_executor/models/minimax_text_01.py (6 additions & 4 deletions)

@@ -603,8 +603,9 @@ def __init__(

         rope_theta = getattr(config, "rope_theta", 10000)

-        head_dim = getattr(config, "head_dim",
-                           config.hidden_size // config.num_attention_heads)
+        head_dim = getattr(config, "head_dim", None)
+        if head_dim is None:
+            head_dim = config.hidden_size // config.num_attention_heads
         if hasattr(config, "max_model_len") and isinstance(
                 config.max_model_len, int):
             max_position_embeddings = min(config.max_position_embeddings,

@@ -860,8 +861,9 @@ def layer_fn(prefix):
                 cache_shape=self.cache_shape)

         rope_theta = getattr(config, "rope_theta", 10000)
-        head_dim = getattr(config, "head_dim",
-                           config.hidden_size // config.num_attention_heads)
+        head_dim = getattr(config, "head_dim", None)
+        if head_dim is None:
+            head_dim = config.hidden_size // config.num_attention_heads
         if hasattr(config, "max_model_len") and isinstance(
                 config.max_model_len, int):
             max_position_embeddings = min(config.max_position_embeddings,

vllm/model_executor/models/mixtral.py (3 additions & 2 deletions)

@@ -137,8 +137,9 @@ def __init__(
         assert tp_size % self.total_num_kv_heads == 0
         self.num_kv_heads = max(1, self.total_num_kv_heads // tp_size)
         # MixtralConfig has an optional head_dim argument
-        self.head_dim = getattr(config, "head_dim",
-                                self.hidden_size // self.total_num_heads)
+        self.head_dim = getattr(config, "head_dim", None)
+        if self.head_dim is None:
+            self.head_dim = self.hidden_size // self.total_num_heads
         self.q_size = self.num_heads * self.head_dim
         self.kv_size = self.num_kv_heads * self.head_dim
         self.scaling = self.head_dim**-0.5

vllm/model_executor/models/mixtral_quant.py (3 additions & 2 deletions)

@@ -192,8 +192,9 @@ def __init__(
         assert tp_size % self.total_num_kv_heads == 0
         self.num_kv_heads = max(1, self.total_num_kv_heads // tp_size)
         # MixtralConfig has an optional head_dim argument
-        self.head_dim = getattr(config, "head_dim",
-                                self.hidden_size // self.total_num_heads)
+        self.head_dim = getattr(config, "head_dim", None)
+        if self.head_dim is None:
+            self.head_dim = self.hidden_size // self.total_num_heads
         self.q_size = self.num_heads * self.head_dim
         self.kv_size = self.num_kv_heads * self.head_dim
         self.scaling = self.head_dim**-0.5

vllm/model_executor/models/nemotron.py (3 additions & 2 deletions)

@@ -157,8 +157,9 @@ def __init__(
         assert tp_size % self.total_num_kv_heads == 0
         self.num_kv_heads = max(1, self.total_num_kv_heads // tp_size)
         # MistralConfig has an optional head_dim introduced by Mistral-Nemo
-        self.head_dim = getattr(config, "head_dim",
-                                self.hidden_size // self.total_num_heads)
+        self.head_dim = getattr(config, "head_dim", None)
+        if self.head_dim is None:
+            self.head_dim = self.hidden_size // self.total_num_heads
         self.q_size = self.num_heads * self.head_dim
         self.kv_size = self.num_kv_heads * self.head_dim
         self.scaling = self.head_dim**-0.5

vllm/model_executor/models/solar.py (3 additions & 2 deletions)

@@ -125,8 +125,9 @@ def __init__(
         assert tp_size % self.total_num_kv_heads == 0
         self.num_kv_heads = max(1, self.total_num_kv_heads // tp_size)
         # MistralConfig has an optional head_dim introduced by Mistral-Nemo
-        self.head_dim = getattr(config, "head_dim",
-                                self.hidden_size // self.total_num_heads)
+        self.head_dim = getattr(config, "head_dim", None)
+        if self.head_dim is None:
+            self.head_dim = self.hidden_size // self.total_num_heads
         self.q_size = self.num_heads * self.head_dim
         self.kv_size = self.num_kv_heads * self.head_dim
         self.scaling = self.head_dim**-0.5
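The same three-line fallback is repeated verbatim in each of the eight files above. A hypothetical helper that captures the idiom could look like the sketch below; resolve_head_dim is illustrative only and is not part of this commit or of vLLM's API:

    def resolve_head_dim(config) -> int:
        """Return config.head_dim, falling back to hidden_size // num_attention_heads
        when the attribute is missing or explicitly set to None."""
        head_dim = getattr(config, "head_dim", None)
        if head_dim is None:
            head_dim = config.hidden_size // config.num_attention_heads
        return head_dim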
