@@ -2271,13 +2271,6 @@ def set_vocab(self):
22712271
22722272 special_vocab .add_to_gguf (self .gguf_writer )
22732273
2274- def _hf_permute_qk (self , weights , n_head : int , n_head_kv : int ):
2275- if n_head_kv is not None and n_head != n_head_kv :
2276- n_head = n_head_kv
2277- return (weights .reshape (n_head , 2 , weights .shape [0 ] // n_head // 2 , * weights .shape [1 :])
2278- .swapaxes (1 , 2 )
2279- .reshape (weights .shape ))
2280-
22812274 def set_gguf_parameters (self ):
22822275 self .gguf_writer .add_name ("InternLM2" )
22832276 self .gguf_writer .add_context_length (self .hparams ["max_position_embeddings" ])
@@ -2297,26 +2290,22 @@ def set_gguf_parameters(self):
22972290 def modify_tensors (self , data_torch : Tensor , name : str , bid : int | None ) -> Iterable [tuple [str , Tensor ]]:
22982291 num_heads = self .hparams ["num_attention_heads" ]
22992292 num_kv_heads = self .hparams ["num_key_value_heads" ]
2300- hidden_size = self .hparams ["hidden_size" ]
2293+ n_embd = self .hparams ["hidden_size" ]
23012294 q_per_kv = num_heads // num_kv_heads
2302- head_dim = hidden_size // num_heads
2295+ head_dim = n_embd // num_heads
23032296 num_groups = num_heads // q_per_kv
23042297
2305- qkv_pattern = r"model\.layers\.(\d+)\.attention\.wqkv"
2306-
2307- if re .match (qkv_pattern , name ):
2308- bid = re .findall (qkv_pattern , name )[0 ]
2298+ if bid is not None and f"model.layers.{ bid } .attention.wqkv" in name :
23092299 qkv = data_torch
2310- # qkv = rearrange(qkv.T, " o (g n i) ->o g n i", g=num_groups, n=q_per_kv + 2, i=head_dim)
2311- qkv = qkv .T .reshape ((- 1 , num_groups , q_per_kv + 2 , head_dim ))
2312- q , k , v = qkv [..., : q_per_kv , :], qkv [..., q_per_kv : q_per_kv + 1 , :], qkv [..., q_per_kv + 1 : q_per_kv + 2 , :]
2300+
2301+ qkv = qkv .reshape ((num_groups , q_per_kv + 2 , head_dim , n_embd ))
2302+ q , k , v = qkv [:, : q_per_kv ], qkv [:, - 2 ], qkv [:, - 1 ]
2303+
23132304 # The model weights of q and k require additional reshape .
2314- # q = self._hf_permute_qk(rearrange(q, " o g n i -> o (g n i)").T, num_heads, num_heads)
2315- q = self ._hf_permute_qk (q .reshape ((q .shape [0 ], - 1 )).T , num_heads , num_heads )
2316- # k = self._hf_permute_qk(rearrange(k, " o g n i -> o (g n i)").T, num_heads, num_kv_heads)
2317- k = self ._hf_permute_qk (k .reshape ((k .shape [0 ], - 1 )).T , num_heads , num_kv_heads )
2318- # v = rearrange(v, " o g n i -> o (g n i)").T
2319- v = v .reshape ((v .shape [0 ], - 1 )).T
2305+ q = LlamaModel .permute (q .reshape ((- 1 , q .shape [- 1 ])), num_heads , num_heads )
2306+ k = LlamaModel .permute (k .reshape ((- 1 , k .shape [- 1 ])), num_heads , num_kv_heads )
2307+ v = v .reshape ((- 1 , v .shape [- 1 ]))
2308+
23202309 return [
23212310 (self .format_tensor_name (gguf .MODEL_TENSOR .ATTN_Q , bid ), q ),
23222311 (self .format_tensor_name (gguf .MODEL_TENSOR .ATTN_K , bid ), k ),
@@ -3620,6 +3609,7 @@ def main() -> None:
36203609 small_first_shard = args .no_tensor_first_split )
36213610
36223611 logger .info ("Set model parameters" )
3612+ model_instance .gguf_writer .add_type (gguf .GGUFType .MODEL )
36233613 model_instance .set_gguf_parameters ()
36243614
36253615 logger .info ("Set model tokenizer" )
0 commit comments