     gguf.GGMLQuantizationType.Q4_K : (256, 2 + 2 + QK_K // 2 + 12),
     gguf.GGMLQuantizationType.Q5_K : (256, 2 + 2 + QK_K // 2 + QK_K // 8 + 12),
     gguf.GGMLQuantizationType.Q6_K : (256, 2 + QK_K // 2 + QK_K // 4 + QK_K // 16),
-    gguf.GGMLQuantizationType.Q8_K : (256, 2 + QK_K + QK_K // 8),
+    gguf.GGMLQuantizationType.Q8_K : (256, 4 + QK_K + QK_K // 8),
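+    # Note: ggml's block_q8_K starts with a 4-byte fp32 scale (not two fp16 values),
+    # followed by QK_K int8 quants and QK_K // 16 int16 block sums, i.e. 4 + QK_K + QK_K // 8 bytes.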
 }
 
 class Hyperparameters:
     def __init__(self):
         self.n_vocab = self.n_embd = self.n_mult = self.n_head = self.n_layer = self.n_rot = self.ftype = 0
+        self.n_ff = 0
+
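+    # The GGMLv3 header doesn't store the feed-forward length, so it is recovered
+    # from the shape of the layer-0 feed_forward.w1 tensor once the tensors have been scanned.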
+    def set_n_ff(self, model):
+        ff_tensor_idx = model.tensor_map.get(b'layers.0.feed_forward.w1.weight')
+        assert ff_tensor_idx is not None, 'Missing layer 0 FF tensor'
+        ff_tensor = model.tensors[ff_tensor_idx]
+        self.n_ff = ff_tensor.dims[1]
 
     def load(self, data, offset):
         (
@@ -42,7 +49,7 @@ def load(self, data, offset):
         return 4 * 7
 
     def __str__(self):
-        return f'<Hyperparameters: n_vocab={self.n_vocab}, n_embd={self.n_embd}, n_mult={self.n_mult}, n_head={self.n_head}, n_layer={self.n_layer}, n_rot={self.n_rot}, ftype={self.ftype}>'
+        return f'<Hyperparameters: n_vocab={self.n_vocab}, n_embd={self.n_embd}, n_mult={self.n_mult}, n_head={self.n_head}, n_layer={self.n_layer}, n_rot={self.n_rot}, n_ff={self.n_ff}, ftype={self.ftype}>'
 
 class Vocab:
     def __init__(self):
@@ -122,6 +129,7 @@ def load(self, data, offset):
         self.vocab = vocab
         self.tensors = tensors
         self.tensor_map = tensor_map
+        hp.set_n_ff(self)
         return offset
 
 class GGMLToGGUF:
@@ -132,10 +140,6 @@ def __init__(self, ggml_model, data, cfg, params_override = None, vocab_override
         self.cfg = cfg
         self.params_override = params_override
         self.vocab_override = vocab_override
-        ff_tensor_idx = ggml_model.tensor_map.get(b'layers.0.feed_forward.w1.weight')
-        assert ff_tensor_idx is not None, 'Missing layer 0 FF tensor'
-        ff_tensor = ggml_model.tensors[ff_tensor_idx]
-        self.ff_length = ff_tensor.dims[1]
         if params_override is not None:
             n_kv_head = params_override.n_head_kv
         else:
@@ -196,7 +200,7 @@ def add_params(self, gguf_writer):
         gguf_writer.add_context_length(cfg.context_length)
         gguf_writer.add_embedding_length(hp.n_embd)
         gguf_writer.add_block_count(hp.n_layer)
-        gguf_writer.add_feed_forward_length(self.ff_length)
+        gguf_writer.add_feed_forward_length(hp.n_ff)
         gguf_writer.add_rope_dimension_count(hp.n_embd // hp.n_head)
         gguf_writer.add_head_count(hp.n_head)
         gguf_writer.add_head_count_kv(self.n_kv_head)
@@ -267,18 +271,24 @@ def add_tensors(self, gguf_writer):
             # print(f'+ {tensor.name} | {mapped_name} {tensor.dims} :: {tempdims}')
             gguf_writer.add_tensor(mapped_name, data[tensor.start_offset:tensor.start_offset + tensor.len_bytes], raw_shape = tempdims, raw_dtype = tensor.dtype)
 
-def handle_metadata(cfg):
+def handle_metadata(cfg, hp):
     import convert
     assert cfg.model_metadata_dir.is_dir(), 'Metadata dir is not a directory'
     hf_config_path   = cfg.model_metadata_dir / "config.json"
     orig_config_path = cfg.model_metadata_dir / "params.json"
-    # Passing None to these load functions is not kosher but it should
-    # currently work for HF and only fail for original mode if
-    # n_vocab or n_ff is missing in params.json
+    # We pass a fake model here. "original" mode will check the shapes of some
+    # tensors if information is missing in the .json file: other than that, the
+    # model data isn't used so this should be safe (at least for now).
+    fakemodel = {
+        'tok_embeddings.weight': convert.LazyTensor.__new__(convert.LazyTensor),
+        'layers.0.feed_forward.w1.weight': convert.LazyTensor.__new__(convert.LazyTensor),
+    }
+    fakemodel['tok_embeddings.weight'].shape = [hp.n_vocab]
+    fakemodel['layers.0.feed_forward.w1.weight'].shape = [hp.n_ff]
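+    # Only the fake tensors' .shape attributes are expected to be read (to fill in
+    # n_vocab / n_ff when the .json omits them), so bare LazyTensor instances suffice.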
     if hf_config_path.exists():
-        params = convert.Params.loadHFTransformerJson(None, hf_config_path)
+        params = convert.Params.loadHFTransformerJson(fakemodel, hf_config_path)
     elif orig_config_path.exists():
-        params = convert.Params.loadOriginalParamsJson(None, orig_config_path)
+        params = convert.Params.loadOriginalParamsJson(fakemodel, orig_config_path)
     else:
         raise ValueError('Unable to load metadata')
     vocab = convert.load_vocab(cfg.vocab_dir if cfg.vocab_dir is not None else cfg.model_metadata_dir, cfg.vocabtype)
@@ -303,20 +313,20 @@ def main():
     cfg = handle_args()
     print(f'* Using config: {cfg}')
     print('\n=== WARNING === Be aware that this conversion script is best-effort. Use a native GGUF model if possible. === WARNING ===\n')
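+    # Scan the GGML input first so its hyperparameters (notably n_vocab and n_ff)
+    # are available to handle_metadata() below.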
+    data = np.memmap(cfg.input, mode = 'r')
+    model = GGMLV3Model()
+    print('* Scanning GGML input file')
+    offset = model.load(data, 0)
+    print(f'* GGML model hyperparameters: {model.hyperparameters}')
     vocab_override = None
     params_override = None
     if cfg.model_metadata_dir is not None:
-        (params_override, vocab_override) = handle_metadata(cfg)
+        (params_override, vocab_override) = handle_metadata(cfg, model.hyperparameters)
         print('!! Note: When overriding params the --gqa, --eps and --context-length options are ignored.')
         print(f'* Overriding params: {params_override}')
         print(f'* Overriding vocab: {vocab_override}')
     else:
         print('\n=== WARNING === Special tokens may not be converted correctly. Use --model-metadata-dir if possible === WARNING ===\n')
-    data = np.memmap(cfg.input, mode = 'r')
-    model = GGMLV3Model()
-    print('* Scanning GGML input file')
-    offset = model.load(data, 0)
-    print(f'* GGML model hyperparameters: {model.hyperparameters}')
     converter = GGMLToGGUF(model, data, cfg, params_override = params_override, vocab_override = vocab_override)
     converter.save()
     print(f'* Successful completion. Output saved to: {cfg.output}')