 KEY_GENERAL_SOURCE_HF_REPO = "general.source.hugginface.repository"

 # LLM
-KEY_LLM_CONTEXT_LENGTH = "{llm}.context_length"
-KEY_LLM_EMBEDDING_LENGTH = "{llm}.embedding_length"
-KEY_LLM_BLOCK_COUNT = "{llm}.block_count"
-KEY_LLM_FEED_FORWARD_LENGTH = "{llm}.feed_forward_length"
-KEY_LLM_USE_PARALLEL_RESIDUAL = "{llm}.use_parallel_residual"
-KEY_LLM_TENSOR_DATA_LAYOUT = "{llm}.tensor_data_layout"
+KEY_LLM_CONTEXT_LENGTH = "{arch}.context_length"
+KEY_LLM_EMBEDDING_LENGTH = "{arch}.embedding_length"
+KEY_LLM_BLOCK_COUNT = "{arch}.block_count"
+KEY_LLM_FEED_FORWARD_LENGTH = "{arch}.feed_forward_length"
+KEY_LLM_USE_PARALLEL_RESIDUAL = "{arch}.use_parallel_residual"
+KEY_LLM_TENSOR_DATA_LAYOUT = "{arch}.tensor_data_layout"

 # attention
-KEY_ATTENTION_HEAD_COUNT = "{llm}.attention.head_count"
-KEY_ATTENTION_HEAD_COUNT_KV = "{llm}.attention.head_count_kv"
-KEY_ATTENTION_MAX_ALIBI_BIAS = "{llm}.attention.max_alibi_bias"
-KEY_ATTENTION_CLAMP_KQV = "{llm}.attention.clamp_kqv"
-KEY_ATTENTION_LAYERNORM_EPS = "{llm}.attention.layer_norm_epsilon"
-KEY_ATTENTION_LAYERNORM_RMS_EPS = "{llm}.attention.layer_norm_rms_epsilon"
+KEY_ATTENTION_HEAD_COUNT = "{arch}.attention.head_count"
+KEY_ATTENTION_HEAD_COUNT_KV = "{arch}.attention.head_count_kv"
+KEY_ATTENTION_MAX_ALIBI_BIAS = "{arch}.attention.max_alibi_bias"
+KEY_ATTENTION_CLAMP_KQV = "{arch}.attention.clamp_kqv"
+KEY_ATTENTION_LAYERNORM_EPS = "{arch}.attention.layer_norm_epsilon"
+KEY_ATTENTION_LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"

 # RoPE
-KEY_ROPE_DIMENSION_COUNT = "{llm}.rope.dimension_count"
-KEY_ROPE_SCALE = "{llm}.rope.scale"
+KEY_ROPE_DIMENSION_COUNT = "{arch}.rope.dimension_count"
+KEY_ROPE_SCALE = "{arch}.rope.scale"

 # tokenization
 KEY_TOKENIZER_MODEL = "tokenizer.ggml.model"
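The keys above are templates rather than literal strings: the writer substitutes its architecture name for the {arch} placeholder before a key-value pair is emitted. A minimal sketch of that resolution, using "llama" purely as an illustrative architecture name:

    KEY_LLM_CONTEXT_LENGTH = "{arch}.context_length"
    # "llama" is only an example value; any architecture string resolves the same way.
    print(KEY_LLM_CONTEXT_LENGTH.format(arch="llama"))  # -> llama.context_length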
@@ -343,14 +343,16 @@ def get_type(val):


 class GGUFWriter:
-    def __init__(self, fout: IO):
-        self.fout = fout
+    def __init__(self, path: str, arch: str):
+        self.fout = open(path, "wb")
+        self.arch = arch
         self.offset_tensor = 0
         self.data_alignment = GGUF_DEFAULT_ALIGNMENT
         self.kv_data = b""
         self.kv_data_count = 0
         self.ti_data = b""
         self.ti_data_count = 0
+        self.add_architecture()

     def write_header_to_file(self):
         self.fout.write(struct.pack("<I", GGUF_MAGIC))
@@ -368,11 +370,6 @@ def write_ti_data_to_file(self):
         self.fout.write(self.ti_data)
         self.flush()

-    @classmethod
-    def open(cls, path: str) -> "GGUFWriter":
-        f = open(path, "wb")
-        return cls(f)
-
     def add_key(self, key: str):
         self.add_val(key, GGUFValueType.STRING, add_vtype=False)

@@ -409,7 +406,8 @@ def add_bool(self, key: str, val: bool):
         self.add_val(val, GGUFValueType.BOOL)

     def add_string(self, key: str, val: str):
-        if len(val) == 0: return
+        if len(val) == 0:
+            return
         self.add_key(key)
         self.add_val(val, GGUFValueType.STRING)

@@ -463,6 +461,8 @@ def ggml_pad(x: int, n: int) -> int:
         return ((x + n - 1) // n) * n

     def add_tensor_info(self, name: str, tensor_shape: np.ndarray, tensor_dtype: np.dtype, tensor_nbytes: int):
+        assert tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
+
         encoded_name = name.encode("utf8")
         self.ti_data += struct.pack("<I", len(encoded_name))
         self.ti_data += encoded_name
@@ -471,7 +471,6 @@ def add_tensor_info(self, name: str, tensor_shape: np.ndarray, tensor_dtype: np.
         for i in range(n_dims):
             self.ti_data += struct.pack("<I", tensor_shape[n_dims - 1 - i])

-        assert tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
         dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16
         self.ti_data += struct.pack("<I", dtype)
         self.ti_data += struct.pack("<Q", self.offset_tensor)
@@ -495,15 +494,14 @@ def flush(self):
     def close(self):
         self.fout.close()

-    def add_architecture(self, architecture: str):
-        self.add_string(KEY_GENERAL_ARCHITECTURE,
-                        architecture)
+    def add_architecture(self):
+        self.add_string(KEY_GENERAL_ARCHITECTURE, self.arch)

     def add_author(self, author: str):
         self.add_string(KEY_GENERAL_AUTHOR, author)

     def add_tensor_data_layout(self, layout: str):
-        self.add_string(KEY_LLM_TENSOR_DATA_LAYOUT, layout)
+        self.add_string(KEY_LLM_TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)

     def add_url(self, url: str):
         self.add_string(KEY_GENERAL_URL, url)
@@ -531,60 +529,60 @@ def add_custom_alignment(self, alignment: int):
         self.data_alignment = alignment
         self.add_uint32(KEY_GENERAL_ALIGNMENT, alignment)

-    def add_context_length(self, llm: str, length: int):
+    def add_context_length(self, length: int):
         self.add_uint32(
-            KEY_LLM_CONTEXT_LENGTH.format(llm=llm), length)
+            KEY_LLM_CONTEXT_LENGTH.format(arch=self.arch), length)

-    def add_embedding_length(self, llm: str, length: int):
+    def add_embedding_length(self, length: int):
         self.add_uint32(
-            KEY_LLM_EMBEDDING_LENGTH.format(llm=llm), length)
+            KEY_LLM_EMBEDDING_LENGTH.format(arch=self.arch), length)

-    def add_block_count(self, llm: str, length: int):
+    def add_block_count(self, length: int):
         self.add_uint32(
-            KEY_LLM_BLOCK_COUNT.format(llm=llm), length)
+            KEY_LLM_BLOCK_COUNT.format(arch=self.arch), length)

-    def add_feed_forward_length(self, llm: str, length: int):
+    def add_feed_forward_length(self, length: int):
         self.add_uint32(
-            KEY_LLM_FEED_FORWARD_LENGTH.format(llm=llm), length)
+            KEY_LLM_FEED_FORWARD_LENGTH.format(arch=self.arch), length)

-    def add_parallel_residual(self, llm: str, use: bool):
+    def add_parallel_residual(self, use: bool):
         self.add_bool(
-            KEY_LLM_USE_PARALLEL_RESIDUAL.format(llm=llm), use)
+            KEY_LLM_USE_PARALLEL_RESIDUAL.format(arch=self.arch), use)

-    def add_tensor_data_layout(self, llm: str, layout: str):
+    def add_tensor_data_layout(self, layout: str):
         self.add_string(
-            KEY_LLM_TENSOR_DATA_LAYOUT.format(llm=llm), layout)
+            KEY_LLM_TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)

-    def add_head_count(self, llm: str, count: int):
+    def add_head_count(self, count: int):
         self.add_uint32(
-            KEY_ATTENTION_HEAD_COUNT.format(llm=llm), count)
+            KEY_ATTENTION_HEAD_COUNT.format(arch=self.arch), count)

-    def add_head_count_kv(self, llm: str, count: int):
+    def add_head_count_kv(self, count: int):
         self.add_uint32(
-            KEY_ATTENTION_HEAD_COUNT_KV.format(llm=llm), count)
+            KEY_ATTENTION_HEAD_COUNT_KV.format(arch=self.arch), count)

-    def add_max_alibi_bias(self, llm: str, bias: float):
+    def add_max_alibi_bias(self, bias: float):
         self.add_float32(
-            KEY_ATTENTION_MAX_ALIBI_BIAS.format(llm=llm), bias)
+            KEY_ATTENTION_MAX_ALIBI_BIAS.format(arch=self.arch), bias)

-    def add_clamp_kqv(self, llm: str, value: float):
+    def add_clamp_kqv(self, value: float):
         self.add_float32(
-            KEY_ATTENTION_CLAMP_KQV.format(llm=llm), value)
+            KEY_ATTENTION_CLAMP_KQV.format(arch=self.arch), value)

-    def add_layer_norm_eps(self, llm: str, value: float):
+    def add_layer_norm_eps(self, value: float):
         self.add_float32(
-            KEY_ATTENTION_LAYERNORM_EPS.format(llm=llm), value)
+            KEY_ATTENTION_LAYERNORM_EPS.format(arch=self.arch), value)

-    def add_layer_norm_rms_eps(self, llm: str, value: float):
+    def add_layer_norm_rms_eps(self, value: float):
         self.add_float32(
-            KEY_ATTENTION_LAYERNORM_RMS_EPS.format(llm=llm), value)
+            KEY_ATTENTION_LAYERNORM_RMS_EPS.format(arch=self.arch), value)

-    def add_rope_dimension_count(self, llm: str, count: int):
+    def add_rope_dimension_count(self, count: int):
         self.add_uint32(
-            KEY_ROPE_DIMENSION_COUNT.format(llm=llm), count)
+            KEY_ROPE_DIMENSION_COUNT.format(arch=self.arch), count)

-    def add_rope_scale(self, llm: str, value: float):
-        self.add_float32(KEY_ROPE_SCALE.format(llm=llm), value)
+    def add_rope_scale(self, value: float):
+        self.add_float32(KEY_ROPE_SCALE.format(arch=self.arch), value)

     def add_tokenizer_model(self, model: str):
         self.add_string(KEY_TOKENIZER_MODEL, model)
@@ -619,9 +617,8 @@ def add_pad_token_id(self, id: int):
 # Example usage:
 if __name__ == "__main__":
     # Example usage with a file
-    gguf_writer = GGUFWriter.open("example.gguf")
+    gguf_writer = GGUFWriter("example.gguf", "llama")

-    gguf_writer.add_architecture("llama")
     gguf_writer.add_uint32("answer", 42)  # Write a 32-bit integer
     gguf_writer.add_float32("answer_in_float", 42.0)  # Write a 32-bit float
     gguf_writer.add_custom_alignment(64)
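For comparison with the old per-call `llm` argument, a minimal sketch of how the architecture-scoped setters read after this change; the numeric values are placeholders chosen for illustration, not canonical hyperparameters:

    # The architecture is bound once in the constructor; the per-call `llm`
    # argument is gone, and each setter formats its key with self.arch.
    gguf_writer = GGUFWriter("example.gguf", "llama")
    gguf_writer.add_context_length(2048)       # key: "llama.context_length"
    gguf_writer.add_head_count(32)             # key: "llama.attention.head_count"
    gguf_writer.add_rope_dimension_count(128)  # key: "llama.rope.dimension_count"
    gguf_writer.close()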