 #
 
 GGUF_MAGIC             = 0x46554747
-GGUF_VERSION           = 2
+GGUF_VERSION           = 3
 GGUF_DEFAULT_ALIGNMENT = 32
 
+
 # general
 KEY_GENERAL_ARCHITECTURE         = "general.architecture"
 KEY_GENERAL_QUANTIZATION_VERSION = "general.quantization_version"
@@ -597,6 +598,10 @@ class GGMLQuantizationType(IntEnum):
     Q6_K = 14
     Q8_K = 15
 
+class GGUFEndian(IntEnum):
+    LITTLE = 0
+    BIG = 1
+
 
 class GGUFValueType(IntEnum):
     UINT8 = 0
@@ -644,18 +649,41 @@ class GGUFWriter:
     temp_file: tempfile.SpooledTemporaryFile[bytes] | None = None
     tensors: list[tuple[np.ndarray[Any, Any], int]]
 
-    def __init__(self, path: os.PathLike[str] | str, arch: str, use_temp_file=True):
+    @property
+    def pack_prefix(self):
+        if self.endianess == GGUFEndian.LITTLE:
+            return "<"
+        else:
+            return ">"
+
+    def __init__(self, path: os.PathLike[str] | str, arch: str, use_temp_file=True, endianess=GGUFEndian.LITTLE):
         self.fout = open(path, "wb")
         self.arch = arch
+        self.endianess = endianess
+        self._simple_value_packing = {
+            GGUFValueType.UINT8:   f"{self.pack_prefix}B",
+            GGUFValueType.INT8:    f"{self.pack_prefix}b",
+            GGUFValueType.UINT16:  f"{self.pack_prefix}H",
+            GGUFValueType.INT16:   f"{self.pack_prefix}h",
+            GGUFValueType.UINT32:  f"{self.pack_prefix}I",
+            GGUFValueType.INT32:   f"{self.pack_prefix}i",
+            GGUFValueType.FLOAT32: f"{self.pack_prefix}f",
+            GGUFValueType.UINT64:  f"{self.pack_prefix}Q",
+            GGUFValueType.INT64:   f"{self.pack_prefix}q",
+            GGUFValueType.FLOAT64: f"{self.pack_prefix}d",
+            GGUFValueType.BOOL:    "?",
+        }
         self.add_architecture()
         self.use_temp_file = use_temp_file
         self.tensors = []
+        endianess_str = "Big Endian" if self.endianess == GGUFEndian.BIG else "Little Endian"
+        print(f"This gguf file is for {endianess_str} only")
 
     def write_header_to_file(self):
         self.fout.write(struct.pack("<I", GGUF_MAGIC))
-        self.fout.write(struct.pack("<I", GGUF_VERSION))
-        self.fout.write(struct.pack("<Q", self.ti_data_count))
-        self.fout.write(struct.pack("<Q", self.kv_data_count))
+        self.fout.write(struct.pack(f"{self.pack_prefix}I", GGUF_VERSION))
+        self.fout.write(struct.pack(f"{self.pack_prefix}Q", self.ti_data_count))
+        self.fout.write(struct.pack(f"{self.pack_prefix}Q", self.kv_data_count))
         self.flush()
         # print("tensors " + str(self.ti_data_count) + " kv " + str(self.kv_data_count))
@@ -727,40 +755,27 @@ def add_array(self, key: str, val: Sequence[Any]):
         self.add_key(key)
         self.add_val(val, GGUFValueType.ARRAY)
 
-    _simple_value_packing = {
-        GGUFValueType.UINT8:   "<B",
-        GGUFValueType.INT8:    "<b",
-        GGUFValueType.UINT16:  "<H",
-        GGUFValueType.INT16:   "<h",
-        GGUFValueType.UINT32:  "<I",
-        GGUFValueType.INT32:   "<i",
-        GGUFValueType.FLOAT32: "<f",
-        GGUFValueType.UINT64:  "<Q",
-        GGUFValueType.INT64:   "<q",
-        GGUFValueType.FLOAT64: "<d",
-        GGUFValueType.BOOL:    "?",
-    }
     def add_val(self, val: Any, vtype: GGUFValueType | None = None, add_vtype: bool = True):
         if vtype is None:
             vtype = GGUFValueType.get_type(val)
 
         if add_vtype:
-            self.kv_data += struct.pack("<I", vtype)
+            self.kv_data += struct.pack(f"{self.pack_prefix}I", vtype)
             self.kv_data_count += 1
 
         pack_fmt = self._simple_value_packing.get(vtype)
         if pack_fmt is not None:
             self.kv_data += struct.pack(pack_fmt, val)
         elif vtype == GGUFValueType.STRING:
             encoded_val = val.encode("utf8") if isinstance(val, str) else val
-            self.kv_data += struct.pack("<Q", len(encoded_val))
+            self.kv_data += struct.pack(f"{self.pack_prefix}Q", len(encoded_val))
             self.kv_data += encoded_val
         elif vtype == GGUFValueType.ARRAY and isinstance(val, Sequence) and len(val) > 0:
             ltype = GGUFValueType.get_type(val[0])
             if not all(GGUFValueType.get_type(i) is ltype for i in val[1:]):
                 raise ValueError("All items in a GGUF array should be of the same type")
-            self.kv_data += struct.pack("<I", ltype)
-            self.kv_data += struct.pack("<Q", len(val))
+            self.kv_data += struct.pack(f"{self.pack_prefix}I", ltype)
+            self.kv_data += struct.pack(f"{self.pack_prefix}Q", len(val))
             for item in val:
                 self.add_val(item, add_vtype=False)
         else:
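For orientation (not in the diff): with a big-endian writer, a single UINT32 key/value pair lands in kv_data as a length-prefixed UTF-8 key, a type tag, then the packed value, all with the ">" prefix. A hand-rolled sketch of the same bytes, using the enum value GGUFValueType.UINT32 == 4 from the definitions above:

import struct

GGUF_VALUE_TYPE_UINT32 = 4  # GGUFValueType.UINT32

key = "general.alignment".encode("utf8")
kv_data = b""
kv_data += struct.pack(">Q", len(key)) + key          # length-prefixed key (add_key)
kv_data += struct.pack(">I", GGUF_VALUE_TYPE_UINT32)  # value type tag (add_vtype)
kv_data += struct.pack(">I", 32)                      # the value itself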
@@ -774,22 +789,24 @@ def add_tensor_info(self, name: str, tensor_shape: Sequence[int], tensor_dtype:
         assert raw_dtype is not None or tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
 
         encoded_name = name.encode("utf8")
-        self.ti_data += struct.pack("<Q", len(encoded_name))
+        self.ti_data += struct.pack(f"{self.pack_prefix}Q", len(encoded_name))
         self.ti_data += encoded_name
         n_dims = len(tensor_shape)
-        self.ti_data += struct.pack("<I", n_dims)
+        self.ti_data += struct.pack(f"{self.pack_prefix}I", n_dims)
         for i in range(n_dims):
-            self.ti_data += struct.pack("<Q", tensor_shape[n_dims - 1 - i])
+            self.ti_data += struct.pack(f"{self.pack_prefix}Q", tensor_shape[n_dims - 1 - i])
         if raw_dtype is None:
             dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16
         else:
             dtype = raw_dtype
-        self.ti_data += struct.pack("<I", dtype)
-        self.ti_data += struct.pack("<Q", self.offset_tensor)
+        self.ti_data += struct.pack(f"{self.pack_prefix}I", dtype)
+        self.ti_data += struct.pack(f"{self.pack_prefix}Q", self.offset_tensor)
         self.offset_tensor += GGUFWriter.ggml_pad(tensor_nbytes, self.data_alignment)
         self.ti_data_count += 1
 
     def add_tensor(self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None, raw_dtype: GGMLQuantizationType | None = None):
+        if self.endianess == GGUFEndian.BIG:
+            tensor.byteswap(inplace=True)
         if self.use_temp_file and self.temp_file is None:
             fp = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256*1024*1024)
             fp.seek(0)
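Worth noting, since add_tensor and write_tensor_data both rely on it: numpy's byteswap(inplace=True) reverses the bytes of each element in the existing buffer, mutating the caller's array, without touching the dtype's byte-order flag. A standalone illustration:

import numpy as np

t = np.array([1.0, 2.0], dtype=np.float32)
print(t.tobytes().hex())  # 0000803f00000040  (little-endian f32)
t.byteswap(inplace=True)  # mutates t in place
print(t.tobytes().hex())  # 3f80000040000000  (big-endian byte order)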
@@ -815,6 +832,8 @@ def write_padding(self, fp: BinaryIO, n: int, align: int | None = None):
             fp.write(bytes([0] * pad))
 
     def write_tensor_data(self, tensor: np.ndarray[Any, Any]):
+        if self.endianess == GGUFEndian.BIG:
+            tensor.byteswap(inplace=True)
         self.write_padding(self.fout, self.fout.tell())
         tensor.tofile(self.fout)
         self.write_padding(self.fout, tensor.nbytes)
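A minimal usage sketch, assuming the names above are importable from the gguf package and the writer's usual write-then-close flow; the file name, key, and tensor here are made up for illustration:

import numpy as np
from gguf import GGUFWriter, GGUFEndian

writer = GGUFWriter("model-be.gguf", arch="llama", endianess=GGUFEndian.BIG)
writer.add_uint32("general.quantization_version", 2)  # packed with the ">" prefix
writer.add_tensor("tok_embd.weight", np.zeros((4, 4), dtype=np.float32))

writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.write_tensors_to_file()
writer.close()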