@@ -669,7 +669,6 @@ def lazy_load_file(path: Path) -> ModelPlus:
# Generic type parameters used by bounded_parallel_map's signature:
# `In` is the element type of the input iterable, `Out` the result type of `func`.
In = TypeVar('In')
Out = TypeVar('Out')
671671
672-
673672def bounded_parallel_map (func : Callable [[In ], Out ], iterable : Iterable [In ], concurrency : int ) -> Iterable [Out ]:
674673 '''Parallel map, but with backpressure. If the caller doesn't call `next`
675674 fast enough, this will stop calling `func` at some point rather than
@@ -734,42 +733,67 @@ def add_meta_vocab(self, vocab: Vocab) -> None:
734733
735734 # TODO: added / special tokens
736735
736+ def add_tensor_info (self , name : str , tensor : LazyTensor ) -> None :
737+ n_elements = 1
738+ for dim in tensor .shape :
739+ n_elements *= dim
740+ data_type = DATA_TYPE_TO_NUMPY [tensor .data_type ]
741+ data_nbytes = n_elements * data_type .itemsize
742+ self .gguf .add_tensor_info (name , tensor .shape , data_type , data_nbytes )
743+
737744 def write_meta (self ) -> None :
738745 self .gguf .write_header_to_file ()
739746 self .gguf .write_kv_data_to_file ()
740747
748+ def write_tensor_info (self ) -> None :
749+ self .gguf .write_ti_data_to_file ()
750+
741751 def close (self ) -> None :
742752 self .gguf .close ()
743753
744754 @staticmethod
745755 def write_vocab_only (fname_out : Path , params : Params , vocab : Vocab ) -> None :
756+ check_vocab_size (params , vocab )
757+
746758 of = OutputFile (fname_out )
759+
760+ # meta data
747761 of .add_meta_arch (params )
748762 of .add_meta_vocab (vocab )
749763 of .write_meta ()
764+
750765 of .close ()
751766
752767 @staticmethod
753768 def write_all (fname_out : Path , params : Params , model : LazyModel , vocab : Vocab ) -> None :
754769 check_vocab_size (params , vocab )
755770
756771 of = OutputFile (fname_out )
772+
773+ # meta data
757774 of .add_meta_arch (params )
758775 of .add_meta_vocab (vocab )
759776
777+ # tensor info
778+ for name , lazy_tensor in model .items ():
779+ of .add_tensor_info (name , lazy_tensor )
780+
781+ of .write_meta ()
782+ of .write_tensor_info ()
783+
760784 def do_item (item : Tuple [str , LazyTensor ]) -> NDArray :
761785 name , lazy_tensor = item
762786 return lazy_tensor .load ().to_ggml ().ndarray
763787
788+ # tensor data
764789 ndarrays = bounded_parallel_map (do_item , model .items (), concurrency = 8 )
765790 for i , ((name , lazy_tensor ), ndarray ) in enumerate (zip (model .items (), ndarrays )):
766791 size = ' x ' .join (f"{ dim :6d} " for dim in lazy_tensor .shape )
767792 padi = len (str (len (model )))
768793 print (f"[{ i + 1 :{padi }d} /{ len (model )} ] Writing tensor { name :38s} | size { size :16} | type { lazy_tensor .data_type } " )
769- #of.write_tensor_header(name, lazy_tensor.shape, lazy_tensor.data_type)
770- ndarray .tofile (of .fout )
771- of .fout .close ()
794+ of .gguf .write_tensor_data (ndarray )
772795
796+ of .close ()
773797
774798def pick_output_type (model : LazyModel , output_type_str : Optional [str ]) -> GGMLFileType :
775799 wq_type = model [NAMES [gguf .MODEL_TENSOR .ATTN_Q ].format (bid = 0 )+ ".weight" ].data_type
@@ -783,6 +807,9 @@ def pick_output_type(model: LazyModel, output_type_str: Optional[str]) -> GGMLFi
783807
784808 raise Exception (f"Unexpected combination of types: { name_to_type } " )
785809
def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel:
    '''Return a new mapping with every tensor converted for `output_type`.

    For each (name, tensor) pair of `model`, the target type is obtained from
    output_type.type_for_tensor(name, tensor) and the stored value is
    tensor.astype(<that type>).  Keys and their order are kept; `model` itself
    is not modified.
    '''
    converted = {}
    for tensor_name, lazy in model.items():
        converted[tensor_name] = lazy.astype(output_type.type_for_tensor(tensor_name, lazy))
    return converted
786813
787814def convert_model_names (model : LazyModel , params : Params ) -> LazyModel :
788815 tmap = gguf .get_tensor_name_map (ARCH , params .n_layer )
@@ -808,12 +835,6 @@ def convert_model_names(model: LazyModel, params: Params) -> LazyModel:
808835
809836 return out
810837
811-
812- def convert_to_output_type (model : LazyModel , output_type : GGMLFileType ) -> LazyModel :
813- return {name : tensor .astype (output_type .type_for_tensor (name , tensor ))
814- for (name , tensor ) in model .items ()}
815-
816-
817838def nth_multifile_path (path : Path , n : int ) -> Optional [Path ]:
818839 '''Given any path belonging to a multi-file model (e.g. foo.bin.1), return
819840 the nth path in the model.
0 commit comments