Commit 7cda4dd

convert_hf : faster lazy safetensors
1 parent aaab241

File tree: 2 files changed (+44, -11 lines)

2 files changed

+44
-11
lines changed

convert_hf_to_gguf.py

Lines changed: 38 additions & 3 deletions

```diff
@@ -148,9 +148,16 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
                 tensor_names_from_parts.update(model_part.keys())
 
                 for name in model_part.keys():
-                    data = model_part.get_tensor(name) if self.is_safetensors else model_part[name]
-                    if self.lazy:
-                        data = LazyTorchTensor.from_eager(data)
+                    if self.is_safetensors:
+                        if self.lazy:
+                            data = model_part.get_slice(name)
+                            data = LazyTorchTensor.from_safetensors_slice(data)
+                        else:
+                            data = model_part.get_tensor(name)
+                    else:
+                        data = model_part[name]
+                        if self.lazy:
+                            data = LazyTorchTensor.from_eager(data)
                     yield name, data
 
         # only verify tensor name presence; it doesn't matter if they are not in the right files
```
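
For context on the lazy path above: with `safetensors.safe_open`, `get_tensor(name)` reads and materializes the tensor data immediately, while `get_slice(name)` only returns a handle that exposes `get_dtype()` and `get_shape()`; the underlying bytes are read when the slice is indexed. A minimal standalone sketch (the file name is a placeholder):

```python
# Hedged sketch, not part of the commit: the eager vs. lazy access pattern
# that the change above relies on. "model.safetensors" is a placeholder path.
from safetensors import safe_open

with safe_open("model.safetensors", framework="pt") as f:
    for name in f.keys():
        st_slice = f.get_slice(name)  # cheap: metadata only, no tensor data read
        print(name, st_slice.get_dtype(), st_slice.get_shape())
        # data = st_slice[:]          # indexing is what actually reads the bytes
```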

```diff
@@ -3435,6 +3442,27 @@ class LazyTorchTensor(gguf.LazyBase):
         torch.float32: np.float32,
     }
 
+    # used for safetensors slices
+    # ref: https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/src/lib.rs#L1046
+    # TODO: uncomment U64, U32, and U16, ref: https://github.com/pytorch/pytorch/issues/58734
+    _dtype_str_map: dict[str, torch.dtype] = {
+        "F64": torch.float64,
+        "F32": torch.float32,
+        "BF16": torch.bfloat16,
+        "F16": torch.float16,
+        # "U64": torch.uint64,
+        "I64": torch.int64,
+        # "U32": torch.uint32,
+        "I32": torch.int32,
+        # "U16": torch.uint16,
+        "I16": torch.int16,
+        "U8": torch.uint8,
+        "I8": torch.int8,
+        "BOOL": torch.bool,
+        "F8_E4M3": torch.float8_e4m3fn,
+        "F8_E5M2": torch.float8_e5m2,
+    }
+
     def numpy(self) -> gguf.LazyNumpyTensor:
         dtype = self._dtype_map[self.dtype]
         return gguf.LazyNumpyTensor(
@@ -3448,6 +3476,13 @@ def numpy(self) -> gguf.LazyNumpyTensor:
     def meta_with_dtype_and_shape(cls, dtype: torch.dtype, shape: torch.Size) -> Tensor:
         return torch.empty(size=shape, dtype=dtype, device="meta")
 
+    @classmethod
+    def from_safetensors_slice(cls, st_slice: Any) -> Tensor:
+        dtype = cls._dtype_str_map[st_slice.get_dtype()]
+        shape = st_slice.get_shape()
+        lazy = cls(meta=cls.meta_with_dtype_and_shape(dtype, shape), args=(st_slice,), func=lambda s: s[0][:])
+        return cast(torch.Tensor, lazy)
+
     @classmethod
     def __torch_function__(cls, func, types, args=(), kwargs=None):
         del types  # unused
```
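
To make the `from_safetensors_slice` construction concrete: the meta tensor carries dtype and shape at zero cost, the slice handle rides along in `args`, and `func`, here `lambda s: s[0][:]`, is what finally reads the data. Below is a toy stand-in for that pattern; `ToyLazy` is hypothetical, and the real converter gets this behavior (plus operator tracing) from `gguf.LazyBase`:

```python
# Toy illustration of the deferred-slice pattern used above; ToyLazy is
# hypothetical and much simpler than gguf.LazyBase.
from typing import Any, Callable

class ToyLazy:
    def __init__(self, meta: Any, args: tuple, func: Callable[[tuple], Any]):
        self.meta = meta         # dtype/shape-only stand-in, no data behind it
        self._args = args        # e.g. (st_slice,)
        self._func = func        # e.g. lambda s: s[0][:]
        self._value: Any = None

    def materialize(self) -> Any:
        # The slice is only indexed (and the file only read) on first use.
        if self._value is None:
            self._value = self._func(self._args)
        return self._value
```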

gguf-py/gguf/tensor_mapping.py

Lines changed: 6 additions & 8 deletions

```diff
@@ -602,14 +602,12 @@ def __init__(self, arch: MODEL_ARCH, n_blocks: int):
             for tensor, keys in self.block_mappings_cfg.items():
                 if tensor not in MODEL_TENSORS[arch]:
                     continue
-                # TODO: make this configurable
-                n_experts = 160
-                for xid in range(n_experts):
-                    tensor_name = TENSOR_NAMES[tensor].format(bid = bid, xid = xid)
-                    self.mapping[tensor_name] = (tensor, tensor_name)
-                    for key in keys:
-                        key = key.format(bid = bid, xid = xid)
-                        self.mapping[key] = (tensor, tensor_name)
+
+                tensor_name = TENSOR_NAMES[tensor].format(bid = bid)
+                self.mapping[tensor_name] = (tensor, tensor_name)
+                for key in keys:
+                    key = key.format(bid = bid)
+                    self.mapping[key] = (tensor, tensor_name)
 
     def get_type_and_name(self, key: str, try_suffixes: Sequence[str] = ()) -> tuple[MODEL_TENSOR, str] | None:
         result = self.mapping.get(key)
```
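
As a quick illustration of what the simplified loop now builds, here is the same mapping construction run standalone; `TENSOR_NAMES` and `block_mappings_cfg` below are one-entry hypothetical stand-ins for the real tables in `gguf-py`:

```python
# Hypothetical one-entry stand-ins for the real TENSOR_NAMES and
# block_mappings_cfg tables; only the loop structure mirrors the change above.
TENSOR_NAMES = {"ATTN_Q": "blk.{bid}.attn_q"}
block_mappings_cfg = {"ATTN_Q": ("model.layers.{bid}.self_attn.q_proj",)}

mapping: dict[str, tuple[str, str]] = {}
for bid in range(2):  # pretend n_blocks == 2
    for tensor, keys in block_mappings_cfg.items():
        tensor_name = TENSOR_NAMES[tensor].format(bid=bid)
        mapping[tensor_name] = (tensor, tensor_name)
        for key in keys:
            mapping[key.format(bid=bid)] = (tensor, tensor_name)

# Both the GGUF-side and HF-side names for each block now resolve, e.g.:
# mapping["model.layers.0.self_attn.q_proj"] == ("ATTN_Q", "blk.0.attn_q")
```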
