Commit 04cfb6d

force using mxfp4
1 parent 3c4725b commit 04cfb6d

File tree

1 file changed (+5 −69 lines)


convert_hf_to_gguf.py

Lines changed: 5 additions & 69 deletions
@@ -7850,83 +7850,18 @@ def repack_mxfp4(self, new_name: str, blocks: Tensor, scales: Tensor):
         new_data = new_data.reshape(new_data.shape[0], new_data.shape[1], new_data.shape[2] * new_data.shape[3])
         self.gguf_writer.add_tensor(new_name, new_data, raw_dtype=gguf.GGMLQuantizationType.MXFP4)
 
-    def convert_moe_packed_tensors(
-        self,
-        new_name: str,
-        blocks,
-        scales,
-        *,
-        dtype: torch.dtype = torch.float32,
-        rows_per_chunk: int = 32768 * 1024,
-    ) -> tuple[str, Tensor]:
-        import math
-
-        scales = scales.to(torch.int32) - 127
-
-        assert blocks.shape[:-1] == scales.shape, f"{blocks.shape=} does not match {scales.shape=}"
-
-        FP4_VALUES = [
-            +0.0,
-            +0.5,
-            +1.0,
-            +1.5,
-            +2.0,
-            +3.0,
-            +4.0,
-            +6.0,
-            -0.0,
-            -0.5,
-            -1.0,
-            -1.5,
-            -2.0,
-            -3.0,
-            -4.0,
-            -6.0,
-        ]
-        blocks = blocks.to(device="cpu")
-        scales = scales.to(device="cpu")
-        lut = torch.tensor(FP4_VALUES, dtype=dtype, device=blocks.device)
-
-        *prefix_shape, G, B = blocks.shape
-        rows_total = math.prod(prefix_shape) * G
-
-        blocks = blocks.reshape(rows_total, B)
-        scales = scales.reshape(rows_total, 1)
-
-        out = torch.empty(rows_total, B * 2, dtype=dtype, device="cpu")
-
-        for r0 in range(0, rows_total, rows_per_chunk):
-            r1 = min(r0 + rows_per_chunk, rows_total)
-
-            blk = blocks[r0:r1]
-            exp = scales[r0:r1]
-
-            # nibble indices -> int64
-            idx_lo = (blk & 0x0F).to(torch.long)
-            idx_hi = (blk >> 4).to(torch.long)
-
-            sub = out[r0:r1]
-            sub[:, 0::2] = lut[idx_lo]
-            sub[:, 1::2] = lut[idx_hi]
-
-            torch.ldexp(sub, exp, out=sub)
-            del idx_lo, idx_hi, blk, exp
-
-        out = out.reshape(*prefix_shape, G, B * 2).view(*prefix_shape, G * B * 2)
-        logger.info(f"Unpacked {new_name} with shape {out.shape} from MXFP4")
-        return new_name, out
-
     def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
         blocks0: Tensor = torch.zeros(1)
         blocks1: Tensor = torch.zeros(1)
+        found_mxfp4_tensors = False
         # we assume that tensors are loaded in the correct order
         for name, data_torch in self.get_tensors():
             if "mlp.experts.down_proj_blocks" in name:
                 blocks0 = data_torch
             elif "mlp.experts.down_proj_scales" in name:
                 new_name = self.map_tensor_name(name.replace("_scales", ".weight"))
                 self.repack_mxfp4(new_name, blocks0, data_torch)
-                # yield self.convert_moe_packed_tensors(new_name, blocks0, data_torch)
+                found_mxfp4_tensors = True
             elif "mlp.experts.gate_up_proj_blocks" in name:
                 blocks0, blocks1 = data_torch[:, ::2, :, :], data_torch[:, 1::2, :, :]
             elif "mlp.experts.gate_up_proj_scales" in name:
@@ -7935,8 +7870,9 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
             new_name_up = self.map_tensor_name(name.replace("gate_up_proj_scales", "up_proj.weight"))
             self.repack_mxfp4(new_name_gate, blocks0, scales0)
             self.repack_mxfp4(new_name_up, blocks1, scales1)
-            # yield self.convert_moe_packed_tensors(new_name_gate, blocks0, scales0)
-            # yield self.convert_moe_packed_tensors(new_name_up, blocks1, scales1)
+            found_mxfp4_tensors = True
+        if not found_mxfp4_tensors:
+            raise ValueError("No MXFP4 tensors found in the model. Please make sure you are using an MXFP4 model.")
         return []
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
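The gate_up_proj handling above relies on the checkpoint storing the gate and up projections fused and row-interleaved along dim 1, so [:, ::2] selects the gate blocks and [:, 1::2] the up blocks. A toy illustration of that split; n_expert, n_ff, n_group, and n_byte are made-up dimensions, not the real model's:

import torch

n_expert, n_ff, n_group, n_byte = 2, 4, 3, 16
fused = torch.arange(n_expert * 2 * n_ff * n_group * n_byte).reshape(
    n_expert, 2 * n_ff, n_group, n_byte
)

gate = fused[:, ::2, :, :]   # even rows -> gate projection blocks
up = fused[:, 1::2, :, :]    # odd rows  -> up projection blocks

assert gate.shape == up.shape == (n_expert, n_ff, n_group, n_byte)
assert torch.equal(gate[0, 1], fused[0, 2])  # gate row i <- fused row 2*i
assert torch.equal(up[0, 1], fused[0, 3])    # up row i   <- fused row 2*i + 1

With the new found_mxfp4_tensors guard, running the converter on a checkpoint that lacks these packed tensors (for example, one already dequantized to bf16) now fails fast with the ValueError above rather than proceeding without repacked expert tensors.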
