
Commit 12f8034

pwilkin authored and compilade committed
convert : avoid dequantizing mxfp4 for GPT-OSS (ggml-org#16756)
1 parent 1929ecd commit 12f8034


1 file changed: +7 -0 lines changed


convert_hf_to_gguf.py

Lines changed: 7 additions & 0 deletions
@@ -8943,6 +8943,13 @@ def set_vocab(self):
 class GptOssModel(TextModel):
     model_arch = gguf.MODEL_ARCH.GPT_OSS
 
+    # TODO: remove once MXFP4 is supported more generally
+    def dequant_model(self):
+        quant_config = self.hparams.get("quantization_config")
+        if quant_config is not None and quant_config.get("quant_method") == "mxfp4":
+            return
+        return super().dequant_model()
+
     def transform_nibble_layout(self, tensor):
         assert tensor.dtype == torch.uint8
         assert tensor.shape[-1] == 16
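
For context, the added override short-circuits the converter's dequantization step whenever the checkpoint's config advertises MXFP4, so the tensors are left in their quantized layout instead of first being expanded to floats. Below is a minimal standalone sketch of that check; the should_skip_dequant helper and the sample hparams dicts are hypothetical illustrations, not part of convert_hf_to_gguf.py:

    # Sketch of the early-return condition used in GptOssModel.dequant_model above.
    # The sample hparams dicts are hypothetical; a real GPT-OSS config.json carries
    # its own "quantization_config" section.
    def should_skip_dequant(hparams: dict) -> bool:
        quant_config = hparams.get("quantization_config")
        return quant_config is not None and quant_config.get("quant_method") == "mxfp4"

    hparams_mxfp4 = {"quantization_config": {"quant_method": "mxfp4"}}  # hypothetical example
    hparams_float = {}                                                  # no quantization_config

    assert should_skip_dequant(hparams_mxfp4)      # MXFP4: skip dequantization
    assert not should_skip_dequant(hparams_float)  # otherwise: fall back to the base class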
