@@ -7850,83 +7850,18 @@ def repack_mxfp4(self, new_name: str, blocks: Tensor, scales: Tensor):
7850
7850
new_data = new_data .reshape (new_data .shape [0 ], new_data .shape [1 ], new_data .shape [2 ] * new_data .shape [3 ])
7851
7851
self .gguf_writer .add_tensor (new_name , new_data , raw_dtype = gguf .GGMLQuantizationType .MXFP4 )
7852
7852
7853
def convert_moe_packed_tensors(
    self,
    new_name: str,
    blocks,
    scales,
    *,
    dtype: torch.dtype = torch.float32,
    rows_per_chunk: int = 32768 * 1024,
) -> tuple[str, Tensor]:
    """Unpack nibble-packed MXFP4 blocks into a dense tensor of ``dtype``.

    Each element of ``blocks`` holds two 4-bit codes. The low nibble is
    written to the even output positions and the high nibble to the odd
    ones; every code is translated through a 16-entry value table and the
    result is multiplied by ``2 ** (scale - 127)`` using ``torch.ldexp``.

    Args:
        new_name: Name returned alongside the unpacked tensor (also logged).
        blocks: Packed tensor whose shape is ``(*prefix, G, B)``.
        scales: Per-row exponents; shape must equal ``blocks.shape[:-1]``.
        dtype: Element type of the lookup table and of the output.
        rows_per_chunk: Number of ``(prefix * G)`` rows processed per
            iteration, bounding the size of the temporary index tensors.

    Returns:
        ``(new_name, out)`` where ``out`` lives on the CPU and has shape
        ``(*prefix, G * B * 2)``.

    Raises:
        AssertionError: If ``blocks.shape[:-1]`` differs from ``scales.shape``.
    """
    import math

    # Shift the stored scale by 127 (presumably the exponent bias) so it can
    # be fed to ldexp as a signed power of two.
    scales = scales.to(torch.int32) - 127

    assert blocks.shape[:-1] == scales.shape, f"{blocks.shape=} does not match {scales.shape=}"

    # Codes 0-7 are the non-negative values, codes 8-15 their negations
    # (including -0.0 for code 8).
    magnitudes = [0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0]
    fp4_table = magnitudes + [-m for m in magnitudes]

    blocks = blocks.to(device="cpu")
    scales = scales.to(device="cpu")
    lut = torch.tensor(fp4_table, dtype=dtype, device=blocks.device)

    *prefix_shape, G, B = blocks.shape
    rows_total = math.prod(prefix_shape) * G

    # Flatten everything but the last axis so rows can be processed in slabs.
    packed = blocks.reshape(rows_total, B)
    exponents = scales.reshape(rows_total, 1)
    out = torch.empty(rows_total, B * 2, dtype=dtype, device="cpu")

    for start in range(0, rows_total, rows_per_chunk):
        stop = min(start + rows_per_chunk, rows_total)
        chunk = packed[start:stop]
        dest = out[start:stop]

        # Table lookups need int64 indices; low nibble -> even, high -> odd.
        dest[:, 0::2] = lut[(chunk & 0x0F).to(torch.long)]
        dest[:, 1::2] = lut[(chunk >> 4).to(torch.long)]

        # Apply the per-row power-of-two scale in place.
        torch.ldexp(dest, exponents[start:stop], out=dest)
        del chunk, dest

    # Merge the (G, B * 2) axes into a single trailing axis.
    out = out.view(*prefix_shape, G * B * 2)
    logger.info(f"Unpacked {new_name} with shape {out.shape} from MXFP4")
    return new_name, out
7918
-
7919
7853
def generate_extra_tensors (self ) -> Iterable [tuple [str , Tensor ]]:
7920
7854
blocks0 : Tensor = torch .zeros (1 )
7921
7855
blocks1 : Tensor = torch .zeros (1 )
7856
+ found_mxfp4_tensors = False
7922
7857
# we assume that tensors are loaded in the correct order
7923
7858
for name , data_torch in self .get_tensors ():
7924
7859
if "mlp.experts.down_proj_blocks" in name :
7925
7860
blocks0 = data_torch
7926
7861
elif "mlp.experts.down_proj_scales" in name :
7927
7862
new_name = self .map_tensor_name (name .replace ("_scales" , ".weight" ))
7928
7863
self .repack_mxfp4 (new_name , blocks0 , data_torch )
7929
- # yield self.convert_moe_packed_tensors(new_name, blocks0, data_torch)
7864
+ found_mxfp4_tensors = True
7930
7865
elif "mlp.experts.gate_up_proj_blocks" in name :
7931
7866
blocks0 , blocks1 = data_torch [:, ::2 , :, :], data_torch [:, 1 ::2 , :, :]
7932
7867
elif "mlp.experts.gate_up_proj_scales" in name :
@@ -7935,8 +7870,9 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
7935
7870
new_name_up = self .map_tensor_name (name .replace ("gate_up_proj_scales" , "up_proj.weight" ))
7936
7871
self .repack_mxfp4 (new_name_gate , blocks0 , scales0 )
7937
7872
self .repack_mxfp4 (new_name_up , blocks1 , scales1 )
7938
- # yield self.convert_moe_packed_tensors(new_name_gate, blocks0, scales0)
7939
- # yield self.convert_moe_packed_tensors(new_name_up, blocks1, scales1)
7873
+ found_mxfp4_tensors = True
7874
+ if not found_mxfp4_tensors :
7875
+ raise ValueError ("No MXFP4 tensors found in the model. Please make sure you are using MXFP4 model." )
7940
7876
return []
7941
7877
7942
7878
def modify_tensors (self , data_torch : Tensor , name : str , bid : int | None ) -> Iterable [tuple [str , Tensor ]]:
0 commit comments