@@ -89,6 +89,8 @@ class LLM:
         EXPERT_USED_COUNT        = "{arch}.expert_used_count"
         EXPERT_SHARED_COUNT      = "{arch}.expert_shared_count"
         EXPERT_WEIGHTS_SCALE     = "{arch}.expert_weights_scale"
+        EXPERT_WEIGHTS_NORM      = "{arch}.expert_weights_norm"
+        EXPERT_GATING_FUNC       = "{arch}.expert_gating_func"
         POOLING_TYPE             = "{arch}.pooling_type"
         LOGIT_SCALE              = "{arch}.logit_scale"
         DECODER_START_TOKEN_ID   = "{arch}.decoder_start_token_id"
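For context, a minimal sketch of how a conversion script could emit the two new key/value pairs, assuming gguf-py's generic GGUFWriter setters (add_bool/add_uint32); the architecture name and output path below are placeholders, not taken from this diff:

# Hedged sketch: write the new MoE gating metadata via GGUFWriter's
# generic setters. "deepseek2" and "model.gguf" are illustrative
# assumptions; the value types shown are also an assumption.
from gguf import GGUFWriter
from gguf.constants import Keys, ExpertGatingFuncType

arch = "deepseek2"                       # placeholder architecture name
writer = GGUFWriter("model.gguf", arch)  # placeholder output path
writer.add_bool(Keys.LLM.EXPERT_WEIGHTS_NORM.format(arch=arch), True)
writer.add_uint32(Keys.LLM.EXPERT_GATING_FUNC.format(arch=arch),
                  int(ExpertGatingFuncType.SIGMOID))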
@@ -257,6 +259,7 @@ class MODEL_TENSOR(IntEnum):
     FFN_GATE_SHEXP   = auto()
     FFN_DOWN_SHEXP   = auto()
     FFN_UP_SHEXP     = auto()
+    FFN_EXP_PROBS_B  = auto()
     ATTN_Q_NORM      = auto()
     ATTN_K_NORM      = auto()
     LAYER_OUT_NORM   = auto()
@@ -387,6 +390,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_TENSOR.FFN_GATE_EXP:     "blk.{bid}.ffn_gate_exps",
     MODEL_TENSOR.FFN_DOWN_EXP:     "blk.{bid}.ffn_down_exps",
     MODEL_TENSOR.FFN_UP_EXP:       "blk.{bid}.ffn_up_exps",
+    MODEL_TENSOR.FFN_EXP_PROBS_B:  "blk.{bid}.exp_probs_b",
     MODEL_TENSOR.LAYER_OUT_NORM:   "blk.{bid}.layer_output_norm",
     MODEL_TENSOR.SSM_IN:           "blk.{bid}.ssm_in",
     MODEL_TENSOR.SSM_CONV1D:       "blk.{bid}.ssm_conv1d",
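A small, hedged lookup example for the new tensor name after conversion, assuming gguf-py's GGUFReader and its ReaderTensor fields; the file path is a placeholder:

# Hedged sketch: locate blk.{bid}.exp_probs_b in a converted model
# with gguf-py's GGUFReader; the path is a placeholder.
from gguf import GGUFReader

reader = GGUFReader("model.gguf")  # placeholder path
name = "blk.{bid}.exp_probs_b".format(bid=0)
tensor = next((t for t in reader.tensors if t.name == name), None)
if tensor is not None:
    print(name, tensor.shape, tensor.tensor_type)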
@@ -978,6 +982,7 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_GATE_SHEXP,
         MODEL_TENSOR.FFN_DOWN_SHEXP,
         MODEL_TENSOR.FFN_UP_SHEXP,
+        MODEL_TENSOR.FFN_EXP_PROBS_B,
     ],
     MODEL_ARCH.CHATGLM: [
         MODEL_TENSOR.TOKEN_EMBD,
@@ -1177,6 +1182,10 @@ class GGMLQuantizationType(IntEnum):
     IQ2_TN  = 42
 
 
+class ExpertGatingFuncType(IntEnum):
+    SOFTMAX = 1
+    SIGMOID = 2
+
 
 # TODO: add GGMLFileType from ggml_ftype in ggml.h
 
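To show what the two gating functions and the bias tensor are meant to do, here is a hedged NumPy illustration of DeepSeek-V3-style routing; it mirrors the intent of these constants, not llama.cpp's actual compute graph, and every name in it is local to the example:

# Hedged illustration: the gate scores experts with softmax or sigmoid,
# exp_probs_b biases *which* experts are picked (selection only), and
# the mixing weights are renormalized when expert_weights_norm is set.
# Shapes and values are arbitrary.
import numpy as np

def route(logits, bias, n_used, gating):
    if gating == 1:   # ExpertGatingFuncType.SOFTMAX
        e = np.exp(logits - logits.max())
        probs = e / e.sum()
    else:             # ExpertGatingFuncType.SIGMOID
        probs = 1.0 / (1.0 + np.exp(-logits))
    chosen = np.argsort(probs + bias)[-n_used:]  # bias shifts selection only
    weights = probs[chosen]
    weights /= weights.sum()                     # expert_weights_norm behaviour
    return chosen, weights

chosen, weights = route(np.random.randn(8), np.zeros(8), n_used=2, gating=2)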