@@ -861,6 +861,9 @@ def _create_vocab_sentencepiece(self):
         for token_id, token_data in added_tokens_decoder.items():
             token_id = int(token_id)
             token: str = token_data["content"]
+            if token_id >= vocab_size:
+                logger.warning(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
+                continue
             if toktypes[token_id] != SentencePieceTokenTypes.UNUSED:
                 if tokens[token_id] != token.encode("utf-8"):
                     logger.warning(f'replacing token {token_id}: {tokens[token_id].decode("utf-8")!r} -> {token!r}')
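The guard added above skips entries in `added_tokens_decoder` whose ids lie outside the SentencePiece vocabulary; without it, the `toktypes[token_id]` and `tokens[token_id]` lookups that follow would raise an IndexError. A minimal, self-contained sketch of the situation (all names and values are made up for illustration):

```python
# Hypothetical illustration of the out-of-range guard; values are made up.
vocab_size = 4                                    # size of the SentencePiece model
tokens   = [b"<pad>", b"<bos>", b"<eos>", b"hi"]  # one entry per vocab id
toktypes = [0, 0, 0, 0]                           # placeholder token-type values

# tokenizer_config.json may declare added tokens with ids past vocab_size
added_tokens_decoder = {
    "2":  {"content": "<eos>"},          # in range: overrides the existing entry
    "10": {"content": "<extra_token>"},  # out of range: must be skipped
}

for token_id, token_data in added_tokens_decoder.items():
    token_id = int(token_id)
    token: str = token_data["content"]
    if token_id >= vocab_size:
        print(f"ignore token {token_id}: id is out of range, max={vocab_size - 1}")
        continue
    tokens[token_id] = token.encode("utf-8")      # safe: token_id is within range
```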
@@ -3322,6 +3325,83 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return [(self.map_tensor_name(name), data_torch)]


+@Model.register("Gemma3ForCausalLM", "Gemma3ForConditionalGeneration")
+class Gemma3Model(Model):
+    model_arch = gguf.MODEL_ARCH.GEMMA3
+    has_vision: bool = False
+
+    # we need to merge the text_config into the root level of hparams
+    def __init__(self, *args, **kwargs):
+        hparams = Model.load_hparams(kwargs["dir_model"])
+        if "text_config" in hparams:
+            hparams = {**hparams, **hparams["text_config"]}
+        kwargs["hparams"] = hparams
+        super().__init__(*args, **kwargs)
+        if "vision_config" in hparams:
+            logger.info("Has vision encoder, but it will be ignored")
+            self.has_vision = True
+
+    def write(self):
+        super().write()
+        if self.has_vision:
+            logger.info("NOTE: this script only convert the language model to GGUF")
+            logger.info("      for the vision model, please use gemma3_convert_encoder_to_gguf.py")
+
+    def set_vocab(self):
+        self._set_vocab_sentencepiece()
+
+        self.gguf_writer.add_add_space_prefix(False)
+
+    def set_gguf_parameters(self):
+        hparams = self.hparams
+        block_count = hparams["num_hidden_layers"]
+
+        # some default values are not specified in the hparams
+        self.gguf_writer.add_context_length(hparams.get("max_position_embeddings", 131072))
+        self.gguf_writer.add_embedding_length(hparams["hidden_size"])
+        self.gguf_writer.add_block_count(block_count)
+        self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
+        self.gguf_writer.add_head_count(hparams.get("num_attention_heads", 8))
+        self.gguf_writer.add_layer_norm_rms_eps(self.hparams.get("rms_norm_eps", 1e-6))
+        self.gguf_writer.add_key_length(hparams.get("head_dim", 256))
+        self.gguf_writer.add_value_length(hparams.get("head_dim", 256))
+        self.gguf_writer.add_file_type(self.ftype)
+        self.gguf_writer.add_rope_freq_base(hparams.get("rope_theta", 1_000_000.0)) # for global layers
+        # both attn_logit_softcapping and final_logit_softcapping are removed in Gemma3
+        assert hparams.get("attn_logit_softcapping") is None
+        assert hparams.get("final_logit_softcapping") is None
+        self.gguf_writer.add_sliding_window(hparams["sliding_window"])
+        self.gguf_writer.add_head_count_kv(hparams.get("num_key_value_heads", 4))
+        if hparams.get("rope_scaling") is not None:
+            assert hparams["rope_scaling"]["rope_type"] == "linear"
+            # important: this rope_scaling is only applied for global layers, and not used by 1B model
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+            self.gguf_writer.add_rope_scaling_factor(hparams["rope_scaling"]["factor"])
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        del bid  # unused
+
+        if name.startswith("language_model."):
+            name = name.replace("language_model.", "")
+        elif name.startswith("multi_modal_projector.") or name.startswith("vision_tower.") \
+                or name.startswith("multimodal_projector.") or name.startswith("vision_model."): # this is for old HF model, should be removed later
+            # ignore vision tensors
+            return []
+
+        # remove OOV (out-of-vocabulary) rows in token_embd
+        if "embed_tokens.weight" in name:
+            vocab = self._create_vocab_sentencepiece()
+            tokens = vocab[0]
+            data_torch = data_torch[:len(tokens)]
+
+        # ref code in Gemma3RMSNorm
+        # output = output * (1.0 + self.weight.float())
+        if name.endswith("norm.weight"):
+            data_torch = data_torch + 1
+
+        return [(self.map_tensor_name(name), data_torch)]
+
+
 @Model.register("Starcoder2ForCausalLM")
 class StarCoder2Model(Model):
     model_arch = gguf.MODEL_ARCH.STARCODER2
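The `data_torch + 1` adjustment in `modify_tensors()` exists because `Gemma3RMSNorm` scales the normalized activations by `(1.0 + weight)`, while the exported weights are consumed by a norm that scales by the stored weight directly. A small sketch of that equivalence, assuming a plain RMSNorm on the inference side (illustrative only, not the actual kernels):

```python
import torch

def _rms(x: torch.Tensor, eps: float = 1e-6) -> torch.Tensor:
    # shared normalization step: x / sqrt(mean(x^2) + eps)
    x = x.float()
    return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + eps)

def gemma3_rms_norm(x: torch.Tensor, weight: torch.Tensor) -> torch.Tensor:
    # reference behaviour: output = output * (1.0 + self.weight.float())
    return _rms(x) * (1.0 + weight.float())

def plain_rms_norm(x: torch.Tensor, weight: torch.Tensor) -> torch.Tensor:
    # standard RMSNorm: output = output * weight
    return _rms(x) * weight.float()

x = torch.randn(4, 8)
w = torch.randn(8) * 0.1  # Gemma3 norm weights are learned as an offset around 0

# storing (w + 1) lets the plain norm reproduce Gemma3's scaling exactly,
# which is why every *norm.weight tensor is shifted by +1 during conversion
assert torch.allclose(gemma3_rms_norm(x, w), plain_rms_norm(x, w + 1.0))
```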
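The `__init__` override merges `text_config` into the root of `hparams` because the multimodal checkpoint (`Gemma3ForConditionalGeneration`) nests the language-model hyperparameters under `text_config`, while the text-only checkpoint (`Gemma3ForCausalLM`) keeps them at the top level. A sketch of the merge with made-up, trimmed-down config shapes (real checkpoints carry many more fields):

```python
# Hypothetical config shapes for illustration only.
causal_lm_hparams = {
    "architectures": ["Gemma3ForCausalLM"],
    "hidden_size": 1152,
    "num_hidden_layers": 26,
}

conditional_gen_hparams = {
    "architectures": ["Gemma3ForConditionalGeneration"],
    "text_config": {"hidden_size": 2560, "num_hidden_layers": 34},
    "vision_config": {"hidden_size": 1152},  # ignored by this script
}

def flatten(hparams: dict) -> dict:
    # same idea as Gemma3Model.__init__: lift text_config keys to the root,
    # letting them override any same-named root-level keys
    if "text_config" in hparams:
        hparams = {**hparams, **hparams["text_config"]}
    return hparams

assert flatten(causal_lm_hparams)["hidden_size"] == 1152
assert flatten(conditional_gen_hparams)["hidden_size"] == 2560
```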