diff --git a/src/transformers/models/gemma3/convert_gemma3_weights.py b/src/transformers/models/gemma3/convert_gemma3_weights.py
index a963f1a3f62e..2129d04a362c 100644
--- a/src/transformers/models/gemma3/convert_gemma3_weights.py
+++ b/src/transformers/models/gemma3/convert_gemma3_weights.py
@@ -191,7 +191,10 @@
         num_hidden_layers=34,
         num_key_value_heads=4,
         sliding_window=1024,
-        rope_parameters={"rope_type": "linear", "factor": 8.0},  # used for global RoPE only
+        rope_parameters={
+            "full_attention": {"rope_type": "linear", "factor": 8.0},
+            "sliding_attention": {"rope_type": "default"},
+        },
         rope_theta=1_000_000,
         rope_local_base_freq=10_000,
         attn_logit_softcapping=None,
@@ -209,7 +212,10 @@
         num_hidden_layers=48,
         num_key_value_heads=8,
         sliding_window=1024,
-        rope_parameters={"rope_type": "linear", "factor": 8.0},  # used for global RoPE only
+        rope_parameters={
+            "full_attention": {"rope_type": "linear", "factor": 8.0},
+            "sliding_attention": {"rope_type": "default"},
+        },
         rope_theta=1_000_000,
         rope_local_base_freq=10_000,
         attn_logit_softcapping=None,
@@ -227,7 +233,10 @@
         num_key_value_heads=16,
         head_dim=128,
         sliding_window=1024,
-        rope_parameters={"rope_type": "linear", "factor": 8.0},  # used for global RoPE only
+        rope_parameters={
+            "full_attention": {"rope_type": "linear", "factor": 8.0},
+            "sliding_attention": {"rope_type": "default"},
+        },
         rope_theta=1_000_000,
         rope_local_base_freq=10_000,
         attn_logit_softcapping=None,
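
For reference, a minimal sketch of how the nested per-layer-type `rope_parameters` format introduced above would be passed to a text config. The kwargs mirror the first hunk; the exact `Gemma3TextConfig` signature accepted by a given transformers version is an assumption here, not something the diff itself shows.

```python
# Sketch only: assumes Gemma3TextConfig in the installed transformers version
# accepts the nested per-layer-type rope_parameters dict used in this diff.
from transformers import Gemma3TextConfig

config = Gemma3TextConfig(
    num_hidden_layers=34,
    num_key_value_heads=4,
    sliding_window=1024,
    rope_parameters={
        # Full-attention (global) layers: linear RoPE scaling with factor 8.0,
        # matching the old flat dict that applied to global RoPE only.
        "full_attention": {"rope_type": "linear", "factor": 8.0},
        # Sliding-attention (local) layers: unscaled default RoPE.
        "sliding_attention": {"rope_type": "default"},
    },
    rope_theta=1_000_000,
    rope_local_base_freq=10_000,
    attn_logit_softcapping=None,
)
```

The change replaces the flat dict (plus comment) with explicit `full_attention` / `sliding_attention` entries, so the scaling that previously applied implicitly to global layers is now spelled out per attention type.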