Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion vllm/model_executor/models/gemma3n.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,8 +357,27 @@ def __init__(
offset = 2 if self.sliding_window is not None else 1
kv_shared_layer_index = first_kv_shared_layer_idx - offset
if kv_shared_layer_index >= 0:
# Different model wrappers expose layer parameters under
# different parent attributes. For example:
# - Gemma3nForCausalLM → parameters live under "model.layers"
# - Gemma3nForConditionalGeneration → parameters live under
#   "language_model.model.layers"
# Extract the portion of `prefix` that comes *before* ".layers."
# so the KV sharing target layer name built below references the
# correct model root regardless of which wrapper class was used.
if ".layers." in prefix:
param_name_before_layers = prefix.split(".layers.")[0]
else:
raise ValueError(
"Unexpected prefix format for Gemma3nAttention: "
f"'{prefix}'. The prefix is expected to contain "
"'.layers.' to correctly determine the KV sharing "
"target layer."
)
# Only the greater layer is required to specify sharing.
kv_sharing_target_layer_name = f"language_model.model.layers.{kv_shared_layer_index}.self_attn.attn" # noqa: E501
kv_sharing_target_layer_name = f"{param_name_before_layers}.layers.{kv_shared_layer_index}.self_attn.attn" # noqa: E501

self.rotary_emb = get_rope(
self.head_dim,
Expand Down