Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
2c1d904
split out from timm PR
zucchini-nlp Feb 26, 2026
0caafab
all other VLMs
zucchini-nlp Feb 26, 2026
edb4d27
timm backbone is not here
zucchini-nlp Feb 26, 2026
cbbecf4
oops, extra key is breaking everything
zucchini-nlp Feb 26, 2026
1fd89ae
.
zucchini-nlp Feb 26, 2026
0a4a829
this test
zucchini-nlp Feb 26, 2026
af57e2e
maybe
zucchini-nlp Feb 26, 2026
ea3ac63
Merge branch 'main' into convert-weights-recursive
zucchini-nlp Feb 26, 2026
fbda051
fix missing keys when loading from hub
zucchini-nlp Feb 27, 2026
88f18be
now fix fast tests
zucchini-nlp Feb 27, 2026
6eb0e13
Merge branch 'main' into convert-weights-recursive
zucchini-nlp Feb 27, 2026
b9f29ab
merge gone wrong
zucchini-nlp Feb 27, 2026
d452817
fix repo
zucchini-nlp Feb 27, 2026
500e96b
refine the regex again!
zucchini-nlp Feb 27, 2026
3fcf527
Merge branch 'main' into convert-weights-recursive
zucchini-nlp Mar 3, 2026
23a3e13
close the bracket
zucchini-nlp Mar 3, 2026
42e85f0
Merge branch 'main' into convert-weights-recursive
zucchini-nlp Mar 10, 2026
e85b103
Apply suggestions from code review
zucchini-nlp Mar 11, 2026
ce82bcc
merge main
zucchini-nlp Mar 12, 2026
d5ab4fd
main
zucchini-nlp Mar 17, 2026
dcc95d5
revert unrelated
zucchini-nlp Mar 17, 2026
f4b5888
!
zucchini-nlp Mar 17, 2026
215da83
revert more
zucchini-nlp Mar 17, 2026
370feb6
add submodule prefix when recursing
zucchini-nlp Mar 17, 2026
3c6a23f
Merge branch 'main' into convert-weights-recursive
zucchini-nlp Mar 17, 2026
b3559f7
i'll need to fix maskformer later
zucchini-nlp Mar 17, 2026
960c716
dont duplicate the same pattern twice
zucchini-nlp Mar 18, 2026
52560f6
Merge branch 'main' into convert-weights-recursive
zucchini-nlp Mar 18, 2026
17294cc
fix modular
zucchini-nlp Mar 18, 2026
7503c12
detr
zucchini-nlp Mar 18, 2026
aac4bba
colpali isn't working still!
zucchini-nlp Mar 19, 2026
1c770a1
oke, so this can be fine for now
zucchini-nlp Mar 19, 2026
6780970
!
zucchini-nlp Mar 19, 2026
a1220c9
revert
zucchini-nlp Mar 19, 2026
5a68bf7
dot lost in regex and comments
zucchini-nlp Mar 20, 2026
d10cb69
Merge branch 'main' into convert-weights-recursive
zucchini-nlp Mar 20, 2026
abb8001
timm wrapper is weird
zucchini-nlp Mar 20, 2026
503c206
skip these, timm wrapper
zucchini-nlp Mar 23, 2026
f92c063
Merge branch 'main' into convert-weights-recursive
zucchini-nlp Mar 23, 2026
ca68663
bye bye timm
zucchini-nlp Mar 23, 2026
4c65203
make repo check happy
zucchini-nlp Mar 23, 2026
cc19ab9
Revert "bye bye timm"
zucchini-nlp Mar 24, 2026
7e3d40e
love timm!
zucchini-nlp Mar 24, 2026
29e600a
Merge branch 'main' into convert-weights-recursive
zucchini-nlp Mar 25, 2026
df86ff7
Apply repo consistency fixes
github-actions[bot] Mar 25, 2026
ebcb04f
oke, the bot can't fix it so here we go
zucchini-nlp Mar 25, 2026
99f0a05
Merge branch 'main' into convert-weights-recursive
zucchini-nlp Mar 25, 2026
58e0d1c
Merge branch 'main' into convert-weights-recursive
zucchini-nlp Mar 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/source/en/model_doc/maskformer.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ This model was contributed by [francesco](https://huggingface.co/francesco). The

[[autodoc]] models.maskformer.modeling_maskformer.MaskFormerForInstanceSegmentationOutput

## MaskFormerDetrConfig

[[autodoc]] MaskFormerDetrConfig

## MaskFormerConfig

[[autodoc]] MaskFormerConfig
Expand Down
107 changes: 71 additions & 36 deletions src/transformers/conversion_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@
"qwen3_omni_moe": "qwen2_moe",
"qwen3_omni_moe_thinker": "qwen2_moe",
"qwen3_next": "qwen2_moe",
"qwen3_5_moe": "qwen2_moe",
Comment thread
zucchini-nlp marked this conversation as resolved.
"hunyuan_v1_moe": "qwen2_moe",
"flex_olmo": "qwen2_moe",
"olmoe": "qwen2_moe",
Expand Down Expand Up @@ -91,7 +90,6 @@ def _build_checkpoint_conversion_mapping():
],
"colpali": [
WeightRenaming(source_patterns=r"vlm(?!\.model)", target_patterns="vlm.model"),
WeightRenaming(source_patterns=r"language_model.model", target_patterns="language_model"),
],
"emu3": [
WeightRenaming(source_patterns=r"text_model.model", target_patterns="text_model"),
Expand All @@ -109,20 +107,16 @@ def _build_checkpoint_conversion_mapping():
source_patterns=r"(?<!_)model(?!\.(language_model|visual))", target_patterns="model.language_model"
),
],
"colqwen2": [
WeightRenaming(
source_patterns=r"vlm.model(?!\.(language_model|visual))",
target_patterns="vlm.model.language_model",
),
],
"gemma3n_text": [
WeightRenaming(source_patterns=r"^model.language_model", target_patterns="model"),
],
"timm_wrapper": [
# Simply add the prefix `timm_model`. Similar to `base_model_prefix` but also removes prefix
# when saving.TODO: Would be probably much cleaner with a `add_prefix` argument in WeightRenaming
# when saving. TODO: Would be probably much cleaner with a `add_prefix` argument in WeightRenaming
# Note: we don't add `timm_model` when it is part of a bigger VLM, because they already have `timm_model`
# saved in state dict keys. Thus the look behind check. Should be fixed by proper `add_prefix`!
WeightRenaming(
source_patterns=r"(.+)",
source_patterns=r"^(?!(?:model\.|backbone\.|tower\.))(.+)$",
target_patterns=r"timm_model.\1",
)
],
Expand All @@ -147,7 +141,6 @@ def _build_checkpoint_conversion_mapping():
target_patterns="model.vlm.language_model.embed_tokens",
),
],
"chmv2": [WeightRenaming(r"backbone.layer.", r"backbone.model.layer.")],
"dinov3_convnext": [WeightRenaming(r"(?<!model\.)stages", r"model.stages")],
"dinov3_vit": [WeightRenaming(r"(?<!model\.)layer.", r"model.layer.")],
"timesfm2_5": [
Expand All @@ -161,21 +154,16 @@ def _build_checkpoint_conversion_mapping():
"qwen3_5_text": [
WeightRenaming(source_patterns=r"^model.language_model", target_patterns="model"),
],
"t5gemma2": [
WeightRenaming(r"(?<!vision_model\.)encoder.embed_tokens.", "encoder.text_model.embed_tokens."),
WeightRenaming(r"(?<!vision_model\.)encoder.norm.", "encoder.text_model.norm."),
WeightRenaming(r"(?<!vision_model\.)encoder.layers.", "encoder.text_model.layers."),
],
"sam3_tracker": [
WeightRenaming(
source_patterns=r"detector_model.vision_encoder.backbone.", target_patterns="vision_encoder.backbone."
),
WeightRenaming(source_patterns=r"tracker_neck.", target_patterns="vision_encoder.neck."),
],
"t5gemma2_encoder": [
WeightRenaming("^embed_tokens.", "text_model.embed_tokens."),
WeightRenaming("^norm.", "text_model.norm."),
WeightRenaming("^layers.", "text_model.layers."),
WeightRenaming(r"(?<!decoder\.)(?<!text_model\.)embed_tokens\.", "text_model.embed_tokens."),
WeightRenaming(r"(?<!decoder\.)(?<!text_model\.)(?<!layer)(?<!_)norm\.", "text_model.norm."),
WeightRenaming(r"(?<!vision_model.encoder\.)(?<!decoder\.)(?<!text_model\.)layers.", "text_model.layers."),
],
"mixtral": [
WeightRenaming(".block_sparse_moe.", ".mlp."),
Expand Down Expand Up @@ -320,6 +308,24 @@ def _build_checkpoint_conversion_mapping():
WeightRenaming("out_proj", "o_proj"),
WeightRenaming(r"layers.(\d+).fc1", r"layers.\1.mlp.fc1"),
WeightRenaming(r"layers.(\d+).fc2", r"layers.\1.mlp.fc2"),
# `DetrForSegmentation`
WeightRenaming("bbox_attention.q_linear", "bbox_attention.q_proj"),
WeightRenaming("bbox_attention.k_linear", "bbox_attention.k_proj"),
# Mask head refactor
Comment on lines +311 to +314
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this was straightforward, so I just moved it here and deleted VLMS

WeightRenaming("mask_head.lay1", "mask_head.conv1.conv"),
WeightRenaming("mask_head.gn1", "mask_head.conv1.norm"),
WeightRenaming("mask_head.lay2", "mask_head.conv2.conv"),
WeightRenaming("mask_head.gn2", "mask_head.conv2.norm"),
WeightRenaming("mask_head.adapter1", "mask_head.fpn_stages.0.fpn_adapter"),
WeightRenaming("mask_head.lay3", "mask_head.fpn_stages.0.refine.conv"),
WeightRenaming("mask_head.gn3", "mask_head.fpn_stages.0.refine.norm"),
WeightRenaming("mask_head.adapter2", "mask_head.fpn_stages.1.fpn_adapter"),
WeightRenaming("mask_head.lay4", "mask_head.fpn_stages.1.refine.conv"),
WeightRenaming("mask_head.gn4", "mask_head.fpn_stages.1.refine.norm"),
WeightRenaming("mask_head.adapter3", "mask_head.fpn_stages.2.fpn_adapter"),
WeightRenaming("mask_head.lay5", "mask_head.fpn_stages.2.refine.conv"),
WeightRenaming("mask_head.gn5", "mask_head.fpn_stages.2.refine.norm"),
WeightRenaming("mask_head.out_lay", "mask_head.output_conv"),
],
"rt_detr": [
WeightRenaming("out_proj", "o_proj"),
Expand Down Expand Up @@ -348,6 +354,24 @@ def _build_checkpoint_conversion_mapping():
WeightRenaming(
r"decoder.layers.(\d+).ca_qpos_sine_proj", r"decoder.layers.\1.encoder_attn.q_pos_sine_proj"
),
# The rest of patterns are used only in `ConditionalDetrForSegmentation`
WeightRenaming("bbox_attention.q_linear", "bbox_attention.q_proj"),
WeightRenaming("bbox_attention.k_linear", "bbox_attention.k_proj"),
# Mask head refactor
WeightRenaming("mask_head.lay1", "mask_head.conv1.conv"),
WeightRenaming("mask_head.gn1", "mask_head.conv1.norm"),
WeightRenaming("mask_head.lay2", "mask_head.conv2.conv"),
WeightRenaming("mask_head.gn2", "mask_head.conv2.norm"),
WeightRenaming("mask_head.adapter1", "mask_head.fpn_stages.0.fpn_adapter"),
WeightRenaming("mask_head.lay3", "mask_head.fpn_stages.0.refine.conv"),
WeightRenaming("mask_head.gn3", "mask_head.fpn_stages.0.refine.norm"),
WeightRenaming("mask_head.adapter2", "mask_head.fpn_stages.1.fpn_adapter"),
WeightRenaming("mask_head.lay4", "mask_head.fpn_stages.1.refine.conv"),
WeightRenaming("mask_head.gn4", "mask_head.fpn_stages.1.refine.norm"),
WeightRenaming("mask_head.adapter3", "mask_head.fpn_stages.2.fpn_adapter"),
WeightRenaming("mask_head.lay5", "mask_head.fpn_stages.2.refine.conv"),
WeightRenaming("mask_head.gn5", "mask_head.fpn_stages.2.refine.norm"),
WeightRenaming("mask_head.out_lay", "mask_head.output_conv"),
],
"deformable_detr": [
WeightRenaming("backbone.conv_encoder", "backbone"),
Expand Down Expand Up @@ -503,8 +527,12 @@ def register_checkpoint_conversion_mapping(
_checkpoint_conversion_mapping_cache[model_type] = mapping


# DO NOT MODIFY, KEPT FOR BC ONLY
VLMS = ["detr"]
def extract_weight_conversions_for_model(model: PreTrainedModel) -> list[WeightConverter | WeightRenaming] | None:
    """Look up the registered weight-conversion entries for this model's ``model_type``.

    Reads ``model.config.model_type`` and returns whatever list of renamings/converters
    is registered for that type (which may itself be ``None`` if nothing is registered).
    Returns ``None`` when the config carries no ``model_type``, since no lookup is
    possible in that case.
    """
    # Guard clause: configs without a `model_type` cannot be matched to a mapping.
    model_type = getattr(model.config, "model_type", None)
    if model_type is None:
        return None
    return get_checkpoint_conversion_mapping(model_type)


def get_model_conversion_mapping(
Expand All @@ -517,28 +545,35 @@ def get_model_conversion_mapping(
For a given `model`, obtain the weight conversion mapping if any are registered either as a simple renaming
`_checkpoint_conversion_mapping` class argument, or in the general WeightConverter mapping.
"""
# Lazy import to avoid circular import issues
from .modeling_utils import PreTrainedModel

# note: this function is used in PEFT, so changing the API requires coordination
weight_conversions = []

# Load models with explicit, user-provided key mapping
if key_mapping is not None:
weight_conversions = [WeightRenaming(source_patterns=k, target_patterns=v) for k, v in key_mapping.items()]
elif any(
allowed_name in class_name.__name__.lower()
for class_name in model.__class__.__mro__[:-1]
for allowed_name in VLMS
):
weight_conversions = [
WeightRenaming(source_patterns=k, target_patterns=v)
for k, v in model._checkpoint_conversion_mapping.items()
]

# TODO: should be checked recursively on submodels!!
model_type = getattr(model.config, "model_type", None)
if model_type is not None:
model_specific_conversions = get_checkpoint_conversion_mapping(model_type)
if model_specific_conversions is not None:
weight_conversions.extend(model_specific_conversions)
# A model may contain several `PreTrainedModel` submodules sharing the same model type.
# For example: XForConditionalGeneration -> XModel. We don't want to apply the same
# conversion pattern twice because of that.
Comment on lines +558 to +560
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we consider linking each regex with certain class in the future? It will also help with those dangling lm_head which are applied only for generation model and not base model

seen_model_types = set()
if (conversions := extract_weight_conversions_for_model(model)) is not None:
weight_conversions.extend(conversions)
seen_model_types.add(model.config.model_type)

# Recurse over submodules and collect all conversions
for submodule in model.modules():
if (
submodule is not model
and isinstance(submodule, PreTrainedModel)
and submodule.config.model_type not in seen_model_types
):
conversions = extract_weight_conversions_for_model(submodule)
if conversions is not None:
weight_conversions.extend(conversions)
seen_model_types.add(submodule.config.model_type)
Comment thread
zucchini-nlp marked this conversation as resolved.

if add_legacy:
weight_conversions.extend(get_checkpoint_conversion_mapping("legacy"))
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/modeling_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4100,7 +4100,7 @@ def from_pretrained(
# instantiated model, as the flags can be modified by instances sometimes)
dtype_plan = model._get_dtype_plan(dtype)

# Obtain the weight conversion mapping for this model if any are registered
# Obtain the weight conversion mapping for this model if any are registered and apply to all submodels recursively
weight_conversions = get_model_conversion_mapping(model, key_mapping, hf_quantizer)

if _torch_distributed_available and device_mesh is not None: # add hooks to nn.Modules: no weights
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1621,26 +1621,6 @@ def _set_aux_loss(self, outputs_class, outputs_coord):
"""
)
class ConditionalDetrForSegmentation(ConditionalDetrPreTrainedModel):
_checkpoint_conversion_mapping = {
"bbox_attention.q_linear": "bbox_attention.q_proj",
"bbox_attention.k_linear": "bbox_attention.k_proj",
# Mask head refactor
"mask_head.lay1": "mask_head.conv1.conv",
"mask_head.gn1": "mask_head.conv1.norm",
"mask_head.lay2": "mask_head.conv2.conv",
"mask_head.gn2": "mask_head.conv2.norm",
"mask_head.adapter1": "mask_head.fpn_stages.0.fpn_adapter",
"mask_head.lay3": "mask_head.fpn_stages.0.refine.conv",
"mask_head.gn3": "mask_head.fpn_stages.0.refine.norm",
"mask_head.adapter2": "mask_head.fpn_stages.1.fpn_adapter",
"mask_head.lay4": "mask_head.fpn_stages.1.refine.conv",
"mask_head.gn4": "mask_head.fpn_stages.1.refine.norm",
"mask_head.adapter3": "mask_head.fpn_stages.2.fpn_adapter",
"mask_head.lay5": "mask_head.fpn_stages.2.refine.conv",
"mask_head.gn5": "mask_head.fpn_stages.2.refine.norm",
"mask_head.out_lay": "mask_head.output_conv",
}

def __init__(self, config: ConditionalDetrConfig):
super().__init__(config)

Expand Down
20 changes: 0 additions & 20 deletions src/transformers/models/detr/modeling_detr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1435,26 +1435,6 @@ def forward(
"""
)
class DetrForSegmentation(DetrPreTrainedModel):
_checkpoint_conversion_mapping = {
"bbox_attention.q_linear": "bbox_attention.q_proj",
"bbox_attention.k_linear": "bbox_attention.k_proj",
Comment on lines -1438 to -1440
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

moved to conversion_mapping.py

# Mask head refactor
"mask_head.lay1": "mask_head.conv1.conv",
"mask_head.gn1": "mask_head.conv1.norm",
"mask_head.lay2": "mask_head.conv2.conv",
"mask_head.gn2": "mask_head.conv2.norm",
"mask_head.adapter1": "mask_head.fpn_stages.0.fpn_adapter",
"mask_head.lay3": "mask_head.fpn_stages.0.refine.conv",
"mask_head.gn3": "mask_head.fpn_stages.0.refine.norm",
"mask_head.adapter2": "mask_head.fpn_stages.1.fpn_adapter",
"mask_head.lay4": "mask_head.fpn_stages.1.refine.conv",
"mask_head.gn4": "mask_head.fpn_stages.1.refine.norm",
"mask_head.adapter3": "mask_head.fpn_stages.2.fpn_adapter",
"mask_head.lay5": "mask_head.fpn_stages.2.refine.conv",
"mask_head.gn5": "mask_head.fpn_stages.2.refine.norm",
"mask_head.out_lay": "mask_head.output_conv",
}

def __init__(self, config: DetrConfig):
super().__init__(config)

Expand Down
Loading
Loading