51 changes: 50 additions & 1 deletion src/transformers/configuration_utils.py
@@ -19,7 +19,8 @@
import math
import os
from collections.abc import Sequence
from dataclasses import dataclass
from dataclasses import MISSING, dataclass, fields
from functools import wraps
from typing import TYPE_CHECKING, Any, ClassVar, Literal, TypeVar, Union

from huggingface_hub import create_repo
@@ -71,6 +72,46 @@
)


# copied from huggingface_hub.dataclasses.strict when `accept_kwargs=True`
def wrap_init_to_accept_kwargs(cls: dataclass):
    original_init = cls.__init__

    @wraps(original_init)
    def __init__(self, *args, **kwargs: Any) -> None:
        # Extract only the fields that are part of the dataclass
        dataclass_fields = {f.name for f in fields(cls)}
        standard_kwargs = {k: v for k, v in kwargs.items() if k in dataclass_fields}

        # We need to call a bare `__init__` without `__post_init__`, because the `original_init` of
        # any dataclass contains a call to post-init at the end (without kwargs)
        if len(args) > 0:
            raise ValueError(
                f"{cls.__name__} accepts only keyword arguments, but found `{len(args)}` positional args."
            )

        for f in fields(cls):  # type: ignore
            if f.name in standard_kwargs:
                setattr(self, f.name, standard_kwargs[f.name])
            elif f.default is not MISSING:
                setattr(self, f.name, f.default)
            elif f.default_factory is not MISSING:
                setattr(self, f.name, f.default_factory())
            else:
                raise TypeError(f"Missing required field - '{f.name}'")

        # Pass any additional kwargs to `__post_init__` and let the object
        # decide whether to set the attr or use them for other purposes (e.g. BC checks)
        additional_kwargs = {}
        for name, value in kwargs.items():
            if name not in dataclass_fields:
                additional_kwargs[name] = value

        self.__post_init__(**additional_kwargs)

    cls.__init__ = __init__
    return cls


@strict(accept_kwargs=True)
@dataclass(repr=False)
class PreTrainedConfig(PushToHubMixin, RotaryEmbeddingConfigMixin):
@@ -252,8 +293,16 @@ def __post_init__(self, **kwargs):

    def __init_subclass__(cls, *args, **kwargs):
        super().__init_subclass__(*args, **kwargs)
        cls_has_custom_init = "__init__" in cls.__dict__
        cls = dataclass(cls, repr=False)

        if not cls_has_custom_init:
            # Wrap all subclasses to accept arbitrary kwargs for BC, but only if the
            # subclass has no custom `__init__`. Most remote code defines its own init,
            # but some models do not.
            # See https://huggingface.co/hmellor/Ilama-3.2-1B/blob/main/configuration_ilama.py
            cls = wrap_init_to_accept_kwargs(cls)

    @property
    def name_or_path(self) -> str | None:
        return getattr(self, "_name_or_path", None)
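To see what the copied helper does in isolation, here is a minimal, self-contained sketch. `DemoConfig` and its fields are hypothetical; only `wrap_init_to_accept_kwargs` comes from the diff above, and the manual wrap stands in for what `PreTrainedConfig.__init_subclass__` does automatically:

```python
from dataclasses import dataclass

from transformers.configuration_utils import wrap_init_to_accept_kwargs  # added in this PR


@dataclass
class DemoConfig:
    # Hypothetical config with two regular dataclass fields.
    hidden_size: int = 64
    num_layers: int = 2

    def __post_init__(self, **kwargs):
        # Unknown kwargs end up here; this sketch keeps them as attributes for BC.
        for name, value in kwargs.items():
            setattr(self, name, value)


DemoConfig = wrap_init_to_accept_kwargs(DemoConfig)

cfg = DemoConfig(hidden_size=128, legacy_flag=True)  # `legacy_flag` is not a field
print(cfg.hidden_size, cfg.num_layers, cfg.legacy_flag)  # 128 2 True

# Positional arguments are rejected by the wrapped init:
# DemoConfig(128)  ->  ValueError: DemoConfig accepts only keyword arguments, ...
```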
2 changes: 1 addition & 1 deletion src/transformers/models/afmoe/configuration_afmoe.py
@@ -20,7 +20,7 @@
from ...utils import auto_docstring


@strict(accept_kwargs=True)
@strict
@auto_docstring(
custom_intro="""
AFMoE is an Adaptive Feedforward MoE (Mixture of Experts) model with token-choice routing, shared experts, and a
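Every model file below makes the same one-line change: `@strict(accept_kwargs=True)` becomes bare `@strict`, since kwargs acceptance is now handled centrally by `PreTrainedConfig.__init_subclass__`. A stdlib-only sketch of that mechanism (`BaseConfig`/`TinyConfig` are hypothetical names; the `strict` validation layer from `huggingface_hub` is omitted):

```python
from dataclasses import dataclass

from transformers.configuration_utils import wrap_init_to_accept_kwargs  # added in this PR


class BaseConfig:
    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)
        # Check for a custom __init__ BEFORE dataclass() generates one.
        has_custom_init = "__init__" in cls.__dict__
        cls = dataclass(cls, repr=False)
        if not has_custom_init:
            cls = wrap_init_to_accept_kwargs(cls)

    def __post_init__(self, **kwargs):
        # In this sketch, unknown kwargs are simply kept as attributes for BC.
        for name, value in kwargs.items():
            setattr(self, name, value)


class TinyConfig(BaseConfig):
    hidden_size: int = 32


cfg = TinyConfig(hidden_size=16, rotary_pct=0.25)  # unknown kwarg tolerated
print(cfg.hidden_size, cfg.rotary_pct)  # 16 0.25
```

Subclasses that define their own `__init__` are deliberately skipped, so remote code with a hand-written init keeps its exact signature.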
6 changes: 3 additions & 3 deletions src/transformers/models/aimv2/configuration_aimv2.py
@@ -28,7 +28,7 @@


@auto_docstring(checkpoint="apple/aimv2-large-patch14-224-lit")
@strict(accept_kwargs=True)
@strict
class Aimv2VisionConfig(PreTrainedConfig):
r"""
use_head (`str`, *optional*, defaults to `True`):
@@ -72,7 +72,7 @@ class Aimv2VisionConfig(PreTrainedConfig):


@auto_docstring(checkpoint="apple/aimv2-large-patch14-224-lit")
@strict(accept_kwargs=True)
@strict
class Aimv2TextConfig(PreTrainedConfig):
r"""
Example:
@@ -111,7 +111,7 @@ def __post_init__(self, **kwargs):


@auto_docstring(checkpoint="apple/aimv2-large-patch14-224-lit")
@strict(accept_kwargs=True)
@strict
class Aimv2Config(PreTrainedConfig):
r"""
max_logit_scale (`float`, *optional*, defaults to `100.0`):
6 changes: 3 additions & 3 deletions src/transformers/models/aimv2/modular_aimv2.py
@@ -38,7 +38,7 @@


@auto_docstring(checkpoint="apple/aimv2-large-patch14-224-lit")
@strict(accept_kwargs=True)
@strict
class Aimv2VisionConfig(SiglipVisionConfig):
r"""
use_head (`str`, *optional*, defaults to `True`):
@@ -79,7 +79,7 @@ class Aimv2VisionConfig(SiglipVisionConfig):


@auto_docstring(checkpoint="apple/aimv2-large-patch14-224-lit")
@strict(accept_kwargs=True)
@strict
class Aimv2TextConfig(SiglipTextConfig):
vocab_size: int = 49408
hidden_size: int = 768
@@ -102,7 +102,7 @@ def __post_init__(self, **kwargs):


@auto_docstring(checkpoint="apple/aimv2-large-patch14-224-lit")
@strict(accept_kwargs=True)
@strict
class Aimv2Config(SiglipConfig):
r"""
max_logit_scale (`float`, *optional*, defaults to `100.0`):
2 changes: 1 addition & 1 deletion src/transformers/models/albert/configuration_albert.py
@@ -21,7 +21,7 @@


@auto_docstring(checkpoint="albert/albert-xxlarge-v2")
@strict(accept_kwargs=True)
@strict
class AlbertConfig(PreTrainedConfig):
r"""
inner_group_num (`int`, *optional*, defaults to 1):
6 changes: 3 additions & 3 deletions src/transformers/models/align/configuration_align.py
@@ -23,7 +23,7 @@


@auto_docstring(checkpoint="kakaobrain/align-base")
@strict(accept_kwargs=True)
@strict
class AlignTextConfig(PreTrainedConfig):
r"""
Example:
@@ -62,7 +62,7 @@ class AlignTextConfig(PreTrainedConfig):


@auto_docstring(checkpoint="kakaobrain/align-base")
@strict(accept_kwargs=True)
@strict
class AlignVisionConfig(PreTrainedConfig):
r"""
width_coefficient (`float`, *optional*, defaults to 2.0):
@@ -155,7 +155,7 @@ def __post_init__(self, **kwargs):


@auto_docstring(checkpoint="kakaobrain/align-base")
@strict(accept_kwargs=True)
@strict
class AlignConfig(PreTrainedConfig):
r"""
temperature_init_value (`float`, *optional*, defaults to 1.0):
6 changes: 3 additions & 3 deletions src/transformers/models/altclip/configuration_altclip.py
@@ -23,7 +23,7 @@


@auto_docstring(checkpoint="BAAI/AltCLIP")
@strict(accept_kwargs=True)
@strict
class AltCLIPTextConfig(PreTrainedConfig):
r"""
project_dim (`int`, *optional*, defaults to 768):
@@ -66,7 +66,7 @@ class AltCLIPTextConfig(PreTrainedConfig):


@auto_docstring(checkpoint="BAAI/AltCLIP")
@strict(accept_kwargs=True)
@strict
class AltCLIPVisionConfig(PreTrainedConfig):
r"""
Example:
@@ -103,7 +103,7 @@ class AltCLIPVisionConfig(PreTrainedConfig):


@auto_docstring(checkpoint="BAAI/AltCLIP")
@strict(accept_kwargs=True)
@strict
class AltCLIPConfig(PreTrainedConfig):
r"""
Example:
2 changes: 1 addition & 1 deletion src/transformers/models/apertus/configuration_apertus.py
@@ -26,7 +26,7 @@


@auto_docstring(checkpoint="swiss-ai/Apertus-8B-Instruct-2509")
@strict(accept_kwargs=True)
@strict
class ApertusConfig(PreTrainedConfig):
r"""
```python
2 changes: 1 addition & 1 deletion src/transformers/models/apertus/modular_apertus.py
@@ -44,7 +44,7 @@


@auto_docstring(checkpoint="swiss-ai/Apertus-8B-Instruct-2509")
@strict(accept_kwargs=True)
@strict
class ApertusConfig(PreTrainedConfig):
r"""
```python
2 changes: 1 addition & 1 deletion src/transformers/models/arcee/configuration_arcee.py
@@ -27,7 +27,7 @@


@auto_docstring(checkpoint="arcee-ai/AFM-4.5B")
@strict(accept_kwargs=True)
@strict
class ArceeConfig(PreTrainedConfig):
r"""
```python
2 changes: 1 addition & 1 deletion src/transformers/models/arcee/modular_arcee.py
@@ -32,7 +32,7 @@


@auto_docstring(checkpoint="arcee-ai/AFM-4.5B")
@strict(accept_kwargs=True)
@strict
class ArceeConfig(LlamaConfig):
r"""
```python
4 changes: 2 additions & 2 deletions src/transformers/models/aria/configuration_aria.py
@@ -27,7 +27,7 @@


@auto_docstring(checkpoint="rhymes-ai/Aria")
@strict(accept_kwargs=True)
@strict
class AriaTextConfig(PreTrainedConfig):
r"""
moe_num_experts (`int`, *optional*, defaults to 8):
@@ -100,7 +100,7 @@ def validate_architecture(self):


@auto_docstring(checkpoint="rhymes-ai/Aria")
@strict(accept_kwargs=True)
@strict
class AriaConfig(PreTrainedConfig):
r"""
projector_patch_to_query_dict (`dict`, *optional*):
4 changes: 2 additions & 2 deletions src/transformers/models/aria/modular_aria.py
@@ -98,7 +98,7 @@ def sequential_experts_gemm(token_states, expert_weights, tokens_per_expert):


@auto_docstring(checkpoint="rhymes-ai/Aria")
@strict(accept_kwargs=True)
@strict
class AriaTextConfig(LlamaConfig):
r"""
moe_num_experts (`int`, *optional*, defaults to 8):
@@ -129,7 +129,7 @@ class AriaTextConfig(LlamaConfig):


@auto_docstring(checkpoint="rhymes-ai/Aria")
@strict(accept_kwargs=True)
@strict
class AriaConfig(PreTrainedConfig):
r"""
projector_patch_to_query_dict (`dict`, *optional*):
@@ -20,7 +20,7 @@


@auto_docstring(checkpoint="MIT/ast-finetuned-audioset-10-10-0.4593")
@strict(accept_kwargs=True)
@strict
class ASTConfig(PreTrainedConfig):
r"""
frequency_stride (`int`, *optional*, defaults to 10):
@@ -22,7 +22,7 @@


@auto_docstring(checkpoint="nvidia/audio-flamingo-3-hf")
@strict(accept_kwargs=True)
@strict
class AudioFlamingo3EncoderConfig(PreTrainedConfig):
r"""
max_source_positions (`int`, *optional*, defaults to 1500):
@@ -69,7 +69,7 @@ class AudioFlamingo3EncoderConfig(PreTrainedConfig):


@auto_docstring(checkpoint="nvidia/audio-flamingo-3-hf")
@strict(accept_kwargs=True)
@strict
class AudioFlamingo3Config(PreTrainedConfig):
r"""
Example:
@@ -20,7 +20,7 @@


@auto_docstring(checkpoint="huggingface/autoformer-tourism-monthly")
@strict(accept_kwargs=True)
@strict
class AutoformerConfig(PreTrainedConfig):
r"""
prediction_length (`int`):
@@ -21,7 +21,7 @@


@auto_docstring(checkpoint="CohereForAI/aya-vision-8b")
@strict(accept_kwargs=True)
@strict
class AyaVisionConfig(PreTrainedConfig):
r"""
downsample_factor (`int`, *optional*, defaults to 2):
2 changes: 1 addition & 1 deletion src/transformers/models/bamba/configuration_bamba.py
@@ -20,7 +20,7 @@
from ...utils import auto_docstring


@strict(accept_kwargs=True)
@strict
@auto_docstring(
custom_intro="""
The BambaModel is a hybrid [mamba2](https://github.com/state-spaces/mamba) architecture with SwiGLU.
10 changes: 5 additions & 5 deletions src/transformers/models/bark/configuration_bark.py
@@ -24,7 +24,7 @@


@auto_docstring(checkpoint="suno/bark")
@strict(accept_kwargs=True)
@strict
class BarkSubModelConfig(PreTrainedConfig):
r"""
block_size (`int`, *optional*, defaults to 1024):
@@ -64,7 +64,7 @@ class BarkSubModelConfig(PreTrainedConfig):


@auto_docstring(checkpoint="suno/bark")
@strict(accept_kwargs=True)
@strict
class BarkSemanticConfig(BarkSubModelConfig):
r"""
block_size (`int`, *optional*, defaults to 1024):
@@ -101,7 +101,7 @@ class BarkSemanticConfig(BarkSubModelConfig):


@auto_docstring(checkpoint="suno/bark")
@strict(accept_kwargs=True)
@strict
class BarkCoarseConfig(BarkSubModelConfig):
r"""
block_size (`int`, *optional*, defaults to 1024):
@@ -138,7 +138,7 @@ class BarkCoarseConfig(BarkSubModelConfig):


@auto_docstring(checkpoint="suno/bark")
@strict(accept_kwargs=True)
@strict
class BarkFineConfig(BarkSubModelConfig):
r"""
block_size (`int`, *optional*, defaults to 1024):
@@ -184,7 +184,7 @@ class BarkFineConfig(BarkSubModelConfig):


@auto_docstring(checkpoint="suno/bark")
@strict(accept_kwargs=True)
@strict
class BarkConfig(PreTrainedConfig):
r"""
semantic_config ([`BarkSemanticConfig`], *optional*):
2 changes: 1 addition & 1 deletion src/transformers/models/bart/configuration_bart.py
@@ -20,7 +20,7 @@


@auto_docstring(checkpoint="facebook/bart-large")
@strict(accept_kwargs=True)
@strict
class BartConfig(PreTrainedConfig):
r"""
Example:
2 changes: 1 addition & 1 deletion src/transformers/models/beit/configuration_beit.py
@@ -21,7 +21,7 @@


@auto_docstring(checkpoint="microsoft/beit-base-patch16-224-pt22k")
@strict(accept_kwargs=True)
@strict
class BeitConfig(BackboneConfigMixin, PreTrainedConfig):
r"""
use_mask_token (`bool`, *optional*, defaults to `False`):
2 changes: 1 addition & 1 deletion src/transformers/models/bert/configuration_bert.py
@@ -21,7 +21,7 @@


@auto_docstring(checkpoint="google-bert/bert-base-uncased")
@strict(accept_kwargs=True)
@strict
class BertConfig(PreTrainedConfig):
r"""
Examples: