diff --git a/src/transformers/models/apertus/modeling_apertus.py b/src/transformers/models/apertus/modeling_apertus.py
index 77a3d65478d6..4cb0d51b3cc7 100644
--- a/src/transformers/models/apertus/modeling_apertus.py
+++ b/src/transformers/models/apertus/modeling_apertus.py
@@ -94,6 +94,20 @@ def __init__(self, config: ApertusConfig, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[ApertusConfig] = None,
diff --git a/src/transformers/models/arcee/modeling_arcee.py b/src/transformers/models/arcee/modeling_arcee.py
index 779f4a63e378..08ea9a1a879e 100644
--- a/src/transformers/models/arcee/modeling_arcee.py
+++ b/src/transformers/models/arcee/modeling_arcee.py
@@ -101,6 +101,20 @@ def __init__(self, config: ArceeConfig, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[ArceeConfig] = None,
diff --git a/src/transformers/models/aria/modeling_aria.py b/src/transformers/models/aria/modeling_aria.py
index 96a6a82da91d..52902efb044a 100644
--- a/src/transformers/models/aria/modeling_aria.py
+++ b/src/transformers/models/aria/modeling_aria.py
@@ -636,6 +636,20 @@ def __init__(self, config: AriaTextConfig, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[AriaTextConfig] = None,
diff --git a/src/transformers/models/bamba/modeling_bamba.py b/src/transformers/models/bamba/modeling_bamba.py
index 2428222e0dbe..321f23bd6eca 100644
--- a/src/transformers/models/bamba/modeling_bamba.py
+++ b/src/transformers/models/bamba/modeling_bamba.py
@@ -213,6 +213,20 @@ def __init__(self, config: BambaConfig, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[BambaConfig] = None,
diff --git a/src/transformers/models/bitnet/modeling_bitnet.py b/src/transformers/models/bitnet/modeling_bitnet.py
index 73597dd98d82..37c144f45433 100644
--- a/src/transformers/models/bitnet/modeling_bitnet.py
+++ b/src/transformers/models/bitnet/modeling_bitnet.py
@@ -287,6 +287,20 @@ def __init__(self, config: BitNetConfig, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[BitNetConfig] = None,
diff --git a/src/transformers/models/blt/modeling_blt.py b/src/transformers/models/blt/modeling_blt.py
index d4b19101c861..00eda014ef78 100644
--- a/src/transformers/models/blt/modeling_blt.py
+++ b/src/transformers/models/blt/modeling_blt.py
@@ -104,6 +104,20 @@ def __init__(self, config: BltConfig, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[BltConfig] = None,
diff --git a/src/transformers/models/chameleon/modeling_chameleon.py b/src/transformers/models/chameleon/modeling_chameleon.py
index 1bf2179deec6..c8589b5ad493 100644
--- a/src/transformers/models/chameleon/modeling_chameleon.py
+++ b/src/transformers/models/chameleon/modeling_chameleon.py
@@ -85,6 +85,20 @@ def __init__(self, config: ChameleonConfig, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[ChameleonConfig] = None,
diff --git a/src/transformers/models/cohere/modeling_cohere.py b/src/transformers/models/cohere/modeling_cohere.py
index 9fc2593d3175..866b94883903 100644
--- a/src/transformers/models/cohere/modeling_cohere.py
+++ b/src/transformers/models/cohere/modeling_cohere.py
@@ -84,6 +84,20 @@ def __init__(self, config: CohereConfig, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[CohereConfig] = None,
diff --git a/src/transformers/models/cohere2/modeling_cohere2.py b/src/transformers/models/cohere2/modeling_cohere2.py
index a9c56cd2491c..78dc32392ddd 100644
--- a/src/transformers/models/cohere2/modeling_cohere2.py
+++ b/src/transformers/models/cohere2/modeling_cohere2.py
@@ -59,6 +59,20 @@ def __init__(self, config: Cohere2Config, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[Cohere2Config] = None,
diff --git a/src/transformers/models/csm/modeling_csm.py b/src/transformers/models/csm/modeling_csm.py
index 87da76281717..37e111f30f5f 100644
--- a/src/transformers/models/csm/modeling_csm.py
+++ b/src/transformers/models/csm/modeling_csm.py
@@ -137,6 +137,20 @@ def __init__(self, config: CsmConfig, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[CsmConfig] = None,
diff --git a/src/transformers/models/cwm/modeling_cwm.py b/src/transformers/models/cwm/modeling_cwm.py
index df9760ed1ba7..b8b3b35264d6 100644
--- a/src/transformers/models/cwm/modeling_cwm.py
+++ b/src/transformers/models/cwm/modeling_cwm.py
@@ -60,6 +60,20 @@ def __init__(self, config: CwmConfig, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[CwmConfig] = None,
diff --git a/src/transformers/models/dbrx/modeling_dbrx.py b/src/transformers/models/dbrx/modeling_dbrx.py
index ddf5fce4dfce..943c400a15c7 100644
--- a/src/transformers/models/dbrx/modeling_dbrx.py
+++ b/src/transformers/models/dbrx/modeling_dbrx.py
@@ -59,6 +59,20 @@ def __init__(self, config: DbrxConfig, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[DbrxConfig] = None,
diff --git a/src/transformers/models/deepseek_v2/modeling_deepseek_v2.py b/src/transformers/models/deepseek_v2/modeling_deepseek_v2.py
index 89230d7a80b2..272bfdccf37d 100644
--- a/src/transformers/models/deepseek_v2/modeling_deepseek_v2.py
+++ b/src/transformers/models/deepseek_v2/modeling_deepseek_v2.py
@@ -187,6 +187,20 @@ def __init__(self, config: DeepseekV2Config, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[DeepseekV2Config] = None,
diff --git a/src/transformers/models/deepseek_v3/modeling_deepseek_v3.py b/src/transformers/models/deepseek_v3/modeling_deepseek_v3.py
index cfd8d91dfb9a..25eff09f7028 100644
--- a/src/transformers/models/deepseek_v3/modeling_deepseek_v3.py
+++ b/src/transformers/models/deepseek_v3/modeling_deepseek_v3.py
@@ -73,6 +73,20 @@ def __init__(self, config: DeepseekV3Config, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[DeepseekV3Config] = None,
diff --git a/src/transformers/models/dia/modeling_dia.py b/src/transformers/models/dia/modeling_dia.py
index 3a0ddf6e3f90..4cd9fa46037a 100644
--- a/src/transformers/models/dia/modeling_dia.py
+++ b/src/transformers/models/dia/modeling_dia.py
@@ -147,6 +147,20 @@ def __init__(self, config: DiaConfig, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[DiaConfig] = None,
diff --git a/src/transformers/models/diffllama/modeling_diffllama.py b/src/transformers/models/diffllama/modeling_diffllama.py
index 99524915b9f6..a5a7785a7546 100644
--- a/src/transformers/models/diffllama/modeling_diffllama.py
+++ b/src/transformers/models/diffllama/modeling_diffllama.py
@@ -88,6 +88,20 @@ def __init__(self, config: DiffLlamaConfig, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[DiffLlamaConfig] = None,
diff --git a/src/transformers/models/doge/modeling_doge.py b/src/transformers/models/doge/modeling_doge.py
index b9ebf9856264..ed74bd8c6d0d 100644
--- a/src/transformers/models/doge/modeling_doge.py
+++ b/src/transformers/models/doge/modeling_doge.py
@@ -90,6 +90,20 @@ def __init__(self, config: DogeConfig, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[DogeConfig] = None,
diff --git a/src/transformers/models/dots1/modeling_dots1.py b/src/transformers/models/dots1/modeling_dots1.py
index b8ae00b6ab60..79d9d1f380b9 100644
--- a/src/transformers/models/dots1/modeling_dots1.py
+++ b/src/transformers/models/dots1/modeling_dots1.py
@@ -82,6 +82,20 @@ def __init__(self, config: Dots1Config, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[Dots1Config] = None,
diff --git a/src/transformers/models/efficientloftr/modeling_efficientloftr.py b/src/transformers/models/efficientloftr/modeling_efficientloftr.py
index bdf6dd67ae48..39b7fb92b097 100644
--- a/src/transformers/models/efficientloftr/modeling_efficientloftr.py
+++ b/src/transformers/models/efficientloftr/modeling_efficientloftr.py
@@ -104,6 +104,21 @@ def __init__(self, config: EfficientLoFTRConfig, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    # Ignore copy
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     # Ignore copy
     def compute_default_rope_parameters(
diff --git a/src/transformers/models/emu3/modeling_emu3.py b/src/transformers/models/emu3/modeling_emu3.py
index 65671913b27f..bbd0f17ad395 100644
--- a/src/transformers/models/emu3/modeling_emu3.py
+++ b/src/transformers/models/emu3/modeling_emu3.py
@@ -1128,6 +1128,20 @@ def __init__(self, config: Emu3Config, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[Emu3Config] = None,
diff --git a/src/transformers/models/ernie4_5/modeling_ernie4_5.py b/src/transformers/models/ernie4_5/modeling_ernie4_5.py
index b53ddf923e70..afb04a9d89e9 100644
--- a/src/transformers/models/ernie4_5/modeling_ernie4_5.py
+++ b/src/transformers/models/ernie4_5/modeling_ernie4_5.py
@@ -58,6 +58,20 @@ def __init__(self, config: Ernie4_5Config, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[Ernie4_5Config] = None,
diff --git a/src/transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py b/src/transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py
index ccd05fe26347..8d66bb7fe12a 100644
--- a/src/transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py
+++ b/src/transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py
@@ -98,6 +98,20 @@ def __init__(self, config: Ernie4_5_MoeConfig, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[Ernie4_5_MoeConfig] = None,
diff --git a/src/transformers/models/evolla/modeling_evolla.py b/src/transformers/models/evolla/modeling_evolla.py
index f4d0ce11255f..de899034ea8d 100644
--- a/src/transformers/models/evolla/modeling_evolla.py
+++ b/src/transformers/models/evolla/modeling_evolla.py
@@ -982,6 +982,20 @@ def __init__(self, config: EvollaConfig, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[EvollaConfig] = None,
diff --git a/src/transformers/models/exaone4/modeling_exaone4.py b/src/transformers/models/exaone4/modeling_exaone4.py
index cb70c9cff142..2a9d2deb50ba 100644
--- a/src/transformers/models/exaone4/modeling_exaone4.py
+++ b/src/transformers/models/exaone4/modeling_exaone4.py
@@ -87,6 +87,20 @@ def __init__(self, config: Exaone4Config, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[Exaone4Config] = None,
diff --git a/src/transformers/models/falcon/modeling_falcon.py b/src/transformers/models/falcon/modeling_falcon.py
index 085ff65644e3..706749221848 100644
--- a/src/transformers/models/falcon/modeling_falcon.py
+++ b/src/transformers/models/falcon/modeling_falcon.py
@@ -123,6 +123,20 @@ def __init__(self, config: FalconConfig, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[FalconConfig] = None,
diff --git a/src/transformers/models/falcon_h1/modeling_falcon_h1.py b/src/transformers/models/falcon_h1/modeling_falcon_h1.py
index a6fd7a5aba99..8c3cd0e5171b 100644
--- a/src/transformers/models/falcon_h1/modeling_falcon_h1.py
+++ b/src/transformers/models/falcon_h1/modeling_falcon_h1.py
@@ -242,6 +242,20 @@ def __init__(self, config: FalconH1Config, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[FalconH1Config] = None,
diff --git a/src/transformers/models/flex_olmo/modeling_flex_olmo.py b/src/transformers/models/flex_olmo/modeling_flex_olmo.py
index b948b420ad63..0d6e745a4712 100644
--- a/src/transformers/models/flex_olmo/modeling_flex_olmo.py
+++ b/src/transformers/models/flex_olmo/modeling_flex_olmo.py
@@ -82,6 +82,20 @@ def __init__(self, config: FlexOlmoConfig, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[FlexOlmoConfig] = None,
diff --git a/src/transformers/models/gemma/modeling_gemma.py b/src/transformers/models/gemma/modeling_gemma.py
index 8834ba8c3564..8e7b21d17821 100644
--- a/src/transformers/models/gemma/modeling_gemma.py
+++ b/src/transformers/models/gemma/modeling_gemma.py
@@ -99,6 +99,20 @@ def __init__(self, config: GemmaConfig, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[GemmaConfig] = None,
diff --git a/src/transformers/models/gemma2/modeling_gemma2.py b/src/transformers/models/gemma2/modeling_gemma2.py
index 69e486032107..160a9d776137 100644
--- a/src/transformers/models/gemma2/modeling_gemma2.py
+++ b/src/transformers/models/gemma2/modeling_gemma2.py
@@ -103,6 +103,20 @@ def __init__(self, config: Gemma2Config, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[Gemma2Config] = None,
diff --git a/src/transformers/models/gemma3/modeling_gemma3.py b/src/transformers/models/gemma3/modeling_gemma3.py
index 8e93ef9231b5..31ef9e0011b8 100644
--- a/src/transformers/models/gemma3/modeling_gemma3.py
+++ b/src/transformers/models/gemma3/modeling_gemma3.py
@@ -168,6 +168,20 @@ def __init__(self, config: Gemma3TextConfig, device=None, layer_type=None):
             setattr(self, f"{layer_type}_original_inv_freq", curr_inv_freq)
             setattr(self, f"{layer_type}_attention_scaling", curr_attention_scaling)
 
+    def get_parameter_or_buffer(self, name: str):
+        """Return the parameter, buffer, or plain tensor attribute called `name` (dotted paths supported)."""
+        # Mirrors `PreTrainedModel.get_parameter_or_buffer`; unlike `getattr`, these never return methods.
+        for getter in (self.get_parameter, self.get_buffer):
+            try:
+                return getter(name)
+            except AttributeError:
+                continue
+        # Tensors stored as plain attributes (e.g. `original_inv_freq`) are still reachable.
+        value = getattr(self, name, None)
+        if isinstance(value, torch.Tensor):
+            return value
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[Gemma3TextConfig] = None,
diff --git a/src/transformers/models/gemma3n/modeling_gemma3n.py b/src/transformers/models/gemma3n/modeling_gemma3n.py
index cc0b919bc85c..6eaccd0288e0 100644
--- a/src/transformers/models/gemma3n/modeling_gemma3n.py
+++ b/src/transformers/models/gemma3n/modeling_gemma3n.py
@@ -1639,6 +1639,20 @@ def __init__(self, config: Gemma3nTextConfig, device=None, layer_type=None):
             setattr(self, f"{layer_type}_original_inv_freq", curr_inv_freq)
             setattr(self, f"{layer_type}_attention_scaling", curr_attention_scaling)
 
+    # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash.
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[Gemma3nTextConfig] = None, diff --git a/src/transformers/models/glm/modeling_glm.py b/src/transformers/models/glm/modeling_glm.py index 8a508e2de54c..aa579136e95c 100644 --- a/src/transformers/models/glm/modeling_glm.py +++ b/src/transformers/models/glm/modeling_glm.py @@ -81,6 +81,20 @@ def __init__(self, config: GlmConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[GlmConfig] = None, diff --git a/src/transformers/models/glm4/modeling_glm4.py b/src/transformers/models/glm4/modeling_glm4.py index c982c36f9aab..d0998c2bc474 100644 --- a/src/transformers/models/glm4/modeling_glm4.py +++ b/src/transformers/models/glm4/modeling_glm4.py @@ -285,6 +285,20 @@ def __init__(self, config: Glm4Config, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[Glm4Config] = None, diff --git a/src/transformers/models/glm4_moe/modeling_glm4_moe.py b/src/transformers/models/glm4_moe/modeling_glm4_moe.py index 84e6dd3bd77d..59db7a8800c3 100644 --- a/src/transformers/models/glm4_moe/modeling_glm4_moe.py +++ b/src/transformers/models/glm4_moe/modeling_glm4_moe.py @@ -62,6 +62,20 @@ def __init__(self, config: Glm4MoeConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[Glm4MoeConfig] = None, diff --git a/src/transformers/models/glm4v/modeling_glm4v.py b/src/transformers/models/glm4v/modeling_glm4v.py index ff5e0a00cc0d..fc1200bc43a6 100644 --- a/src/transformers/models/glm4v/modeling_glm4v.py +++ b/src/transformers/models/glm4v/modeling_glm4v.py @@ -405,6 +405,20 @@ def __init__(self, config: Glm4vTextConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[Glm4vTextConfig] = None, diff --git a/src/transformers/models/glm4v_moe/modeling_glm4v_moe.py b/src/transformers/models/glm4v_moe/modeling_glm4v_moe.py index 373d49bc942c..4c1aa238990f 100644 --- a/src/transformers/models/glm4v_moe/modeling_glm4v_moe.py +++ b/src/transformers/models/glm4v_moe/modeling_glm4v_moe.py @@ -109,6 +109,20 @@ def __init__(self, config: Glm4vMoeTextConfig, device=None, layer_type=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[Glm4vMoeTextConfig] = None, diff --git a/src/transformers/models/gpt_neox/modeling_gpt_neox.py b/src/transformers/models/gpt_neox/modeling_gpt_neox.py index fc7d6fd40a80..8f0f89d47650 100755 --- a/src/transformers/models/gpt_neox/modeling_gpt_neox.py +++ b/src/transformers/models/gpt_neox/modeling_gpt_neox.py @@ -68,6 +68,20 @@ def __init__(self, config: GPTNeoXConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[GPTNeoXConfig] = None, diff --git a/src/transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py b/src/transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py index f723defcd088..e9ce296a3105 100755 --- a/src/transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +++ b/src/transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py @@ -79,6 +79,20 @@ def __init__(self, config: GPTNeoXJapaneseConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[GPTNeoXJapaneseConfig] = None, diff --git a/src/transformers/models/gpt_oss/modeling_gpt_oss.py b/src/transformers/models/gpt_oss/modeling_gpt_oss.py index 8e1ce9df0b97..bb6d4bdc6d0f 100644 --- a/src/transformers/models/gpt_oss/modeling_gpt_oss.py +++ b/src/transformers/models/gpt_oss/modeling_gpt_oss.py @@ -191,6 +191,20 @@ def __init__(self, config: GptOssConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[GptOssConfig] = None, diff --git a/src/transformers/models/granite/modeling_granite.py b/src/transformers/models/granite/modeling_granite.py index 42de2e0724f3..69a507bc3ebb 100644 --- a/src/transformers/models/granite/modeling_granite.py +++ b/src/transformers/models/granite/modeling_granite.py @@ -337,6 +337,20 @@ def __init__(self, config: GraniteConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[GraniteConfig] = None, diff --git a/src/transformers/models/granitemoe/modeling_granitemoe.py b/src/transformers/models/granitemoe/modeling_granitemoe.py index f722ad416a2f..fd35d066ab55 100644 --- a/src/transformers/models/granitemoe/modeling_granitemoe.py +++ b/src/transformers/models/granitemoe/modeling_granitemoe.py @@ -82,6 +82,20 @@ def __init__(self, config: GraniteMoeConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[GraniteMoeConfig] = None, diff --git a/src/transformers/models/granitemoehybrid/modeling_granitemoehybrid.py b/src/transformers/models/granitemoehybrid/modeling_granitemoehybrid.py index c9e7245956f3..2dc56e0aad96 100644 --- a/src/transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +++ b/src/transformers/models/granitemoehybrid/modeling_granitemoehybrid.py @@ -881,6 +881,20 @@ def __init__(self, config: GraniteMoeHybridConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[GraniteMoeHybridConfig] = None, diff --git a/src/transformers/models/granitemoeshared/modeling_granitemoeshared.py b/src/transformers/models/granitemoeshared/modeling_granitemoeshared.py index 606a59390e6e..6b65b3991ba4 100644 --- a/src/transformers/models/granitemoeshared/modeling_granitemoeshared.py +++ b/src/transformers/models/granitemoeshared/modeling_granitemoeshared.py @@ -494,6 +494,20 @@ def __init__(self, config: GraniteMoeSharedConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[GraniteMoeSharedConfig] = None, diff --git a/src/transformers/models/helium/modeling_helium.py b/src/transformers/models/helium/modeling_helium.py index e616da3cd07b..4111a6f19a0e 100644 --- a/src/transformers/models/helium/modeling_helium.py +++ b/src/transformers/models/helium/modeling_helium.py @@ -80,6 +80,20 @@ def __init__(self, config: HeliumConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[HeliumConfig] = None, diff --git a/src/transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py b/src/transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py index 4d184a0b1982..8ac8f9515700 100644 --- a/src/transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +++ b/src/transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py @@ -320,6 +320,20 @@ def __init__(self, config: HunYuanDenseV1Config, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[HunYuanDenseV1Config] = None, diff --git a/src/transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py b/src/transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py index 281a50a9e2cc..5caaf2636eee 100644 --- a/src/transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +++ b/src/transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py @@ -414,6 +414,20 @@ def __init__(self, config: HunYuanMoEV1Config, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[HunYuanMoEV1Config] = None, diff --git a/src/transformers/models/jetmoe/modeling_jetmoe.py b/src/transformers/models/jetmoe/modeling_jetmoe.py index b102a111e10f..24dab1f39dbc 100644 --- a/src/transformers/models/jetmoe/modeling_jetmoe.py +++ b/src/transformers/models/jetmoe/modeling_jetmoe.py @@ -85,6 +85,20 @@ def __init__(self, config: JetMoeConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[JetMoeConfig] = None, diff --git a/src/transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py b/src/transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py index 1e95b92d528d..4a8f8f4f1c91 100644 --- a/src/transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +++ b/src/transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py @@ -278,6 +278,20 @@ def __init__(self, config: KyutaiSpeechToTextConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[KyutaiSpeechToTextConfig] = None, diff --git a/src/transformers/models/lfm2/modeling_lfm2.py b/src/transformers/models/lfm2/modeling_lfm2.py index f1d639d16bbd..0f0b6005a3d6 100644 --- a/src/transformers/models/lfm2/modeling_lfm2.py +++ b/src/transformers/models/lfm2/modeling_lfm2.py @@ -85,6 +85,20 @@ def __init__(self, config: Lfm2Config, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[Lfm2Config] = None, diff --git a/src/transformers/models/lfm2_moe/modeling_lfm2_moe.py b/src/transformers/models/lfm2_moe/modeling_lfm2_moe.py index 73b9c4a8fde0..f0d274cd4a55 100644 --- a/src/transformers/models/lfm2_moe/modeling_lfm2_moe.py +++ b/src/transformers/models/lfm2_moe/modeling_lfm2_moe.py @@ -87,6 +87,20 @@ def __init__(self, config: Lfm2MoeConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[Lfm2MoeConfig] = None, diff --git a/src/transformers/models/llama/modeling_llama.py b/src/transformers/models/llama/modeling_llama.py index e3adac5d117d..5280d1600af7 100644 --- a/src/transformers/models/llama/modeling_llama.py +++ b/src/transformers/models/llama/modeling_llama.py @@ -89,6 +89,20 @@ def __init__(self, config: LlamaConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[LlamaConfig] = None, diff --git a/src/transformers/models/llama4/modeling_llama4.py b/src/transformers/models/llama4/modeling_llama4.py index 231e04c8eba2..8a162f7cfd94 100644 --- a/src/transformers/models/llama4/modeling_llama4.py +++ b/src/transformers/models/llama4/modeling_llama4.py @@ -190,6 +190,20 @@ def __init__(self, config: Llama4TextConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[Llama4TextConfig] = None, diff --git a/src/transformers/models/longcat_flash/modeling_longcat_flash.py b/src/transformers/models/longcat_flash/modeling_longcat_flash.py index 4135bce33d83..9ba7032dd11d 100644 --- a/src/transformers/models/longcat_flash/modeling_longcat_flash.py +++ b/src/transformers/models/longcat_flash/modeling_longcat_flash.py @@ -84,6 +84,20 @@ def __init__(self, config: LongcatFlashConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[LongcatFlashConfig] = None, diff --git a/src/transformers/models/mimi/modeling_mimi.py b/src/transformers/models/mimi/modeling_mimi.py index 7fca9c22e5f7..2e8a6151cc53 100644 --- a/src/transformers/models/mimi/modeling_mimi.py +++ b/src/transformers/models/mimi/modeling_mimi.py @@ -522,6 +522,20 @@ def __init__(self, config: MimiConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[MimiConfig] = None, diff --git a/src/transformers/models/minimax/modeling_minimax.py b/src/transformers/models/minimax/modeling_minimax.py index 004ed68cef23..7bb7b0571b4d 100644 --- a/src/transformers/models/minimax/modeling_minimax.py +++ b/src/transformers/models/minimax/modeling_minimax.py @@ -273,6 +273,20 @@ def __init__(self, config: MiniMaxConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[MiniMaxConfig] = None, diff --git a/src/transformers/models/ministral/modeling_ministral.py b/src/transformers/models/ministral/modeling_ministral.py index b1c8555fd96b..9084dcdd1d3d 100644 --- a/src/transformers/models/ministral/modeling_ministral.py +++ b/src/transformers/models/ministral/modeling_ministral.py @@ -289,6 +289,20 @@ def __init__(self, config: MinistralConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[MinistralConfig] = None, diff --git a/src/transformers/models/mistral/modeling_mistral.py b/src/transformers/models/mistral/modeling_mistral.py index 60c7e2d49eed..78bc293e2cd5 100644 --- a/src/transformers/models/mistral/modeling_mistral.py +++ b/src/transformers/models/mistral/modeling_mistral.py @@ -284,6 +284,20 @@ def __init__(self, config: MistralConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[MistralConfig] = None, diff --git a/src/transformers/models/mixtral/modeling_mixtral.py b/src/transformers/models/mixtral/modeling_mixtral.py index 1faff1f4dcea..37fbfb3c8892 100644 --- a/src/transformers/models/mixtral/modeling_mixtral.py +++ b/src/transformers/models/mixtral/modeling_mixtral.py @@ -172,6 +172,20 @@ def __init__(self, config: MixtralConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[MixtralConfig] = None, diff --git a/src/transformers/models/mllama/modeling_mllama.py b/src/transformers/models/mllama/modeling_mllama.py index a2d303782bdd..17d36bd4eb9b 100644 --- a/src/transformers/models/mllama/modeling_mllama.py +++ b/src/transformers/models/mllama/modeling_mllama.py @@ -743,6 +743,20 @@ def __init__(self, config: MllamaTextConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[MllamaTextConfig] = None, diff --git a/src/transformers/models/modernbert/modeling_modernbert.py b/src/transformers/models/modernbert/modeling_modernbert.py index 8069f2bec2ff..14aad9f316f2 100644 --- a/src/transformers/models/modernbert/modeling_modernbert.py +++ b/src/transformers/models/modernbert/modeling_modernbert.py @@ -269,6 +269,20 @@ def __init__(self, config: ModernBertConfig, device=None): setattr(self, f"{layer_type}_original_inv_freq", curr_inv_freq) setattr(self, f"{layer_type}_attention_scaling", curr_attention_scaling) + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[ModernBertConfig] = None, diff --git a/src/transformers/models/modernbert_decoder/modeling_modernbert_decoder.py b/src/transformers/models/modernbert_decoder/modeling_modernbert_decoder.py index 7564e375716b..1c20bf0e33b2 100644 --- a/src/transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +++ b/src/transformers/models/modernbert_decoder/modeling_modernbert_decoder.py @@ -121,6 +121,20 @@ def __init__(self, config: ModernBertDecoderConfig, device=None): setattr(self, f"{layer_type}_original_inv_freq", curr_inv_freq) setattr(self, f"{layer_type}_attention_scaling", curr_attention_scaling) + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[ModernBertDecoderConfig] = None, diff --git a/src/transformers/models/moonshine/modeling_moonshine.py b/src/transformers/models/moonshine/modeling_moonshine.py index 373e1db4a217..239c61a1445f 100644 --- a/src/transformers/models/moonshine/modeling_moonshine.py +++ b/src/transformers/models/moonshine/modeling_moonshine.py @@ -98,6 +98,20 @@ def __init__(self, config: MoonshineConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[MoonshineConfig] = None, diff --git a/src/transformers/models/moshi/modeling_moshi.py b/src/transformers/models/moshi/modeling_moshi.py index 09a2bcbaf430..b1735ac76716 100644 --- a/src/transformers/models/moshi/modeling_moshi.py +++ b/src/transformers/models/moshi/modeling_moshi.py @@ -290,6 +290,20 @@ def __init__(self, config: MoshiConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[MoshiConfig] = None, diff --git a/src/transformers/models/nemotron/modeling_nemotron.py b/src/transformers/models/nemotron/modeling_nemotron.py index af1d14ee2da0..b20acc0851c9 100644 --- a/src/transformers/models/nemotron/modeling_nemotron.py +++ b/src/transformers/models/nemotron/modeling_nemotron.py @@ -111,6 +111,20 @@ def __init__(self, config: NemotronConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod # Ignore copy def compute_default_rope_parameters( diff --git a/src/transformers/models/olmo/modeling_olmo.py b/src/transformers/models/olmo/modeling_olmo.py index 2ba7a25f71b5..f7bb0afda9a8 100644 --- a/src/transformers/models/olmo/modeling_olmo.py +++ b/src/transformers/models/olmo/modeling_olmo.py @@ -94,6 +94,20 @@ def __init__(self, config: OlmoConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[OlmoConfig] = None, diff --git a/src/transformers/models/olmo2/modeling_olmo2.py b/src/transformers/models/olmo2/modeling_olmo2.py index 44e3592157af..d98b9f0bc4cd 100644 --- a/src/transformers/models/olmo2/modeling_olmo2.py +++ b/src/transformers/models/olmo2/modeling_olmo2.py @@ -87,6 +87,20 @@ def __init__(self, config: Olmo2Config, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[Olmo2Config] = None, diff --git a/src/transformers/models/olmo3/modeling_olmo3.py b/src/transformers/models/olmo3/modeling_olmo3.py index d49570982f48..ce4371286b36 100644 --- a/src/transformers/models/olmo3/modeling_olmo3.py +++ b/src/transformers/models/olmo3/modeling_olmo3.py @@ -294,6 +294,20 @@ def __init__(self, config: Olmo3Config, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[Olmo3Config] = None, diff --git a/src/transformers/models/olmoe/modeling_olmoe.py b/src/transformers/models/olmoe/modeling_olmoe.py index f078518e0c1f..a6f802f94457 100644 --- a/src/transformers/models/olmoe/modeling_olmoe.py +++ b/src/transformers/models/olmoe/modeling_olmoe.py @@ -79,6 +79,20 @@ def __init__(self, config: OlmoeConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[OlmoeConfig] = None, diff --git a/src/transformers/models/persimmon/modeling_persimmon.py b/src/transformers/models/persimmon/modeling_persimmon.py index 4b09a2dd75bf..ed7012ba1162 100644 --- a/src/transformers/models/persimmon/modeling_persimmon.py +++ b/src/transformers/models/persimmon/modeling_persimmon.py @@ -78,6 +78,20 @@ def __init__(self, config: PersimmonConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod # Ignore copy def compute_default_rope_parameters( diff --git a/src/transformers/models/phi/modeling_phi.py b/src/transformers/models/phi/modeling_phi.py index 4a1530b78564..3beee5798cda 100644 --- a/src/transformers/models/phi/modeling_phi.py +++ b/src/transformers/models/phi/modeling_phi.py @@ -50,6 +50,20 @@ def __init__(self, config: PhiConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[PhiConfig] = None, diff --git a/src/transformers/models/phi3/modeling_phi3.py b/src/transformers/models/phi3/modeling_phi3.py index 29b3d2847ed1..dfea731126c9 100644 --- a/src/transformers/models/phi3/modeling_phi3.py +++ b/src/transformers/models/phi3/modeling_phi3.py @@ -84,6 +84,20 @@ def __init__(self, config: Phi3Config, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[Phi3Config] = None, diff --git a/src/transformers/models/phi4_multimodal/modeling_phi4_multimodal.py b/src/transformers/models/phi4_multimodal/modeling_phi4_multimodal.py index eab15068d252..d0d58089e0c5 100644 --- a/src/transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +++ b/src/transformers/models/phi4_multimodal/modeling_phi4_multimodal.py @@ -1461,6 +1461,20 @@ def __init__(self, config: Phi4MultimodalConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[Phi4MultimodalConfig] = None, diff --git a/src/transformers/models/phimoe/modeling_phimoe.py b/src/transformers/models/phimoe/modeling_phimoe.py index 12e41214094d..6206b4fe909a 100644 --- a/src/transformers/models/phimoe/modeling_phimoe.py +++ b/src/transformers/models/phimoe/modeling_phimoe.py @@ -61,6 +61,20 @@ def __init__(self, config: PhimoeConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[PhimoeConfig] = None, diff --git a/src/transformers/models/qwen2/modeling_qwen2.py b/src/transformers/models/qwen2/modeling_qwen2.py index 1215f3677603..4adb9842a05c 100644 --- a/src/transformers/models/qwen2/modeling_qwen2.py +++ b/src/transformers/models/qwen2/modeling_qwen2.py @@ -66,6 +66,20 @@ def __init__(self, config: Qwen2Config, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[Qwen2Config] = None, diff --git a/src/transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py b/src/transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py index 0826873a8f98..85aed1865063 100644 --- a/src/transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +++ b/src/transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py @@ -1251,6 +1251,20 @@ def __init__(self, config: Qwen2_5OmniThinkerConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[Qwen2_5OmniConfig] = None, @@ -2525,6 +2539,20 @@ def __init__(self, config: Qwen2_5OmniDiTConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. + def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[Qwen2_5OmniDiTConfig] = None, diff --git a/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py b/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py index b18e1e9f24dd..75600487df2c 100644 --- a/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +++ b/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py @@ -508,6 +508,20 @@ def __init__(self, config: Qwen2_5_VLConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so 
callers expecting PreTrainedModel-like API don't crash. + def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[Qwen2_5_VLConfig] = None, diff --git a/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py b/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py index 8bda140d3cdb..c0a931352f26 100644 --- a/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py +++ b/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py @@ -92,6 +92,20 @@ def __init__(self, config: Qwen2MoeConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[Qwen2MoeConfig] = None, diff --git a/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py b/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py index e058cceb1fa9..afbf4985851c 100644 --- a/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py +++ b/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py @@ -127,6 +127,20 @@ def __init__(self, config: Qwen2VLConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[Qwen2VLConfig] = None, diff --git a/src/transformers/models/qwen3/modeling_qwen3.py b/src/transformers/models/qwen3/modeling_qwen3.py index 5f0f8974eb0a..eb4cfa29ccda 100644 --- a/src/transformers/models/qwen3/modeling_qwen3.py +++ b/src/transformers/models/qwen3/modeling_qwen3.py @@ -102,6 +102,20 @@ def __init__(self, config: Qwen3Config, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[Qwen3Config] = None, diff --git a/src/transformers/models/qwen3_moe/modeling_qwen3_moe.py b/src/transformers/models/qwen3_moe/modeling_qwen3_moe.py index 477694d5fb2b..13f14ce0951c 100644 --- a/src/transformers/models/qwen3_moe/modeling_qwen3_moe.py +++ b/src/transformers/models/qwen3_moe/modeling_qwen3_moe.py @@ -402,6 +402,20 @@ def __init__(self, config: Qwen3MoeConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[Qwen3MoeConfig] = None, diff --git a/src/transformers/models/qwen3_next/modeling_qwen3_next.py b/src/transformers/models/qwen3_next/modeling_qwen3_next.py index 362c8fab007f..acbde512b7db 100644 --- a/src/transformers/models/qwen3_next/modeling_qwen3_next.py +++ b/src/transformers/models/qwen3_next/modeling_qwen3_next.py @@ -193,6 +193,20 @@ def __init__(self, config: Qwen3NextConfig, device=None): self.register_buffer("inv_freq", inv_freq, persistent=False) self.original_inv_freq = inv_freq + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+ def get_parameter_or_buffer(self, name: str): + # Prefer direct attribute access (parameters and buffers are attributes) + if hasattr(self, name): + return getattr(self, name) + # Fallback: search named parameters and buffers (non-recursive to keep semantics) + for n, p in self.named_parameters(recurse=False): + if n == name: + return p + for n, b in self.named_buffers(recurse=False): + if n == name: + return b + raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'") + @staticmethod def compute_default_rope_parameters( config: Optional[Qwen3NextConfig] = None, diff --git a/src/transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py b/src/transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py index 1be0487cea98..041d4ef80595 100644 --- a/src/transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +++ b/src/transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py @@ -1249,6 +1249,20 @@ def __init__(self, config: Qwen3OmniMoeTextConfig, device=None): self.mrope_section = config.rope_parameters.get("mrope_section", [24, 20, 20]) + # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash. 
+    def get_parameter_or_buffer(self, name: str):
+        # Prefer direct attribute access (parameters and buffers are attributes)
+        if hasattr(self, name):
+            return getattr(self, name)
+        # Fallback: search named parameters and buffers (non-recursive to keep semantics)
+        for n, p in self.named_parameters(recurse=False):
+            if n == name:
+                return p
+        for n, b in self.named_buffers(recurse=False):
+            if n == name:
+                return b
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[Qwen3OmniMoeTextConfig] = None,
@@ -2479,6 +2493,20 @@ def __init__(self, config: Qwen3OmniMoeConfig, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash.
+    def get_parameter_or_buffer(self, name: str):
+        # Prefer direct attribute access (parameters and buffers are attributes)
+        if hasattr(self, name):
+            return getattr(self, name)
+        # Fallback: search named parameters and buffers (non-recursive to keep semantics)
+        for n, p in self.named_parameters(recurse=False):
+            if n == name:
+                return p
+        for n, b in self.named_buffers(recurse=False):
+            if n == name:
+                return b
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[Qwen3OmniMoeConfig] = None,
diff --git a/src/transformers/models/qwen3_vl/modeling_qwen3_vl.py b/src/transformers/models/qwen3_vl/modeling_qwen3_vl.py
index d215f689da65..8b4a87a2e5ae 100644
--- a/src/transformers/models/qwen3_vl/modeling_qwen3_vl.py
+++ b/src/transformers/models/qwen3_vl/modeling_qwen3_vl.py
@@ -296,6 +296,20 @@ def __init__(self, config: Qwen3VLTextConfig, device=None):
 
         self.mrope_section = config.rope_parameters.get("mrope_section", [24, 20, 20])
 
+    # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash.
+    def get_parameter_or_buffer(self, name: str):
+        # Prefer direct attribute access (parameters and buffers are attributes)
+        if hasattr(self, name):
+            return getattr(self, name)
+        # Fallback: search named parameters and buffers (non-recursive to keep semantics)
+        for n, p in self.named_parameters(recurse=False):
+            if n == name:
+                return p
+        for n, b in self.named_buffers(recurse=False):
+            if n == name:
+                return b
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[Qwen3VLTextConfig] = None,
diff --git a/src/transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py b/src/transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py
index 4d7b41f290a5..9e0830e4fd58 100644
--- a/src/transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py
+++ b/src/transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py
@@ -817,6 +817,20 @@ def __init__(self, config: Qwen3VLMoeTextConfig, device=None):
 
         self.mrope_section = config.rope_parameters.get("mrope_section", [24, 20, 20])
 
+    # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash.
+    def get_parameter_or_buffer(self, name: str):
+        # Prefer direct attribute access (parameters and buffers are attributes)
+        if hasattr(self, name):
+            return getattr(self, name)
+        # Fallback: search named parameters and buffers (non-recursive to keep semantics)
+        for n, p in self.named_parameters(recurse=False):
+            if n == name:
+                return p
+        for n, b in self.named_buffers(recurse=False):
+            if n == name:
+                return b
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[Qwen3VLMoeTextConfig] = None,
diff --git a/src/transformers/models/recurrent_gemma/modeling_recurrent_gemma.py b/src/transformers/models/recurrent_gemma/modeling_recurrent_gemma.py
index dc1a3d4951e2..34a760ba120e 100644
--- a/src/transformers/models/recurrent_gemma/modeling_recurrent_gemma.py
+++ b/src/transformers/models/recurrent_gemma/modeling_recurrent_gemma.py
@@ -81,6 +81,20 @@ def __init__(self, config: RecurrentGemmaConfig, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash.
+    def get_parameter_or_buffer(self, name: str):
+        # Prefer direct attribute access (parameters and buffers are attributes)
+        if hasattr(self, name):
+            return getattr(self, name)
+        # Fallback: search named parameters and buffers (non-recursive to keep semantics)
+        for n, p in self.named_parameters(recurse=False):
+            if n == name:
+                return p
+        for n, b in self.named_buffers(recurse=False):
+            if n == name:
+                return b
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     # Ignore copy
     def compute_default_rope_parameters(
diff --git a/src/transformers/models/seed_oss/modeling_seed_oss.py b/src/transformers/models/seed_oss/modeling_seed_oss.py
index 682193ca8d51..f1549ff12685 100644
--- a/src/transformers/models/seed_oss/modeling_seed_oss.py
+++ b/src/transformers/models/seed_oss/modeling_seed_oss.py
@@ -312,6 +312,20 @@ def __init__(self, config: SeedOssConfig, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash.
+    def get_parameter_or_buffer(self, name: str):
+        # Prefer direct attribute access (parameters and buffers are attributes)
+        if hasattr(self, name):
+            return getattr(self, name)
+        # Fallback: search named parameters and buffers (non-recursive to keep semantics)
+        for n, p in self.named_parameters(recurse=False):
+            if n == name:
+                return p
+        for n, b in self.named_buffers(recurse=False):
+            if n == name:
+                return b
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[SeedOssConfig] = None,
diff --git a/src/transformers/models/smollm3/modeling_smollm3.py b/src/transformers/models/smollm3/modeling_smollm3.py
index e23d4993e84c..4eccbbc35387 100644
--- a/src/transformers/models/smollm3/modeling_smollm3.py
+++ b/src/transformers/models/smollm3/modeling_smollm3.py
@@ -65,6 +65,20 @@ def __init__(self, config: SmolLM3Config, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash.
+    def get_parameter_or_buffer(self, name: str):
+        # Prefer direct attribute access (parameters and buffers are attributes)
+        if hasattr(self, name):
+            return getattr(self, name)
+        # Fallback: search named parameters and buffers (non-recursive to keep semantics)
+        for n, p in self.named_parameters(recurse=False):
+            if n == name:
+                return p
+        for n, b in self.named_buffers(recurse=False):
+            if n == name:
+                return b
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[SmolLM3Config] = None,
diff --git a/src/transformers/models/stablelm/modeling_stablelm.py b/src/transformers/models/stablelm/modeling_stablelm.py
index 3b091726fab4..f2cd4ab2b8d7 100755
--- a/src/transformers/models/stablelm/modeling_stablelm.py
+++ b/src/transformers/models/stablelm/modeling_stablelm.py
@@ -77,6 +77,20 @@ def __init__(self, config: StableLmConfig, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash.
+    def get_parameter_or_buffer(self, name: str):
+        # Prefer direct attribute access (parameters and buffers are attributes)
+        if hasattr(self, name):
+            return getattr(self, name)
+        # Fallback: search named parameters and buffers (non-recursive to keep semantics)
+        for n, p in self.named_parameters(recurse=False):
+            if n == name:
+                return p
+        for n, b in self.named_buffers(recurse=False):
+            if n == name:
+                return b
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     # Ignore copy
     def compute_default_rope_parameters(
diff --git a/src/transformers/models/starcoder2/modeling_starcoder2.py b/src/transformers/models/starcoder2/modeling_starcoder2.py
index 042033fe3565..25072ba3899e 100644
--- a/src/transformers/models/starcoder2/modeling_starcoder2.py
+++ b/src/transformers/models/starcoder2/modeling_starcoder2.py
@@ -287,6 +287,20 @@ def __init__(self, config: Starcoder2Config, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash.
+    def get_parameter_or_buffer(self, name: str):
+        # Prefer direct attribute access (parameters and buffers are attributes)
+        if hasattr(self, name):
+            return getattr(self, name)
+        # Fallback: search named parameters and buffers (non-recursive to keep semantics)
+        for n, p in self.named_parameters(recurse=False):
+            if n == name:
+                return p
+        for n, b in self.named_buffers(recurse=False):
+            if n == name:
+                return b
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[Starcoder2Config] = None,
diff --git a/src/transformers/models/t5gemma/modeling_t5gemma.py b/src/transformers/models/t5gemma/modeling_t5gemma.py
index ac9b64929280..adf9d8ce69d6 100644
--- a/src/transformers/models/t5gemma/modeling_t5gemma.py
+++ b/src/transformers/models/t5gemma/modeling_t5gemma.py
@@ -109,6 +109,20 @@ def __init__(self, config: T5GemmaConfig, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash.
+    def get_parameter_or_buffer(self, name: str):
+        # Prefer direct attribute access (parameters and buffers are attributes)
+        if hasattr(self, name):
+            return getattr(self, name)
+        # Fallback: search named parameters and buffers (non-recursive to keep semantics)
+        for n, p in self.named_parameters(recurse=False):
+            if n == name:
+                return p
+        for n, b in self.named_buffers(recurse=False):
+            if n == name:
+                return b
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[T5GemmaConfig] = None,
diff --git a/src/transformers/models/vaultgemma/modeling_vaultgemma.py b/src/transformers/models/vaultgemma/modeling_vaultgemma.py
index ee36d7519a53..fa30fdeb0cb4 100644
--- a/src/transformers/models/vaultgemma/modeling_vaultgemma.py
+++ b/src/transformers/models/vaultgemma/modeling_vaultgemma.py
@@ -307,6 +307,20 @@ def __init__(self, config: VaultGemmaConfig, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash.
+    def get_parameter_or_buffer(self, name: str):
+        # Prefer direct attribute access (parameters and buffers are attributes)
+        if hasattr(self, name):
+            return getattr(self, name)
+        # Fallback: search named parameters and buffers (non-recursive to keep semantics)
+        for n, p in self.named_parameters(recurse=False):
+            if n == name:
+                return p
+        for n, b in self.named_buffers(recurse=False):
+            if n == name:
+                return b
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[VaultGemmaConfig] = None,
diff --git a/src/transformers/models/zamba2/modeling_zamba2.py b/src/transformers/models/zamba2/modeling_zamba2.py
index 5b5f532cebf7..25dd740f521c 100644
--- a/src/transformers/models/zamba2/modeling_zamba2.py
+++ b/src/transformers/models/zamba2/modeling_zamba2.py
@@ -225,6 +225,20 @@ def __init__(self, config: Zamba2Config, device=None):
         self.register_buffer("inv_freq", inv_freq, persistent=False)
         self.original_inv_freq = inv_freq
 
+    # Add a compatibility method so callers expecting PreTrainedModel-like API don't crash.
+    def get_parameter_or_buffer(self, name: str):
+        # Prefer direct attribute access (parameters and buffers are attributes)
+        if hasattr(self, name):
+            return getattr(self, name)
+        # Fallback: search named parameters and buffers (non-recursive to keep semantics)
+        for n, p in self.named_parameters(recurse=False):
+            if n == name:
+                return p
+        for n, b in self.named_buffers(recurse=False):
+            if n == name:
+                return b
+        raise AttributeError(f"{self.__class__.__name__} has no parameter or buffer named '{name}'")
+
     @staticmethod
     def compute_default_rope_parameters(
         config: Optional[Zamba2Config] = None,