Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions src/transformers/configuration_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
from .generation.configuration_utils import GenerationConfig
from .modeling_gguf_pytorch_utils import load_gguf_checkpoint
from .modeling_rope_utils import RotaryEmbeddingConfigMixin
from .tokenization_utils_base import PreTrainedTokenizerBase
from .utils import (
CONFIG_NAME,
PushToHubMixin,
Expand Down Expand Up @@ -234,9 +233,6 @@ class PreTrainedConfig(PushToHubMixin, RotaryEmbeddingConfigMixin):
label2id: dict[str, int] | dict[str, str] | None = None
problem_type: Literal["regression", "single_label_classification", "multi_label_classification"] | None = None

Comment on lines 231 to 235

Copilot AI Mar 24, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removing `tokenizer_class` from `PreTrainedConfig` will break existing tests and utilities that treat it as a common config kwarg/field. For example, `tests/utils/test_configuration_utils.py::test_config_common_kwargs_is_complete` expects `tokenizer_class` to be present in `PreTrainedConfig().__dict__`. Please update the corresponding test expectations (and any shared `config_common_kwargs`/common-config logic) to reflect the new base config surface, or keep a backward-compatible `tokenizer_class` field if it’s still considered part of the common config contract.

Copilot uses AI. Check for mistakes.
# Tokenizer kwargs
tokenizer_class: str | PreTrainedTokenizerBase | None = None

def __post_init__(self, **kwargs):
# BC for the `torch_dtype` argument instead of the simpler `dtype`
# Do not warn, as it would otherwise always be triggered since most configs on the hub have `torch_dtype`
Expand Down
3 changes: 0 additions & 3 deletions src/transformers/models/mt5/configuration_mt5.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@ class MT5Config(PreTrainedConfig):
The maximum distance of the longer sequences for the bucket separation.
feed_forward_proj (`str`, *optional*, defaults to `"gated-gelu"`):
Type of feed forward layer to be used. Should be one of `"relu"` or `"gated-gelu"`.
tokenizer_class (`str`, *optional*, defaults to `"T5Tokenizer"`):
The tokenizer's class name.
"""

model_type = "mt5"
Expand All @@ -57,7 +55,6 @@ class MT5Config(PreTrainedConfig):
feed_forward_proj: str = "gated-gelu"
is_encoder_decoder: bool = True
use_cache: bool = True
tokenizer_class: str = "T5Tokenizer"
tie_word_embeddings: bool = True
bos_token_id: int | None = None
pad_token_id: int | None = 0
Expand Down
3 changes: 0 additions & 3 deletions src/transformers/models/umt5/configuration_umt5.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@ class UMT5Config(PreTrainedConfig):
The maximum distance of the longer sequences for the bucket separation.
feed_forward_proj (`str`, *optional*, defaults to `"gated-gelu"`):
Type of feed forward layer to be used. Should be one of `"relu"` or `"gated-gelu"`.
tokenizer_class (`str`, *optional*, defaults to `"T5Tokenizer"`):
The tokenizer's class name
"""

model_type = "umt5"
Expand All @@ -57,7 +55,6 @@ class UMT5Config(PreTrainedConfig):
feed_forward_proj: str = "gated-gelu"
is_encoder_decoder: bool = True
use_cache: bool = True
tokenizer_class: str = "T5Tokenizer"
pad_token_id: int | None = 0
eos_token_id: int | list[int] | None = 1
decoder_start_token_id: int | None = 0
Expand Down
Loading