diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py
index 211bfa95dee3..afcae7ddd75c 100755
--- a/src/transformers/configuration_utils.py
+++ b/src/transformers/configuration_utils.py
@@ -32,7 +32,6 @@
 from .generation.configuration_utils import GenerationConfig
 from .modeling_gguf_pytorch_utils import load_gguf_checkpoint
 from .modeling_rope_utils import RotaryEmbeddingConfigMixin
-from .tokenization_utils_base import PreTrainedTokenizerBase
 from .utils import (
     CONFIG_NAME,
     PushToHubMixin,
@@ -234,9 +233,6 @@ class PreTrainedConfig(PushToHubMixin, RotaryEmbeddingConfigMixin):
     label2id: dict[str, int] | dict[str, str] | None = None
     problem_type: Literal["regression", "single_label_classification", "multi_label_classification"] | None = None
 
-    # Tokenizer kwargs
-    tokenizer_class: str | PreTrainedTokenizerBase | None = None
-
     def __post_init__(self, **kwargs):
         # BC for the `torch_dtype` argument instead of the simpler `dtype`
         # Do not warn, as it would otherwise always be triggered since most configs on the hub have `torch_dtype`
diff --git a/src/transformers/models/mt5/configuration_mt5.py b/src/transformers/models/mt5/configuration_mt5.py
index 7dbc5bb83a56..72bae9c43951 100644
--- a/src/transformers/models/mt5/configuration_mt5.py
+++ b/src/transformers/models/mt5/configuration_mt5.py
@@ -29,8 +29,6 @@ class MT5Config(PreTrainedConfig):
         The maximum distance of the longer sequences for the bucket separation.
     feed_forward_proj (`str`, *optional*, defaults to `"gated-gelu"`):
         Type of feed forward layer to be used. Should be one of `"relu"` or `"gated-gelu"`.
-    tokenizer_class (`str`, *optional*, defaults to `"T5Tokenizer"`):
-        The tokenizer's class name.
     """
 
     model_type = "mt5"
@@ -57,7 +55,6 @@ class MT5Config(PreTrainedConfig):
     feed_forward_proj: str = "gated-gelu"
     is_encoder_decoder: bool = True
     use_cache: bool = True
-    tokenizer_class: str = "T5Tokenizer"
     tie_word_embeddings: bool = True
     bos_token_id: int | None = None
     pad_token_id: int | None = 0
diff --git a/src/transformers/models/umt5/configuration_umt5.py b/src/transformers/models/umt5/configuration_umt5.py
index 5c52f13e610e..5d78f4632dea 100644
--- a/src/transformers/models/umt5/configuration_umt5.py
+++ b/src/transformers/models/umt5/configuration_umt5.py
@@ -29,8 +29,6 @@ class UMT5Config(PreTrainedConfig):
         The maximum distance of the longer sequences for the bucket separation.
     feed_forward_proj (`str`, *optional*, defaults to `"gated-gelu"`):
         Type of feed forward layer to be used. Should be one of `"relu"` or `"gated-gelu"`.
-    tokenizer_class (`str`, *optional*, defaults to `"T5Tokenizer"`):
-        The tokenizer's class name
     """
 
     model_type = "umt5"
@@ -57,7 +55,6 @@ class UMT5Config(PreTrainedConfig):
     feed_forward_proj: str = "gated-gelu"
     is_encoder_decoder: bool = True
     use_cache: bool = True
-    tokenizer_class: str = "T5Tokenizer"
     pad_token_id: int | None = 0
     eos_token_id: int | list[int] | None = 1
     decoder_start_token_id: int | None = 0
diff --git a/tests/utils/test_configuration_utils.py b/tests/utils/test_configuration_utils.py
index f4b1041274d4..227c979990dd 100644
--- a/tests/utils/test_configuration_utils.py
+++ b/tests/utils/test_configuration_utils.py
@@ -146,7 +146,6 @@ def test_config_common_kwargs_is_complete(self):
             [
                 "transformers_version",
                 "is_encoder_decoder",
-                "tokenizer_class",
                 "_name_or_path",
                 "_commit_hash",
                 "_output_attentions",