diff --git a/src/transformers/feature_extraction_utils.py b/src/transformers/feature_extraction_utils.py index 781a4d4603a9..f1b66f752da4 100644 --- a/src/transformers/feature_extraction_utils.py +++ b/src/transformers/feature_extraction_utils.py @@ -79,7 +79,8 @@ def __init__( skip_tensor_conversion: list[str] | set[str] | None = None, ): super().__init__(data) - self.convert_to_tensors(tensor_type=tensor_type, skip_tensor_conversion=skip_tensor_conversion) + self.skip_tensor_conversion = skip_tensor_conversion + self.convert_to_tensors(tensor_type=tensor_type) def __getitem__(self, item: str) -> Any: """ @@ -178,6 +179,9 @@ def convert_to_tensors( return self is_tensor, as_tensor = self._get_is_as_tensor_fns(tensor_type) + skip_tensor_conversion = ( + skip_tensor_conversion if skip_tensor_conversion is not None else self.skip_tensor_conversion + ) # Do the tensor conversion in batch for key, value in self.items(): diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index 5778119424b4..934c4d9a2d3b 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -2312,6 +2312,14 @@ def _initialize_weights(self, module): if getattr(module, "_is_hf_initialized", False): return + if ( + (weight := getattr(module, "weight", None)) is not None + and getattr(weight, "_is_hf_initialized", False) + and not list(module.named_buffers()) + ): + module._is_hf_initialized = True + return + self._init_weights(module) module._is_hf_initialized = True @@ -4202,6 +4210,9 @@ def _finalize_model_loading( missing keys from meta device to their expected device, reinitializing missing weights according to proper distributions, tying the weights and logging the loading report.""" try: + # Adjust `all_tied_weights_keys` before marking them as initialized + model._adjust_tied_keys_with_tied_pointers(loading_info.missing_and_mismatched()) + # Marks tied weights as `_is_hf_initialized` to avoid initializing them (it's very 
important for efficiency) model.mark_tied_weights_as_initialized() @@ -4417,6 +4428,35 @@ def get_compiled_call(self, compile_config: CompileConfig | None) -> Callable: def is_backend_compatible(cls): return cls._supports_attention_backend + def _adjust_tied_keys_with_tied_pointers(self, missing_keys: list[str]) -> None: + """ + Adds keys to `self.all_tied_weights_keys` by checking if any group of params + share the same data ptr. It helps us support remote code where the weight tying is + done in old-T5 style, by manually assigning the same module to different param names. + If we don't add them back in `self.all_tied_weights_keys`, they will be re-initialized + and all params in tied group get random weights. + """ + param_pointers = defaultdict(list) + for param_name, param_value in self.state_dict().items(): + param_pointers[param_value.data_ptr()].append(param_name) + + # Filter out params that are already in `self.all_tied_weights_keys` or if all + # are missing params. Missing param groups share the same data ptr by being on `meta` + tied_param_names = [ + names + for names in param_pointers.values() + if len(names) > 1 + and not any(name in self.all_tied_weights_keys.keys() for name in names) + and not all(name in missing_keys for name in names) + ] + + # Create a dummy mapping, it doesn't matter which one is source/target + # because they are already tied + tied_weights_keys_by_pointers = { + param_name: group[0] for group in tied_param_names for param_name in group[1:] + } + self.all_tied_weights_keys.update(tied_weights_keys_by_pointers) + def _move_missing_keys_from_meta_to_device( self, missing_keys: list[str], diff --git a/src/transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py b/src/transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py index 08ec7cf4e6e3..3d2254e2acf7 100644 --- a/src/transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +++ b/src/transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py @@ -264,6 
+264,8 @@ class Qwen2_5OmniTextConfig(PreTrainedConfig): with longer `max_position_embeddings`. initializer_range (`float`, *optional*, defaults to 0.02): The standard deviation of the truncated_normal_initializer for initializing all weight matrices. + tie_word_embeddings (`bool`, *optional*, defaults to `True`): + Whether to tie weight embeddings Example: @@ -328,6 +330,7 @@ def __init__( pad_token_id: int | None = None, bos_token_id: int | None = None, eos_token_id: int | None = None, + tie_word_embeddings: bool | None = True, **kwargs, ): self.vocab_size = vocab_size @@ -342,6 +345,7 @@ def __init__( self.pad_token_id = pad_token_id self.bos_token_id = bos_token_id self.eos_token_id = eos_token_id + self.tie_word_embeddings = tie_word_embeddings # for backward compatibility if num_key_value_heads is None: diff --git a/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py b/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py index c8b70ed0807e..f1ff53230297 100644 --- a/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +++ b/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py @@ -304,6 +304,8 @@ class Qwen2_5OmniTextConfig(PreTrainedConfig): with longer `max_position_embeddings`. initializer_range (`float`, *optional*, defaults to 0.02): The standard deviation of the truncated_normal_initializer for initializing all weight matrices. 
+ tie_word_embeddings (`bool`, *optional*, defaults to `True`): + Whether to tie weight embeddings Example: @@ -368,6 +370,7 @@ def __init__( pad_token_id: int | None = None, bos_token_id: int | None = None, eos_token_id: int | None = None, + tie_word_embeddings: bool | None = True, **kwargs, ): self.vocab_size = vocab_size @@ -382,6 +385,7 @@ def __init__( self.pad_token_id = pad_token_id self.bos_token_id = bos_token_id self.eos_token_id = eos_token_id + self.tie_word_embeddings = tie_word_embeddings # for backward compatibility if num_key_value_heads is None: diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index 5924099efe20..8c1199cb3f7f 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -228,15 +228,13 @@ class methods and docstrings. Attributes: do_convert_rgb (`bool`): - Whether to convert the video to RGB format. + Whether to convert the image to RGB format. do_resize (`bool`, *optional*): Whether to resize the image. size (`dict[str, int]`, *optional*): Resize the shorter side of the input to `size["shortest_edge"]`. crop_size (`dict[str, int]`, *optional*): Desired output size when applying center-cropping. - do_convert_rgb (`bool`): - Whether to convert the video to RGB format. resample (`PILImageResampling`, *optional*): Resampling filter to use if resizing the image. do_rescale (`bool`, *optional*): diff --git a/src/transformers/tokenization_python.py b/src/transformers/tokenization_python.py index de6326a7ccd4..9f8702f5b2a1 100644 --- a/src/transformers/tokenization_python.py +++ b/src/transformers/tokenization_python.py @@ -433,7 +433,7 @@ def __init__(self, **kwargs): # 5. Special tokens mask configuration # Patterns: "none", "cls_sep", "eos", "bos", "bos_eos", "cls_double_sep", "prefix_suffix" - self.special_tokens_pattern = kwargs.pop("special_tokens_pattern", "cls_sep") + self.special_tokens_pattern = kwargs.pop("special_tokens_pattern", None) # 6. 
Set backend to "custom" if not already set (for direct PreTrainedTokenizer subclasses) if "backend" not in kwargs: @@ -883,30 +883,62 @@ def build_inputs_with_special_tokens( """ if self.special_tokens_pattern == "cls_sep": # [CLS] seq0 [SEP] or [CLS] seq0 [SEP] seq1 [SEP] + if self.cls_token_id is None and self.sep_token_id is None: + raise ValueError( + "Cannot add special tokens following 'cls_sep' pattern because one or several special tokens " + f"are not defined (cls_token_id={self.cls_token_id}; sep_token_id={self.sep_token_id}). " + "Set the required special tokens in tokenizer or update `tokenizer.special_tokens_pattern`" + ) if token_ids_1 is None: return [self.cls_token_id] + token_ids_0 + [self.sep_token_id] return [self.cls_token_id] + token_ids_0 + [self.sep_token_id] + token_ids_1 + [self.sep_token_id] elif self.special_tokens_pattern == "eos": # seq0 [EOS] or seq0 [EOS] seq1 [EOS] + if self.eos_token_id is None: + raise ValueError( + "Cannot add special tokens following 'eos' pattern because eos token is not defined " + f"(eos_token_id={self.eos_token_id}). " + "Set the required special tokens in tokenizer or update `tokenizer.special_tokens_pattern`" + ) if token_ids_1 is None: return token_ids_0 + [self.eos_token_id] return token_ids_0 + [self.eos_token_id] + token_ids_1 + [self.eos_token_id] elif self.special_tokens_pattern == "bos": # [BOS] seq0 or [BOS] seq0 [BOS] seq1 + if self.bos_token_id is None: + raise ValueError( + "Cannot add special tokens following 'bos' pattern because bos token is not defined " + f"(bos_token_id={self.bos_token_id})."
+ " Set the required special tokens in tokenizer or update `tokenizer.special_tokens_pattern`" + ) if token_ids_1 is None: return [self.bos_token_id] + token_ids_0 return [self.bos_token_id] + token_ids_0 + [self.bos_token_id] + token_ids_1 elif self.special_tokens_pattern == "bos_eos": # [BOS] seq0 [EOS] or [BOS] seq0 [EOS] seq1 [EOS] + if self.bos_token_id is None and self.eos_token_id is None: + raise ValueError( + "Cannot add special tokens following 'bos_eos' pattern because one or several special tokens " + f"are not defined (bos_token_id={self.bos_token_id}; eos_token_id={self.eos_token_id}). " + "Set the required special tokens in tokenizer or update `tokenizer.special_tokens_pattern`" + ) + return token_ids_0 if token_ids_1 is None else token_ids_0 + token_ids_1 + if token_ids_1 is None: return [self.bos_token_id] + token_ids_0 + [self.eos_token_id] return [self.bos_token_id] + token_ids_0 + [self.eos_token_id] + token_ids_1 + [self.eos_token_id] elif self.special_tokens_pattern == "cls_double_sep": # [CLS] seq0 [SEP] or [CLS] seq0 [SEP] [SEP] seq1 [SEP] + if self.cls_token_id is None and self.sep_token_id is None: + raise ValueError( + "Cannot add special tokens following 'cls_double_sep' pattern because one or several special tokens " + f"are not defined (cls_token_id={self.cls_token_id}; sep_token_id={self.sep_token_id}). " + "Set the required special tokens in tokenizer or update `tokenizer.special_tokens_pattern`" + ) if token_ids_1 is None: return [self.cls_token_id] + token_ids_0 + [self.sep_token_id] return (