diff --git a/src/transformers/core_model_loading.py b/src/transformers/core_model_loading.py
index b3cb6ebd1be4..81854dda1b74 100644
--- a/src/transformers/core_model_loading.py
+++ b/src/transformers/core_model_loading.py
@@ -594,7 +594,6 @@ def set_param_for_module(
             missing_keys.discard(target_name)
             if ref is not None and ref.shape != param_value.shape and hf_quantizer is None:
                 mismatch_keys.add((target_name, param_value.shape, ref.shape))
-                module_obj.param_name._is_hf_initialized = False  # Needs to be initialized
             else:
                 # super important otherwise _init_weight will re-init the param
                 param_value._is_hf_initialized = True
diff --git a/src/transformers/integrations/hub_kernels.py b/src/transformers/integrations/hub_kernels.py
index 8d3ae310687e..7974fa4dd173 100644
--- a/src/transformers/integrations/hub_kernels.py
+++ b/src/transformers/integrations/hub_kernels.py
@@ -341,6 +341,12 @@ def lazy_load_kernel(kernel_name: str, mapping: dict[str, ModuleType | None] = _
             mapping[kernel_name] = kernel
         except FileNotFoundError:
             mapping[kernel_name] = None
+        except AssertionError as error:
+            logger.warning_once(
+                f"Failed to load the '{kernel_name}' kernel from '{repo_id}' because the current environment does not "
+                f"support the required backend: {error}"
+            )
+            mapping[kernel_name] = None
 
     else:
         # Try to import is_{kernel_name}_available from ..utils
diff --git a/src/transformers/models/fsmt/configuration_fsmt.py b/src/transformers/models/fsmt/configuration_fsmt.py
index a1075016c3f4..fc68464f252f 100644
--- a/src/transformers/models/fsmt/configuration_fsmt.py
+++ b/src/transformers/models/fsmt/configuration_fsmt.py
@@ -194,6 +194,7 @@ def __init__(
             bos_token_id=eos_token_id,
             is_encoder_decoder=is_encoder_decoder,
             num_hidden_layers=encoder_layers,
+            tie_word_embeddings=tie_word_embeddings,
         )
         if "decoder" in common_kwargs:
             del common_kwargs["decoder"]
diff --git a/src/transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py b/src/transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py
index c0170f45ac43..a399f6d8f00d 100644
--- a/src/transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py
+++ b/src/transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py
@@ -1069,7 +1069,7 @@ def _prepare_4d_causal_attention_mask_with_cache_position(
 
 @auto_docstring
 class KyutaiSpeechToTextForConditionalGeneration(KyutaiSpeechToTextPreTrainedModel, GenerationMixin):
-    _tied_weights_keys = {"lm_head.weight": "model.embed_tokens.weight"}
+    _tied_weights_keys = {"lm_head.weight": "model.embed_tokens.embed_tokens.weight"}
     _tp_plan = {"lm_head": "colwise_rep"}
     _pp_plan = {"lm_head": (["hidden_states"], ["logits"])}
     _keep_in_fp32_modules_strict = ["codec_model"]
diff --git a/src/transformers/models/llava_next_video/configuration_llava_next_video.py b/src/transformers/models/llava_next_video/configuration_llava_next_video.py
index 7b82b5ac5b89..86dec22c8e5b 100644
--- a/src/transformers/models/llava_next_video/configuration_llava_next_video.py
+++ b/src/transformers/models/llava_next_video/configuration_llava_next_video.py
@@ -103,7 +103,6 @@ def __init__(
         vision_feature_select_strategy="default",
         vision_feature_layer=-2,
         image_grid_pinpoints=None,
-        tie_word_embeddings=False,
         video_token_index=32000,
         spatial_pool_mode="average",
         spatial_pool_stride=2,
@@ -160,7 +159,13 @@ def __init__(
 
         self.text_config = text_config
 
-        super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
+        super().__init__(**kwargs)
+
+        # At model addition-time, `tie_word_embeddings` was saved in the text config even though it concerns the main
+        # model (whose default was False). If only the text config requests tying, move the flag to the main config.
+        if not self.tie_word_embeddings and self.text_config.tie_word_embeddings:
+            self.tie_word_embeddings = True
+            self.text_config.tie_word_embeddings = False
 
 
 __all__ = ["LlavaNextVideoConfig"]
diff --git a/src/transformers/models/llava_onevision/configuration_llava_onevision.py b/src/transformers/models/llava_onevision/configuration_llava_onevision.py
index 9fd1e850f0e5..cb957a992216 100644
--- a/src/transformers/models/llava_onevision/configuration_llava_onevision.py
+++ b/src/transformers/models/llava_onevision/configuration_llava_onevision.py
@@ -102,7 +102,6 @@ def __init__(
         vision_feature_layer=-1,
         vision_aspect_ratio="anyres_max_9",
         image_grid_pinpoints=None,
-        tie_word_embeddings=False,
         multimodal_projector_bias=True,
         **kwargs,
     ):
@@ -188,7 +187,13 @@ def __init__(
 
         self.text_config = text_config
 
-        super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
+        super().__init__(**kwargs)
+
+        # At model addition-time, `tie_word_embeddings` was saved in the text config even though it concerns the main
+        # model (whose default was False). If only the text config requests tying, move the flag to the main config.
+        if not self.tie_word_embeddings and self.text_config.tie_word_embeddings:
+            self.tie_word_embeddings = True
+            self.text_config.tie_word_embeddings = False
 
 
 __all__ = ["LlavaOnevisionConfig"]
diff --git a/src/transformers/models/musicgen/configuration_musicgen.py b/src/transformers/models/musicgen/configuration_musicgen.py
index 76c951668f46..bff720e5cfe2 100644
--- a/src/transformers/models/musicgen/configuration_musicgen.py
+++ b/src/transformers/models/musicgen/configuration_musicgen.py
@@ -221,5 +221,11 @@ def __init__(self, text_encoder, audio_encoder, decoder, **kwargs):
     def sampling_rate(self):
         return self.audio_encoder.sampling_rate
 
+    # Default to decoder=True when neither flag is passed; the parent call crashed otherwise (not 100% confirmed)
+    def get_text_config(self, decoder=None, encoder=None):
+        if decoder is None and encoder is None:  # caller expressed no preference
+            decoder = True
+        return super().get_text_config(decoder=decoder, encoder=encoder)  # explicit arguments pass through as given
+
 
 __all__ = ["MusicgenConfig", "MusicgenDecoderConfig"]
diff --git a/src/transformers/models/musicgen_melody/configuration_musicgen_melody.py b/src/transformers/models/musicgen_melody/configuration_musicgen_melody.py
index a4ec8528590a..af184b241408 100644
--- a/src/transformers/models/musicgen_melody/configuration_musicgen_melody.py
+++ b/src/transformers/models/musicgen_melody/configuration_musicgen_melody.py
@@ -234,5 +234,11 @@ def __init__(
     def sampling_rate(self):
         return self.audio_encoder.sampling_rate
 
+    # Default to decoder=True when neither flag is passed; the parent call crashed otherwise (not 100% confirmed)
+    def get_text_config(self, decoder=None, encoder=None):
+        if decoder is None and encoder is None:  # caller expressed no preference
+            decoder = True
+        return super().get_text_config(decoder=decoder, encoder=encoder)  # explicit arguments pass through as given
+
 
 __all__ = ["MusicgenMelodyConfig", "MusicgenMelodyDecoderConfig"]
diff --git a/tests/models/fsmt/test_modeling_fsmt.py b/tests/models/fsmt/test_modeling_fsmt.py
index acc29cac7ec0..d14c6b3225a2 100644
--- a/tests/models/fsmt/test_modeling_fsmt.py
+++ b/tests/models/fsmt/test_modeling_fsmt.py
@@ -125,6 +125,7 @@ def get_config(self):
             eos_token_id=self.eos_token_id,
             bos_token_id=self.bos_token_id,
             pad_token_id=self.pad_token_id,
+            tie_word_embeddings=True,
         )
 
     def prepare_config_and_inputs_for_common(self):
@@ -254,6 +255,7 @@ def test_ensure_weights_are_shared(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs()
 
         config.tie_word_embeddings = True
+        config.decoder.tie_word_embeddings = True
         model = FSMTForConditionalGeneration(config)
 
         # FSMT shares three weights.
@@ -270,6 +272,7 @@ def test_ensure_weights_are_shared(self):
         )
 
         config.tie_word_embeddings = False
+        config.decoder.tie_word_embeddings = False
         model = FSMTForConditionalGeneration(config)
 
         # FSMT shares three weights.
diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py
index d2b5e0949cac..46f644e080bb 100755
--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -2103,6 +2103,49 @@ def test_tied_weights_keys(self):
                 f"Missing `_tied_weights_keys` for {model_class}: add all of {tied_params} except one.",
             )
 
+    def test_tie_word_embeddings_is_authoritative(self):  # the config flag alone must decide weight sharing
+        original_config, _ = self.model_tester.prepare_config_and_inputs_for_common()
+
+        for model_class in self.all_model_classes:
+            tied_config = copy.deepcopy(original_config)  # deep copies keep the two variants independent
+            tied_config.get_text_config().tie_word_embeddings = True
+
+            untied_config = copy.deepcopy(original_config)
+            untied_config.get_text_config().tie_word_embeddings = False
+
+            model_tied = model_class(tied_config)
+            model_untied = model_class(untied_config)
+
+            if not hasattr(model_tied, "_tied_weights_keys") or not model_tied._tied_weights_keys:
+                continue  # nothing to compare when the class declares no tied weights
+
+            tied_keys = model_tied._tied_weights_keys  # iterated below as {target_key: source_key}
+            state_dict_tied = model_tied.state_dict()
+            state_dict_untied = model_untied.state_dict()
+
+            for target_key, source_key in tied_keys.items():
+                if target_key not in state_dict_tied or source_key not in state_dict_tied:
+                    continue  # only pairs present verbatim in the state dict can be checked
+                if target_key not in state_dict_untied or source_key not in state_dict_untied:
+                    continue  # same requirement for the untied model
+
+                target_tied_ptr = id_tensor_storage(state_dict_tied[target_key])  # storage ids are compared below
+                source_tied_ptr = id_tensor_storage(state_dict_tied[source_key])
+                target_untied_ptr = id_tensor_storage(state_dict_untied[target_key])
+                source_untied_ptr = id_tensor_storage(state_dict_untied[source_key])
+
+                self.assertEqual(
+                    target_tied_ptr,
+                    source_tied_ptr,
+                    f"{model_class}: With tie_word_embeddings=True, '{target_key}' should share storage with '{source_key}'",
+                )
+                self.assertNotEqual(
+                    target_untied_ptr,
+                    source_untied_ptr,
+                    f"{model_class}: With tie_word_embeddings=False, '{target_key}' should NOT share storage with '{source_key}'. "
+                    f"Config tie_word_embeddings must be authoritative over class-level _tied_weights_keys.",
+                )
+
     def test_model_weights_reload_no_missing_tied_weights(self):
         for model_class in self.all_model_classes:
             config, _ = self.model_tester.prepare_config_and_inputs_for_common()