huggingface · molbap · Feb 5, 2026 · Feb 6, 2026 · zucchini-nlp · Feb 5, 2026
diff --git a/src/transformers/utils/generic.py b/src/transformers/utils/generic.py
@@ -997,9 +997,17 @@ def wrapped_forward(*args, **kwargs):
                 for name, module in self.named_modules():
                     for key, specs in capture_tasks:
                         # The second check is for multimodals where only backbone layer suffix is available
-                        if (specs.target_class is not None and isinstance(module, specs.target_class)) or (
-                            specs.class_name is not None and name.endswith(specs.class_name)
-                        ):
+                        if specs.target_class is not None:
+                            target_cls = specs.target_class
+                            module_cls = module.__class__
+                            matches_target_class = isinstance(module, target_cls) or (
+                                module_cls.__name__ == target_cls.__name__
+                                and module_cls.__module__ == target_cls.__module__
+                            )  # this is to make sure we attach hooks in case of module reloading
+                        else:
+                            matches_target_class = False
+
+                        if matches_target_class or (specs.class_name is not None and name.endswith(specs.class_name)):
                             if specs.layer_name is not None and specs.layer_name not in name:
                                 continue
                             # Monkey patch forward

diff --git a/tests/utils/test_modeling_utils.py b/tests/utils/test_modeling_utils.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 import copy
 import glob
+import importlib
 import json
 import os
 import os.path
@@ -34,6 +35,7 @@
 from parameterized import parameterized
 from pytest import mark
 
+import transformers.models.clip.modeling_clip as modeling_clip
 from transformers import (
     AutoConfig,
     AutoModel,
@@ -43,6 +45,8 @@
     BartForConditionalGeneration,
     BartModel,
     CLIPTextModelWithProjection,
+    CLIPVisionConfig,
+    CLIPVisionModel,
     DynamicCache,
     GPT2Config,
     GPT2LMHeadModel,
@@ -3735,3 +3739,29 @@ def test_vision_language_model(self):
         assert image_encoder is model.model.vision_tower, (
             f"LLaVA get_encoder(modality='image') should return vision_tower, got {type(image_encoder)}"
         )
+
+
+@require_torch
+class TestCheckModelInputsReload(unittest.TestCase):
+    # See https://github.com/linkedin/Liger-Kernel/pull/1061 and
+    # https://github.com/huggingface/transformers/issues/43761
+    def test_hidden_states_after_module_reload(self):
+        config = CLIPVisionConfig(
+            hidden_size=32,
+            intermediate_size=64,
+            num_hidden_layers=2,
+            num_attention_heads=4,
+            image_size=30,
+            patch_size=10,
+        )
+        pixel_values = torch.randn(1, 3, 30, 30, device=torch_device)
+
+        model = CLIPVisionModel(config).to(torch_device)
+        outputs = model(pixel_values=pixel_values, output_hidden_states=True)
+        self.assertIsNotNone(outputs.hidden_states)  # baseline: captured before any reload
+
+        importlib.reload(modeling_clip)  # re-executes the module, so its classes become new objects
+
+        model_after_reload = CLIPVisionModel(config).to(torch_device)
+        outputs_after_reload = model_after_reload(pixel_values=pixel_values, output_hidden_states=True)
+        self.assertIsNotNone(outputs_after_reload.hidden_states)  # must still be captured after the reload