Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/transformers/modeling_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1454,6 +1454,12 @@ def _from_config(cls, config, **kwargs):
if isinstance(dtype, str):
dtype = getattr(torch, dtype)

# Set the same `dtype` on all subconfigs to avoid dtype mismatches. When using "auto" dtype
# with nested models, we can't dispatch a different dtype per backbone module.
for sub_config_key in config.sub_configs:
if (sub_config := getattr(config, sub_config_key)) is not None:

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will this ever be None? The type hint suggests that it shouldn't be

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The subconfig? I remember it was possible in some backbone_configs in pure-vision models. It doesn't happen in generative MLLMs.

Actually, I've started refactoring those backbones, so the check may no longer be needed; if so, I can clean it up everywhere.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, the values in config.sub_configs. I was just curious because it's hinted as dict[str, type["PreTrainedConfig"]]. Checking for None doesn't hurt, though.

sub_config.dtype = dtype

# If passing `attn_implementation` as kwargs, respect it (it will be applied recursively on subconfigs)
if "attn_implementation" in kwargs:
config._attn_implementation = kwargs.pop("attn_implementation")
Expand Down
9 changes: 9 additions & 0 deletions tests/utils/test_modeling_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -652,6 +652,15 @@ def test_model_from_config_dtype_composite(self):
TINY_LLAVA, dtype={"text_config": "float32", "vision_config": "int64", "": "float16"}
)

# Check that `from_config` also works and uses the same dtype for all modules
config = AutoConfig.from_pretrained(TINY_LLAVA)
config.text_config.dtype = torch.float16
config.dtype = torch.float32
model = LlavaForConditionalGeneration._from_config(config)
self.assertEqual(model.model.language_model.dtype, torch.float32)
self.assertEqual(model.model.vision_tower.dtype, torch.float32)
self.assertEqual(model.dtype, torch.float32)

def test_model_from_pretrained_dtype(self):
# test that the model can be instantiated with dtype of either
# 1. explicit from_pretrained's dtype argument
Expand Down
Loading