From 0a6d5a8a71edd8a35d9ca307c176629046d30f19 Mon Sep 17 00:00:00 2001
From: remi-or
Date: Mon, 9 Jun 2025 07:48:18 -0500
Subject: [PATCH 1/4] Fix Janus decoder handling (VLM-related)

---
 src/transformers/models/janus/modeling_janus.py | 6 ++++++
 src/transformers/models/janus/modular_janus.py  | 4 ++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/transformers/models/janus/modeling_janus.py b/src/transformers/models/janus/modeling_janus.py
index a526ce5d7af1..483f74e3a94a 100644
--- a/src/transformers/models/janus/modeling_janus.py
+++ b/src/transformers/models/janus/modeling_janus.py
@@ -1083,6 +1083,12 @@ def get_image_features(self, pixel_values):
         image_embeds = self.aligner(image_embeds.last_hidden_state)
         return image_embeds
 
+    def set_decoder(self, decoder):
+        self.model.set_decoder(decoder)
+
+    def get_decoder(self):
+        return self.model.get_decoder()
+
     @can_return_tuple
     @auto_docstring
     def forward(
diff --git a/src/transformers/models/janus/modular_janus.py b/src/transformers/models/janus/modular_janus.py
index 0d484ffb0c05..0dd0f33993aa 100644
--- a/src/transformers/models/janus/modular_janus.py
+++ b/src/transformers/models/janus/modular_janus.py
@@ -1030,10 +1030,10 @@ def set_output_embeddings(self, new_embeddings):
         self.lm_head = new_embeddings
 
     def set_decoder(self, decoder):
-        self.model = decoder
+        self.model.set_decoder(decoder)
 
     def get_decoder(self):
-        return self.model
+        return self.model.get_decoder()
 
     @can_return_tuple
     @auto_docstring

From dfa5b655b53e9f3835d7becfb798f41f5746a766 Mon Sep 17 00:00:00 2001
From: remi-or
Date: Mon, 9 Jun 2025 07:48:48 -0500
Subject: [PATCH 2/4] Remove deprecated kwargs in modernbert

---
 src/transformers/models/modernbert/modeling_modernbert.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/models/modernbert/modeling_modernbert.py b/src/transformers/models/modernbert/modeling_modernbert.py
index c0e990971527..cccac89e26db 100644
--- a/src/transformers/models/modernbert/modeling_modernbert.py
+++ b/src/transformers/models/modernbert/modeling_modernbert.py
@@ -154,7 +154,7 @@ def __init__(
             up to max_seqlen. If the max_seqlen, device, or dtype during training/inference differ,
             the cos_sin_cache will be recomputed during the forward pass.
""" - super().__init__(dim=dim, base=base, pos_idx_in_fp32=True, device=device, interleaved=False) + super().__init__(dim=dim, base=base, device=device, interleaved=False) self.max_seqlen = max_seqlen if max_seqlen is not None and device is not None and dtype is not None: From 084ea71d694b68581c2358cb25e06730a0867a20 Mon Sep 17 00:00:00 2001 From: remi-or Date: Mon, 9 Jun 2025 08:20:48 -0500 Subject: [PATCH 3/4] Fixed mixup in Janus decoder logic and in modular --- src/transformers/models/janus/modeling_janus.py | 8 ++++---- src/transformers/models/janus/modular_janus.py | 6 ++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/transformers/models/janus/modeling_janus.py b/src/transformers/models/janus/modeling_janus.py index 483f74e3a94a..bc1185c9d1d5 100644 --- a/src/transformers/models/janus/modeling_janus.py +++ b/src/transformers/models/janus/modeling_janus.py @@ -1084,10 +1084,10 @@ def get_image_features(self, pixel_values): return image_embeds def set_decoder(self, decoder): - self.model.set_decoder(decoder) + self.language_model = decoder def get_decoder(self): - return self.model.get_decoder() + return self.language_model @can_return_tuple @auto_docstring @@ -1198,10 +1198,10 @@ def set_output_embeddings(self, new_embeddings): self.lm_head = new_embeddings def set_decoder(self, decoder): - self.model = decoder + self.model.set_decoder(decoder) def get_decoder(self): - return self.model + return self.model.get_decoder() @can_return_tuple @auto_docstring diff --git a/src/transformers/models/janus/modular_janus.py b/src/transformers/models/janus/modular_janus.py index 0dd0f33993aa..711c81d0d4b6 100644 --- a/src/transformers/models/janus/modular_janus.py +++ b/src/transformers/models/janus/modular_janus.py @@ -921,6 +921,12 @@ def get_image_features(self, pixel_values): image_embeds = self.aligner(image_embeds.last_hidden_state) return image_embeds + def set_decoder(self, decoder): + self.language_model = decoder + + def get_decoder(self): + return self.language_model + @can_return_tuple @auto_docstring def forward( From 9843fa842e7dea285f44ba84637883cb9b68b4c0 Mon Sep 17 00:00:00 2001 From: remi-or Date: Mon, 9 Jun 2025 08:21:01 -0500 Subject: [PATCH 4/4] Fixed modernbert modular --- src/transformers/models/modernbert/modular_modernbert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/modernbert/modular_modernbert.py b/src/transformers/models/modernbert/modular_modernbert.py index 137673cfa590..8909875381d1 100644 --- a/src/transformers/models/modernbert/modular_modernbert.py +++ b/src/transformers/models/modernbert/modular_modernbert.py @@ -417,7 +417,7 @@ def __init__( up to max_seqlen. If the max_seqlen, device, or dtype during training/inference differ, the cos_sin_cache will be recomputed during the forward pass. """ - super().__init__(dim=dim, base=base, pos_idx_in_fp32=True, device=device, interleaved=False) + super().__init__(dim=dim, base=base, device=device, interleaved=False) self.max_seqlen = max_seqlen if max_seqlen is not None and device is not None and dtype is not None: