From 63504184c63880b98bd319f239e2a72722e93f9b Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral
 <8515462+albertvillanova@users.noreply.github.com>
Date: Tue, 17 Mar 2026 11:17:10 +0100
Subject: [PATCH 1/6] Update comment to cover MISSING and UNEXPECTED warnings

---
 trl/trainer/reward_trainer.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/trl/trainer/reward_trainer.py b/trl/trainer/reward_trainer.py
index 1a105abf6a7..3c5992c4ec0 100644
--- a/trl/trainer/reward_trainer.py
+++ b/trl/trainer/reward_trainer.py
@@ -62,9 +62,11 @@
 logger = get_logger(__name__)
 
 
-# AutoModelForSequenceClassification adds a new classification head when loading a CausalLM. That head is randomly
-# initialized and triggers a harmless warning about uninitialized weights. We suppress just that specific warning to
-# avoid confusing users.
+# Loading a CausalLM checkpoint into AutoModelForSequenceClassification triggers two harmless warnings:
+#   - MISSING  score.weight : the new seq-clf head was not in the checkpoint and is randomly initialized.
+#   - UNEXPECTED lm_head.weight : the causal LM head is in the checkpoint but absent from seq-clf.
+# Both are expected consequences of intentional cross-architecture loading. We suppress them to avoid
+# confusing users.
 
 
 # Old approach using logging filter (for transformers < 4.57.0)

From 704bd645e60756737456a2087cb23c6867fbd95b Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral
 <8515462+albertvillanova@users.noreply.github.com>
Date: Tue, 17 Mar 2026 11:17:39 +0100
Subject: [PATCH 2/6] Also filter unexpected lm_head warning in
 suppress_from_pretrained_warning

---
 trl/trainer/reward_trainer.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/trl/trainer/reward_trainer.py b/trl/trainer/reward_trainer.py
index 3c5992c4ec0..9dcfef505fe 100644
--- a/trl/trainer/reward_trainer.py
+++ b/trl/trainer/reward_trainer.py
@@ -72,15 +72,19 @@
 # Old approach using logging filter (for transformers < 4.57.0)
 @contextmanager
 def suppress_from_pretrained_warning(logger: logging.Logger):
-    pattern = re.compile(
+    missing_pattern = re.compile(
         r"^Some weights of \S+ were not initialized from the model checkpoint at \S+ and are newly initialized: "
         r"\[.*\]\nYou should probably TRAIN this model on a down-stream task to be able to use it for predictions and "
         r"inference\.$"
     )
+    unexpected_pattern = re.compile(
+        r"^Some weights of the model checkpoint at \S+ were not used when initializing \S+: \[.*lm_head.*\]"
+    )
 
     class _Filter(logging.Filter):
         def filter(self, record: logging.LogRecord) -> bool:
-            return not pattern.search(record.getMessage())
+            msg = record.getMessage()
+            return not (missing_pattern.search(msg) or unexpected_pattern.search(msg))
 
     f = _Filter()
     logger.addFilter(f)

From 81e26d6a9f97a64571d419c143f7514fa417538f Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral
 <8515462+albertvillanova@users.noreply.github.com>
Date: Tue, 17 Mar 2026 11:18:53 +0100
Subject: [PATCH 3/6] Also ignore unexpected lm_head keys in
 ignore_seqcls_score_missing_key

---
 trl/trainer/reward_trainer.py | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/trl/trainer/reward_trainer.py b/trl/trainer/reward_trainer.py
index 9dcfef505fe..5c2672382a9 100644
--- a/trl/trainer/reward_trainer.py
+++ b/trl/trainer/reward_trainer.py
@@ -97,17 +97,26 @@ def filter(self, record: logging.LogRecord) -> bool:
 # New approach using scoped override (for transformers >= 4.57.0)
 @contextmanager
 def ignore_seqcls_score_missing_key():
-    # Scoped override: ignore only the expected seq-clf head key.
-    old = getattr(GenericForSequenceClassification, "_keys_to_ignore_on_load_missing", None)
-    merged = list(old) if old is not None else []
-    pattern = r"^score\.weight$"
-    if pattern not in merged:
-        merged.append(pattern)
-    GenericForSequenceClassification._keys_to_ignore_on_load_missing = merged
+    # Scoped override: ignore the expected seq-clf head key (newly added) and the causal LM head
+    # key (present in the checkpoint but absent from seq-clf).
+    old_missing = getattr(GenericForSequenceClassification, "_keys_to_ignore_on_load_missing", None)
+    old_unexpected = getattr(GenericForSequenceClassification, "_keys_to_ignore_on_load_unexpected", None)
+
+    merged_missing = list(old_missing) if old_missing is not None else []
+    if r"^score\.weight$" not in merged_missing:
+        merged_missing.append(r"^score\.weight$")
+
+    merged_unexpected = list(old_unexpected) if old_unexpected is not None else []
+    if r"^lm_head\." not in merged_unexpected:
+        merged_unexpected.append(r"^lm_head\.")
+
+    GenericForSequenceClassification._keys_to_ignore_on_load_missing = merged_missing
+    GenericForSequenceClassification._keys_to_ignore_on_load_unexpected = merged_unexpected
     try:
         yield
     finally:
-        GenericForSequenceClassification._keys_to_ignore_on_load_missing = old
+        GenericForSequenceClassification._keys_to_ignore_on_load_missing = old_missing
+        GenericForSequenceClassification._keys_to_ignore_on_load_unexpected = old_unexpected
 
 
 # Version-aware wrapper that chooses the appropriate approach

From 0686133f8be7f725a8f65fd8100ad89ccfa9ed4a Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral
 <8515462+albertvillanova@users.noreply.github.com>
Date: Tue, 17 Mar 2026 11:19:56 +0100
Subject: [PATCH 4/6] Rename ignore_seqcls_score_missing_key to
 _ignore_seqcls_cross_arch_keys

---
 trl/trainer/reward_trainer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/trl/trainer/reward_trainer.py b/trl/trainer/reward_trainer.py
index 5c2672382a9..6fb3ad9d98a 100644
--- a/trl/trainer/reward_trainer.py
+++ b/trl/trainer/reward_trainer.py
@@ -96,7 +96,7 @@ def filter(self, record: logging.LogRecord) -> bool:
 
 # New approach using scoped override (for transformers >= 4.57.0)
 @contextmanager
-def ignore_seqcls_score_missing_key():
+def _ignore_seqcls_cross_arch_keys():
     # Scoped override: ignore the expected seq-clf head key (newly added) and the causal LM head
     # key (present in the checkpoint but absent from seq-clf).
     old_missing = getattr(GenericForSequenceClassification, "_keys_to_ignore_on_load_missing", None)
@@ -125,7 +125,7 @@ def suppress_seqcls_warning():
     # Use the new approach for transformers >= 4.57.0, old approach for earlier versions
     # The old approach is needed for 4.56.2 to avoid meta tensor issues with device_map=None
     if Version(transformers.__version__) >= Version("4.57.0"):
-        with ignore_seqcls_score_missing_key():
+        with _ignore_seqcls_cross_arch_keys():
             yield
     else:
         # Get the transformers logger

From e11d4e5a7b5f38c149743d0ad6a795e0f343ce3a Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral
 <8515462+albertvillanova@users.noreply.github.com>
Date: Tue, 17 Mar 2026 11:21:04 +0100
Subject: [PATCH 5/6] Rename suppress_from_pretrained_warning to
 _suppress_seqcls_cross_arch_keys

---
 trl/trainer/reward_trainer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/trl/trainer/reward_trainer.py b/trl/trainer/reward_trainer.py
index 6fb3ad9d98a..8e08b2ddf98 100644
--- a/trl/trainer/reward_trainer.py
+++ b/trl/trainer/reward_trainer.py
@@ -71,7 +71,7 @@
 
 # Old approach using logging filter (for transformers < 4.57.0)
 @contextmanager
-def suppress_from_pretrained_warning(logger: logging.Logger):
+def _suppress_seqcls_cross_arch_keys(logger: logging.Logger):
     missing_pattern = re.compile(
         r"^Some weights of \S+ were not initialized from the model checkpoint at \S+ and are newly initialized: "
         r"\[.*\]\nYou should probably TRAIN this model on a down-stream task to be able to use it for predictions and "
@@ -130,7 +130,7 @@ def suppress_seqcls_warning():
     else:
         # Get the transformers logger
         transformers_logger = logging.getLogger("transformers.modeling_utils")
-        with suppress_from_pretrained_warning(transformers_logger):
+        with _suppress_seqcls_cross_arch_keys(transformers_logger):
             yield
 
 

From a91d7b1e0bc19ca8b84a7ec0ce48de50d03881e8 Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral
 <8515462+albertvillanova@users.noreply.github.com>
Date: Tue, 17 Mar 2026 11:46:11 +0100
Subject: [PATCH 6/6] Revert lm_head log filter: UNEXPECTED is not emitted for
 transformers < 4.57.0

---
 trl/trainer/reward_trainer.py | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/trl/trainer/reward_trainer.py b/trl/trainer/reward_trainer.py
index 8e08b2ddf98..cd079499b51 100644
--- a/trl/trainer/reward_trainer.py
+++ b/trl/trainer/reward_trainer.py
@@ -62,29 +62,26 @@
 logger = get_logger(__name__)
 
 
-# Loading a CausalLM checkpoint into AutoModelForSequenceClassification triggers two harmless warnings:
-#   - MISSING  score.weight : the new seq-clf head was not in the checkpoint and is randomly initialized.
-#   - UNEXPECTED lm_head.weight : the causal LM head is in the checkpoint but absent from seq-clf.
+# Loading a CausalLM checkpoint into AutoModelForSequenceClassification triggers harmless warnings:
+#   - MISSING  score.weight    : the new seq-clf head was not in the checkpoint and is randomly initialized.
+#   - UNEXPECTED lm_head.weight: the causal LM head is in the checkpoint but absent from seq-clf (>= 4.57.0 only).
 # Both are expected consequences of intentional cross-architecture loading. We suppress them to avoid
 # confusing users.
 
 
 # Old approach using logging filter (for transformers < 4.57.0)
+# Note: in transformers < 4.57.0, only the MISSING score.weight warning is emitted; lm_head.weight is not reported.
 @contextmanager
 def _suppress_seqcls_cross_arch_keys(logger: logging.Logger):
-    missing_pattern = re.compile(
+    pattern = re.compile(
         r"^Some weights of \S+ were not initialized from the model checkpoint at \S+ and are newly initialized: "
         r"\[.*\]\nYou should probably TRAIN this model on a down-stream task to be able to use it for predictions and "
         r"inference\.$"
     )
-    unexpected_pattern = re.compile(
-        r"^Some weights of the model checkpoint at \S+ were not used when initializing \S+: \[.*lm_head.*\]"
-    )
 
     class _Filter(logging.Filter):
         def filter(self, record: logging.LogRecord) -> bool:
-            msg = record.getMessage()
-            return not (missing_pattern.search(msg) or unexpected_pattern.search(msg))
+            return not pattern.search(record.getMessage())
 
     f = _Filter()
     logger.addFilter(f)