From 63504184c63880b98bd319f239e2a72722e93f9b Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Tue, 17 Mar 2026 11:17:10 +0100 Subject: [PATCH 1/6] Update comment --- trl/trainer/reward_trainer.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/trl/trainer/reward_trainer.py b/trl/trainer/reward_trainer.py index 1a105abf6a7..3c5992c4ec0 100644 --- a/trl/trainer/reward_trainer.py +++ b/trl/trainer/reward_trainer.py @@ -62,9 +62,11 @@ logger = get_logger(__name__) -# AutoModelForSequenceClassification adds a new classification head when loading a CausalLM. That head is randomly -# initialized and triggers a harmless warning about uninitialized weights. We suppress just that specific warning to -# avoid confusing users. +# Loading a CausalLM checkpoint into AutoModelForSequenceClassification triggers two harmless warnings: +# - MISSING score.weight : the new seq-clf head was not in the checkpoint and is randomly initialized. +# - UNEXPECTED lm_head.weight : the causal LM head is in the checkpoint but absent from seq-clf. +# Both are expected consequences of intentional cross-architecture loading. We suppress them to avoid +# confusing users. # Old approach using logging filter (for transformers < 4.57.0) From 704bd645e60756737456a2087cb23c6867fbd95b Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Tue, 17 Mar 2026 11:17:39 +0100 Subject: [PATCH 2/6] Update suppress_from_pretrained_warning --- trl/trainer/reward_trainer.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/trl/trainer/reward_trainer.py b/trl/trainer/reward_trainer.py index 3c5992c4ec0..9dcfef505fe 100644 --- a/trl/trainer/reward_trainer.py +++ b/trl/trainer/reward_trainer.py @@ -72,15 +72,19 @@ # Old approach using logging filter (for transformers < 4.57.0) @contextmanager def suppress_from_pretrained_warning(logger: logging.Logger): - pattern = re.compile( + missing_pattern = re.compile( r"^Some weights of \S+ were not initialized from the model checkpoint at \S+ and are newly initialized: " r"\[.*\]\nYou should probably TRAIN this model on a down-stream task to be able to use it for predictions and " r"inference\.$" ) + unexpected_pattern = re.compile( + r"^Some weights of the model checkpoint at \S+ were not used when initializing \S+: \[.*lm_head.*\]" + ) class _Filter(logging.Filter): def filter(self, record: logging.LogRecord) -> bool: - return not pattern.search(record.getMessage()) + msg = record.getMessage() + return not (missing_pattern.search(msg) or unexpected_pattern.search(msg)) f = _Filter() logger.addFilter(f) From 81e26d6a9f97a64571d419c143f7514fa417538f Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Tue, 17 Mar 2026 11:18:53 +0100 Subject: [PATCH 3/6] Update ignore_seqcls_score_missing_key --- trl/trainer/reward_trainer.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/trl/trainer/reward_trainer.py b/trl/trainer/reward_trainer.py index 9dcfef505fe..5c2672382a9 100644 --- a/trl/trainer/reward_trainer.py +++ b/trl/trainer/reward_trainer.py @@ -97,17 +97,26 @@ def filter(self, record: logging.LogRecord) -> bool: # New approach using scoped override (for transformers >= 4.57.0) @contextmanager def ignore_seqcls_score_missing_key(): - # Scoped override: ignore only the expected seq-clf head key. - old = getattr(GenericForSequenceClassification, "_keys_to_ignore_on_load_missing", None) - merged = list(old) if old is not None else [] - pattern = r"^score\.weight$" - if pattern not in merged: - merged.append(pattern) - GenericForSequenceClassification._keys_to_ignore_on_load_missing = merged + # Scoped override: ignore the expected seq-clf head key (newly added) and the causal LM head + # key (present in the checkpoint but absent from seq-clf). + old_missing = getattr(GenericForSequenceClassification, "_keys_to_ignore_on_load_missing", None) + old_unexpected = getattr(GenericForSequenceClassification, "_keys_to_ignore_on_load_unexpected", None) + + merged_missing = list(old_missing) if old_missing is not None else [] + if r"^score\.weight$" not in merged_missing: + merged_missing.append(r"^score\.weight$") + + merged_unexpected = list(old_unexpected) if old_unexpected is not None else [] + if r"^lm_head\." not in merged_unexpected: + merged_unexpected.append(r"^lm_head\.") + + GenericForSequenceClassification._keys_to_ignore_on_load_missing = merged_missing + GenericForSequenceClassification._keys_to_ignore_on_load_unexpected = merged_unexpected try: yield finally: - GenericForSequenceClassification._keys_to_ignore_on_load_missing = old + GenericForSequenceClassification._keys_to_ignore_on_load_missing = old_missing + GenericForSequenceClassification._keys_to_ignore_on_load_unexpected = old_unexpected # Version-aware wrapper that chooses the appropriate approach From 0686133f8be7f725a8f65fd8100ad89ccfa9ed4a Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Tue, 17 Mar 2026 11:19:56 +0100 Subject: [PATCH 4/6] Rename ignore_seqcls_score_missing_key to _ignore_seqcls_cross_arch_keys --- trl/trainer/reward_trainer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/trl/trainer/reward_trainer.py b/trl/trainer/reward_trainer.py index 5c2672382a9..6fb3ad9d98a 100644 --- a/trl/trainer/reward_trainer.py +++ b/trl/trainer/reward_trainer.py @@ -96,7 +96,7 @@ def filter(self, record: logging.LogRecord) -> bool: # New approach using scoped override (for transformers >= 4.57.0) @contextmanager -def ignore_seqcls_score_missing_key(): +def _ignore_seqcls_cross_arch_keys(): # Scoped override: ignore the expected seq-clf head key (newly added) and the causal LM head # key (present in the checkpoint but absent from seq-clf). old_missing = getattr(GenericForSequenceClassification, "_keys_to_ignore_on_load_missing", None) @@ -125,7 +125,7 @@ def suppress_seqcls_warning(): # Use the new approach for transformers >= 4.57.0, old approach for earlier versions # The old approach is needed for 4.56.2 to avoid meta tensor issues with device_map=None if Version(transformers.__version__) >= Version("4.57.0"): - with ignore_seqcls_score_missing_key(): + with _ignore_seqcls_cross_arch_keys(): yield else: # Get the transformers logger From e11d4e5a7b5f38c149743d0ad6a795e0f343ce3a Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Tue, 17 Mar 2026 11:21:04 +0100 Subject: [PATCH 5/6] Rename suppress_from_pretrained_warning to _suppress_seqcls_cross_arch_keys --- trl/trainer/reward_trainer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/trl/trainer/reward_trainer.py b/trl/trainer/reward_trainer.py index 6fb3ad9d98a..8e08b2ddf98 100644 --- a/trl/trainer/reward_trainer.py +++ b/trl/trainer/reward_trainer.py @@ -71,7 +71,7 @@ # Old approach using logging filter (for transformers < 4.57.0) @contextmanager -def suppress_from_pretrained_warning(logger: logging.Logger): +def _suppress_seqcls_cross_arch_keys(logger: logging.Logger): missing_pattern = re.compile( r"^Some weights of \S+ were not initialized from the model checkpoint at \S+ and are newly initialized: " r"\[.*\]\nYou should probably TRAIN this model on a down-stream task to be able to use it for predictions and " @@ -130,7 +130,7 @@ def suppress_seqcls_warning(): else: # Get the transformers logger transformers_logger = logging.getLogger("transformers.modeling_utils") - with suppress_from_pretrained_warning(transformers_logger): + with _suppress_seqcls_cross_arch_keys(transformers_logger): yield From a91d7b1e0bc19ca8b84a7ec0ce48de50d03881e8 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Tue, 17 Mar 2026 11:46:11 +0100 Subject: [PATCH 6/6] Revert because UNEXPECTED was not emitted for transformers < 4.57.0 --- trl/trainer/reward_trainer.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/trl/trainer/reward_trainer.py b/trl/trainer/reward_trainer.py index 8e08b2ddf98..cd079499b51 100644 --- a/trl/trainer/reward_trainer.py +++ b/trl/trainer/reward_trainer.py @@ -62,29 +62,26 @@ logger = get_logger(__name__) -# Loading a CausalLM checkpoint into AutoModelForSequenceClassification triggers two harmless warnings: -# - MISSING score.weight : the new seq-clf head was not in the checkpoint and is randomly initialized. -# - UNEXPECTED lm_head.weight : the causal LM head is in the checkpoint but absent from seq-clf. +# Loading a CausalLM checkpoint into AutoModelForSequenceClassification triggers harmless warnings: +# - MISSING score.weight : the new seq-clf head was not in the checkpoint and is randomly initialized. +# - UNEXPECTED lm_head.weight: the causal LM head is in the checkpoint but absent from seq-clf (>= 4.57.0 only). # Both are expected consequences of intentional cross-architecture loading. We suppress them to avoid # confusing users. # Old approach using logging filter (for transformers < 4.57.0) +# Note: in transformers < 4.57.0, only the MISSING score.weight warning is emitted; lm_head.weight is not reported. @contextmanager def _suppress_seqcls_cross_arch_keys(logger: logging.Logger): - missing_pattern = re.compile( + pattern = re.compile( r"^Some weights of \S+ were not initialized from the model checkpoint at \S+ and are newly initialized: " r"\[.*\]\nYou should probably TRAIN this model on a down-stream task to be able to use it for predictions and " r"inference\.$" ) - unexpected_pattern = re.compile( - r"^Some weights of the model checkpoint at \S+ were not used when initializing \S+: \[.*lm_head.*\]" - ) class _Filter(logging.Filter): def filter(self, record: logging.LogRecord) -> bool: - msg = record.getMessage() - return not (missing_pattern.search(msg) or unexpected_pattern.search(msg)) + return not pattern.search(record.getMessage()) f = _Filter() logger.addFilter(f)