From 277b735e37465b66ba0fc4f553b9b22b2446ba65 Mon Sep 17 00:00:00 2001 From: sergiopaniego Date: Wed, 5 Nov 2025 13:08:13 +0100 Subject: [PATCH] Prevent upcasting layers in prepare_model_for_kbit_training --- trl/models/utils.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/trl/models/utils.py b/trl/models/utils.py index 450ed0c6fe3..d15bcf4ae70 100644 --- a/trl/models/utils.py +++ b/trl/models/utils.py @@ -485,18 +485,10 @@ def prepare_model_for_kbit_training(model, use_gradient_checkpointing=True, grad if gradient_checkpointing_kwargs is None: gradient_checkpointing_kwargs = {} - n_upcasted = 0 - for name, param in model.named_parameters(): + for _, param in model.named_parameters(): # freeze all parameters param.requires_grad = False - # upcast LayerNorm / Norm to float32 for numerical stability - if (param.dtype in [torch.float16, torch.bfloat16]) and ( - "norm" in name.lower() or "layernorm" in name.lower() - ): - param.data = param.data.to(torch.float32) - n_upcasted += 1 - # Enable gradient checkpointing if needed if (loaded_in_kbit or is_quantized) and use_gradient_checkpointing: if hasattr(model, "enable_input_require_grads"):