diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index 3d60f803329e..fcc78c33d173 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -1020,11 +1020,15 @@ def create_optimizer(self):
             decay_parameters = [name for name in decay_parameters if "bias" not in name]
             optimizer_grouped_parameters = [
                 {
-                    "params": [p for n, p in opt_model.named_parameters() if n in decay_parameters],
+                    "params": [
+                        p for n, p in opt_model.named_parameters() if (n in decay_parameters and p.requires_grad)
+                    ],
                     "weight_decay": self.args.weight_decay,
                 },
                 {
-                    "params": [p for n, p in opt_model.named_parameters() if n not in decay_parameters],
+                    "params": [
+                        p for n, p in opt_model.named_parameters() if (n not in decay_parameters and p.requires_grad)
+                    ],
                     "weight_decay": 0.0,
                 },
             ]
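
The change adds a `p.requires_grad` check to both parameter groups, so frozen parameters are never handed to the optimizer. Below is a minimal, self-contained sketch of the same grouping pattern outside the `Trainer`; the toy model, the bias-only decay rule, and the hyperparameters are illustrative assumptions (the actual `Trainer` additionally excludes LayerNorm weights from decay), not the library's code.

```python
import torch
from torch import nn

# Toy model standing in for opt_model; purely illustrative.
model = nn.Sequential(nn.Linear(16, 32), nn.LayerNorm(32), nn.Linear(32, 4))

# Freeze the first layer, e.g. for partial fine-tuning.
for p in model[0].parameters():
    p.requires_grad = False

# Names of parameters that should receive weight decay
# (here: everything that is not a bias).
decay_parameters = [n for n, p in model.named_parameters() if "bias" not in n]

optimizer_grouped_parameters = [
    {
        # Decayed group: named for decay AND still trainable.
        "params": [
            p for n, p in model.named_parameters() if (n in decay_parameters and p.requires_grad)
        ],
        "weight_decay": 0.01,
    },
    {
        # Undecayed group: also filtered down to trainable parameters.
        "params": [
            p for n, p in model.named_parameters() if (n not in decay_parameters and p.requires_grad)
        ],
        "weight_decay": 0.0,
    },
]

optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=5e-5)

# Frozen parameters appear in neither group, so the optimizer never
# tracks them at all.
total = sum(len(g["params"]) for g in optimizer_grouped_parameters)
print(f"parameters handed to the optimizer: {total}")
```

Filtering at group-construction time matters because AdamW lazily allocates per-parameter state (`exp_avg`, `exp_avg_sq`) for every tensor it is given; excluding frozen parameters up front avoids that memory overhead for weights that will never be updated.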