diff --git a/paddlenlp/trainer/training_args.py b/paddlenlp/trainer/training_args.py
index 846a0d182f5a..c155d4c4844b 100644
--- a/paddlenlp/trainer/training_args.py
+++ b/paddlenlp/trainer/training_args.py
@@ -1174,9 +1174,6 @@ def is_segment_parallel_supported():
                 pipeline.micro_batch_size = self.per_device_train_batch_size
                 pipeline.schedule_mode = self.pipeline_schedule_mode
 
-                if self.amp_master_grad:
-                    warnings.warn("`amp_master_grad` is not supported NOW in AutoParallel!")
-                    self.amp_master_grad = False
                 logger.info(f"PP configs:{strategy.pipeline}, use master_grad: {self.amp_master_grad}")
 
                 if self.do_eval:
@@ -1260,6 +1257,7 @@ def is_segment_parallel_supported():
                 amp.enable = True
                 amp.dtype = "bfloat16" if self.bf16 else "float16"
                 amp.level = self.fp16_opt_level.lower()
+                amp.use_master_grad = self.amp_master_grad
                 amp.init_loss_scaling = self.scale_loss
                 amp.custom_black_list = self.amp_custom_black_list if self.amp_custom_black_list is not None else []
                 amp.custom_white_list = self.amp_custom_white_list if self.amp_custom_white_list is not None else []
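
For context, below is a minimal sketch (not part of the patch) of the AMP configuration path after this change, assuming `strategy` is the auto-parallel strategy object whose `amp` sub-config appears in the diff and `args` is the `TrainingArguments` instance. Before the patch, `amp_master_grad` triggered a warning and was forced to False under auto parallel; after the patch it is forwarded to `amp.use_master_grad`. The helper name and signature here are illustrative only.

def configure_auto_parallel_amp(strategy, args):
    # Sketch of the post-patch behavior; only the attribute names shown in the
    # diff above are taken from the source, everything else is illustrative.
    amp = strategy.amp
    if args.bf16 or args.fp16:
        amp.enable = True
        amp.dtype = "bfloat16" if args.bf16 else "float16"
        amp.level = args.fp16_opt_level.lower()
        # New in this patch: honor amp_master_grad instead of warning
        # and resetting it to False.
        amp.use_master_grad = args.amp_master_grad
        amp.init_loss_scaling = args.scale_loss
    return strategy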