Skip to content
Merged
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 1 addition & 20 deletions src/transformers/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2282,7 +2282,6 @@ def _inner_training_loop(
if args.eval_on_start:
self._evaluate(trial, ignore_keys_for_eval, skip_scheduler=True)

total_batched_samples = 0
for epoch in range(epochs_trained, num_train_epochs):
epoch_iterator = train_dataloader
if hasattr(epoch_iterator, "set_epoch"):
Expand Down Expand Up @@ -2312,8 +2311,6 @@ def _inner_training_loop(

step = -1
for step, inputs in enumerate(epoch_iterator):
total_batched_samples += 1

if self.args.include_num_input_tokens_seen:
main_input_name = getattr(self.model, "main_input_name", "input_ids")
if main_input_name not in inputs:
Expand Down Expand Up @@ -2372,21 +2369,7 @@ def _inner_training_loop(

self.current_flos += float(self.floating_point_ops(inputs))

is_last_step_and_steps_less_than_grad_acc = (
steps_in_epoch <= args.gradient_accumulation_steps and (step + 1) == steps_in_epoch
)

if (
total_batched_samples % args.gradient_accumulation_steps == 0
or
# last step in epoch but step is always smaller than gradient_accumulation_steps
is_last_step_and_steps_less_than_grad_acc
):
# the `or` condition of `is_last_step_and_steps_less_than_grad_acc` is not covered
# in accelerate. So, explicitly enable sync gradients to True in that case.
if is_last_step_and_steps_less_than_grad_acc:
self.accelerator.gradient_state._set_sync_gradients(True)

if self.accelerator.sync_gradients:
# Gradient clipping
if args.max_grad_norm is not None and args.max_grad_norm > 0:
# deepspeed does its own clipping
Expand Down Expand Up @@ -4786,8 +4769,6 @@ def create_accelerator_and_postprocess(self):
# take the gradient_accumulation_steps setting from TrainingArguments.
grad_acc_kwargs["num_steps"] = self.args.gradient_accumulation_steps

grad_acc_kwargs["sync_with_dataloader"] = False

@kibitzing kibitzing Oct 25, 2024

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I understand correctly, since we're calling `self.accelerator.gradient_state._set_sync_gradients` ourselves in the trainer, would it be safer to keep `sync_with_dataloader` set to `False`?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, let's keep `sync_with_dataloader` set to `False`!


gradient_accumulation_plugin = GradientAccumulationPlugin(**grad_acc_kwargs)

accelerator_config = self.args.accelerator_config.to_dict()
Expand Down