Skip to content
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions src/transformers/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1831,6 +1831,7 @@ def _inner_training_loop(
# AT THE VERY END!
_ = list(train_dataloader.sampler)

total_steps = 0
Comment thread
stas00 marked this conversation as resolved.
Outdated
for epoch in range(epochs_trained, num_train_epochs):
if isinstance(train_dataloader, DataLoader) and isinstance(train_dataloader.sampler, DistributedSampler):
train_dataloader.sampler.set_epoch(epoch)
Expand Down Expand Up @@ -1867,6 +1868,7 @@ def _inner_training_loop(

step = -1
for step, inputs in enumerate(epoch_iterator):
total_steps += 1
Comment thread
stas00 marked this conversation as resolved.
Outdated
if rng_to_sync:
self._load_rng_state(resume_from_checkpoint)
rng_to_sync = False
Expand All @@ -1886,8 +1888,9 @@ def _inner_training_loop(
if step % args.gradient_accumulation_steps == 0:
self.control = self.callback_handler.on_step_begin(args, self.state, self.control)

# print(f"total_steps={total_steps}")
if (
- ((step + 1) % args.gradient_accumulation_steps != 0)
+ (total_steps % args.gradient_accumulation_steps != 0)
and args.local_rank != -1
and args._no_sync_in_gradient_accumulation
):
Expand All @@ -1913,11 +1916,13 @@ def _inner_training_loop(
if self.deepspeed:
self.deepspeed.step()

- if (step + 1) % args.gradient_accumulation_steps == 0 or (
+ # print(f"{step+1} { (step + 1) % args.gradient_accumulation_steps }")
+ if total_steps % args.gradient_accumulation_steps == 0 or (
# last step in epoch but step is always smaller than gradient_accumulation_steps
steps_in_epoch <= args.gradient_accumulation_steps
and (step + 1) == steps_in_epoch
):
# print("boundary")
# Gradient clipping
if args.max_grad_norm is not None and args.max_grad_norm > 0 and not self.deepspeed:
# deepspeed does its own clipping
Expand Down