Skip to content

Commit

Permalink
Address feedback
Browse files (browse the repository at this point in the history)
Signed-off-by: SeanNaren <[email protected]>
  • Loading branch information
SeanNaren committed Apr 4, 2023
1 parent 9c6db11 commit ef2d18e
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 10 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -524,6 +524,11 @@ def build_data_loader(
)

def setup_training_data(self):
if not self.cfg.data.train_ds.drop_last:
raise AttributeError(
"`drop_last` is required for the training dataset to ensure each batch is the same micro-batch size."
"To set this, set the variable `data.train_ds.drop_last=True` in the config."
)
self._train_dl = self.build_data_loader(
self._train_ds,
batch_size=self.cfg.data.train_ds.micro_batch_size,
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -374,25 +374,19 @@ def training_step(self, dataloader_iter, batch_idx):
if loss_scale is not None:
self.log('loss_scale', loss_scale)

self.log(
'reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=self.cfg.global_batch_size
)
self.log('reduced_train_loss', loss_mean, prog_bar=True, rank_zero_only=True, batch_size=1)
lr = self._optimizer.param_groups[0]['lr']
self.log('lr', lr, rank_zero_only=True, batch_size=self.cfg.global_batch_size)
self.log('lr', lr, rank_zero_only=True, batch_size=1)
self.log(
'global_step',
self.trainer.global_step,
prog_bar=True,
rank_zero_only=True,
batch_size=self.cfg.global_batch_size,
'global_step', self.trainer.global_step, prog_bar=True, rank_zero_only=True, batch_size=1,
)
# TODO: make sure compute_consumed_samples works for pipeline parallelism
self.log(
'consumed_samples',
self.compute_consumed_samples(self.trainer.global_step - self.init_global_step),
prog_bar=True,
rank_zero_only=True,
batch_size=self.cfg.global_batch_size,
batch_size=1,
)
return loss_mean

Expand Down

0 comments on commit ef2d18e

Please sign in to comment.