From f7a189fb20955effc309d11891d94756dfee6ea7 Mon Sep 17 00:00:00 2001
From: awaelchli
Date: Wed, 10 Aug 2022 00:32:37 +0200
Subject: [PATCH] Reset all results on epoch end (#14061)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Carlos Mocholí
---
 src/pytorch_lightning/CHANGELOG.md            |  2 +-
 .../logger_connector/logger_connector.py      |  3 +-
 .../logging_/test_train_loop_logging.py       | 29 +++++++++++++++++--
 3 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/src/pytorch_lightning/CHANGELOG.md b/src/pytorch_lightning/CHANGELOG.md
index 3aee9a5ce4a90..8ff6b04e355db 100644
--- a/src/pytorch_lightning/CHANGELOG.md
+++ b/src/pytorch_lightning/CHANGELOG.md
@@ -8,7 +8,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Fixed
 
--
+- Fixed epoch-end logging results not being reset after the end of the epoch ([#14061](https://github.com/Lightning-AI/lightning/pull/14061))
 
 
 ## [1.7.1] - 2022-08-09
diff --git a/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py b/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py
index ff882912625d0..02e17a8d93494 100644
--- a/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py
+++ b/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py
@@ -163,8 +163,7 @@ def update_train_epoch_metrics(self) -> None:
         self.log_metrics(self.metrics["log"])
 
         # reset result collection for next epoch
-        assert self.trainer._results is not None
-        self.trainer._results.reset(metrics=True)
+        self.reset_results()
 
     """
     Utilities and properties
diff --git a/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py b/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py
index 5855eba4c86af..d16be306b9365 100644
--- a/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py
+++ b/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py
@@ -569,11 +569,12 @@ def on_train_epoch_end(self, trainer, pl_module):
     "accelerator",
     [
         pytest.param("gpu", marks=RunIf(min_cuda_gpus=1)),
+        "cpu",
     ],
 )
 def test_metric_are_properly_reduced(tmpdir, accelerator):
     class TestingModel(BoringModel):
-        def __init__(self, *args, **kwargs) -> None:
+        def __init__(self) -> None:
             super().__init__()
             self.val_acc = Accuracy()
 
@@ -592,7 +593,6 @@ def validation_step(self, batch, batch_idx):
         return super().validation_step(batch, batch_idx)
 
     early_stop = EarlyStopping(monitor="val_acc", mode="max")
-    checkpoint = ModelCheckpoint(monitor="val_acc", save_last=True, save_top_k=2, mode="max")
 
     model = TestingModel()
 
@@ -812,3 +812,28 @@ def training_step(self, batch, batch_idx):
         call(metrics={"foo_epoch": 0.0, "epoch": 1}, step=3),
     ]
 )
+
+
+@mock.patch("pytorch_lightning.loggers.TensorBoardLogger.log_metrics")
+def test_log_on_train_start(mock_log_metrics, tmpdir):
+    """Tests that logged metrics on_train_start get reset after the first epoch."""
+
+    class MyModel(BoringModel):
+        def on_train_start(self):
+            self.log("foo", 123)
+
+    model = MyModel()
+    trainer = Trainer(
+        default_root_dir=tmpdir,
+        limit_train_batches=1,
+        limit_val_batches=0,
+        max_epochs=2,
+        log_every_n_steps=1,
+        enable_model_summary=False,
+        enable_checkpointing=False,
+        enable_progress_bar=False,
+    )
+    trainer.fit(model)
+
+    assert mock_log_metrics.mock_calls == [call(metrics={"foo": 123.0, "epoch": 0}, step=0)]
+    assert trainer.max_epochs > 1