From 03a8637d1285d93430276be552a67c8dc8f8d123 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Wed, 29 Dec 2021 03:40:54 +0530 Subject: [PATCH 1/9] raise error for best ckpt evaluate with multiple ckpt callbacks --- pytorch_lightning/trainer/trainer.py | 10 ++++++++-- tests/trainer/test_trainer.py | 10 ++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 5a6f67c9ea0d3..3a958ed3c5e70 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1381,15 +1381,21 @@ def __set_ckpt_path(self, ckpt_path: Optional[str], model_provided: bool, model_ ckpt_path = "best" if ckpt_path == "best": - # if user requests the best checkpoint but we don't have it, error + if len(self.checkpoint_callbacks) > 1: + raise MisconfigurationException( + f'.{fn}(ckpt_path="best" is not supported with multiple `ModelCheckpoint` callbacks.' + " Please pass in the exact checkpoint path." + ) + if not self.checkpoint_callback: raise MisconfigurationException( f'`.{fn}(ckpt_path="best")` is set but `ModelCheckpoint` is not configured.' ) + if not self.checkpoint_callback.best_model_path: if self.fast_dev_run: raise MisconfigurationException( - f"You cannot execute `.{fn}()` with `fast_dev_run=True` unless you do" + f'You cannot execute `.{fn}(ckpt_path="best")` with `fast_dev_run=True` unless you do' f" `.{fn}(ckpt_path=PATH)` as no checkpoint path was generated during fitting." ) raise MisconfigurationException( diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 52bd2305d74ca..41865d4f9eaad 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -774,6 +774,16 @@ def predict_step(self, batch, *_): trainer_fn(model, ckpt_path="best") +@pytest.mark.parametrize("fn", ("validate", "test", "predict")) +def test_best_ckpt_evaluate_raises_error_with_multiple_ckpt_callbacks(tmpdir, fn): + """Test that an error is raised if best ckpt callback is used for evaluation configured with multiple + checkpoints.""" + trainer = Trainer(default_root_dir=tmpdir, max_steps=1, callbacks=[ModelCheckpoint(), ModelCheckpoint()]) + trainer_fn = getattr(trainer, fn) + with pytest.raises(MisconfigurationException, match="not supported with multiple `ModelCheckpoint` callbacks"): + trainer_fn(BoringModel(), ckpt_path="best") + + def test_disabled_training(tmpdir): """Verify that `limit_train_batches=0` disables the training loop unless `fast_dev_run=True`.""" From e80779fe3053a402651d46931fc72a571d7182a8 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Wed, 29 Dec 2021 20:39:39 +0530 Subject: [PATCH 2/9] chlog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 486ef208c591a..a4a9282db3c94 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -178,6 +178,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Changed `DeviceStatsMonitor` to group metrics based on the logger's `group_separator` ([#11254](https://github.com/PyTorchLightning/pytorch-lightning/pull/11254)) +- Raised `MisconfigurationException` if evaulation is triggered with `best` ckpt but trainer is configured with multiple checkpoint callbacks ([#11274](https://github.com/PyTorchLightning/pytorch-lightning/pull/11274)) +>>>>>>> e1fef6e29 (chlog) + + ### Deprecated - Deprecated `ClusterEnvironment.master_{address,port}` in favor of `ClusterEnvironment.main_{address,port}` ([#10103](https://github.com/PyTorchLightning/pytorch-lightning/issues/10103)) From fc9f0e6a8e3f1ecee8e1b2fe93e5a50d33ba42fa Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Thu, 30 Dec 2021 02:34:07 +0530 Subject: [PATCH 3/9] update test --- tests/trainer/test_trainer.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 41865d4f9eaad..e813f40be0a5f 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -1809,15 +1809,15 @@ def on_fit_start(self, trainer, pl_module: LightningModule) -> None: trainer.fit(model, datamodule=dm) -def test_exception_when_testing_or_validating_with_fast_dev_run(tmpdir): +@pytest.mark.parametrize("fn", ["validate", "test", "predict"]) +def test_exception_when_testing_or_validating_with_fast_dev_run(tmpdir, fn): trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True) model = BoringModel() trainer.fit(model) - with pytest.raises(MisconfigurationException, match=r"\.validate\(\)` with `fast_dev_run=True"): - trainer.validate() - with pytest.raises(MisconfigurationException, match=r"\.test\(\)` with `fast_dev_run=True"): - trainer.test() + trainer_fn = getattr(trainer, fn) + with pytest.raises(MisconfigurationException, match=fr'\.{fn}\(ckpt_path="best"\)` with `fast_dev_run=True'): + trainer_fn() class TrainerStagesModel(BoringModel): From 15460423cdd4bb5ce1f67dcba6e17c1f559f91bf Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Sat, 1 Jan 2022 02:12:41 +0530 Subject: [PATCH 4/9] use warning instead --- CHANGELOG.md | 4 ++-- pytorch_lightning/trainer/trainer.py | 10 +++++----- tests/trainer/test_trainer.py | 26 +++++++++++++++++++++++--- 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a4a9282db3c94..77961508fcc7d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -178,8 +178,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Changed `DeviceStatsMonitor` to group metrics based on the logger's `group_separator` ([#11254](https://github.com/PyTorchLightning/pytorch-lightning/pull/11254)) -- Raised `MisconfigurationException` if evaulation is triggered with `best` ckpt but trainer is configured with multiple checkpoint callbacks ([#11274](https://github.com/PyTorchLightning/pytorch-lightning/pull/11274)) ->>>>>>> e1fef6e29 (chlog) +- Raised `UserWarning` if evaluation is triggered with `best` ckpt and trainer is configured with multiple checkpoint callbacks ([#11274](https://github.com/PyTorchLightning/pytorch-lightning/pull/11274)) +>>>>>>> a5ca72d8d (use warning instead) ### Deprecated diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 3a958ed3c5e70..87a47b3db5e3c 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1382,9 +1382,9 @@ def __set_ckpt_path(self, ckpt_path: Optional[str], model_provided: bool, model_ if ckpt_path == "best": if len(self.checkpoint_callbacks) > 1: - raise MisconfigurationException( - f'.{fn}(ckpt_path="best" is not supported with multiple `ModelCheckpoint` callbacks.' - " Please pass in the exact checkpoint path." + rank_zero_warn( + f'`.{fn}(ckpt_path="best")` is called with Trainer configured with multiple `ModelCheckpoint`' + " callbacks. It will use the best checkpoint path from first checkpoint callback." ) if not self.checkpoint_callback: @@ -1395,8 +1395,8 @@ def __set_ckpt_path(self, ckpt_path: Optional[str], model_provided: bool, model_ if not self.checkpoint_callback.best_model_path: if self.fast_dev_run: raise MisconfigurationException( - f'You cannot execute `.{fn}(ckpt_path="best")` with `fast_dev_run=True` unless you do' - f" `.{fn}(ckpt_path=PATH)` as no checkpoint path was generated during fitting." + f'You cannot execute `.{fn}(ckpt_path="best")` with `fast_dev_run=True`.' + f" an exact checkpoint path" ) raise MisconfigurationException( f'`.{fn}(ckpt_path="best")` is set but `ModelCheckpoint` is not configured to save the best model.' diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index e813f40be0a5f..2b47302fb54d7 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -778,10 +778,30 @@ def predict_step(self, batch, *_): def test_best_ckpt_evaluate_raises_error_with_multiple_ckpt_callbacks(tmpdir, fn): """Test that an error is raised if best ckpt callback is used for evaluation configured with multiple checkpoints.""" - trainer = Trainer(default_root_dir=tmpdir, max_steps=1, callbacks=[ModelCheckpoint(), ModelCheckpoint()]) + + class TestModel(BoringModel): + def validation_step(self, batch, batch_idx): + self.log("foo", batch_idx) + self.log("bar", batch_idx + 1) + return super().validation_step(batch, batch_idx) + + ckpt_callbacks = [ModelCheckpoint(monitor="foo", save_top_k=1), ModelCheckpoint(monitor="bar", save_top_k=1)] + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=1, + limit_train_batches=1, + callbacks=ckpt_callbacks, + limit_test_batches=1, + limit_val_batches=1, + limit_predict_batches=1, + ) + + model = TestModel() + trainer.fit(model) + trainer_fn = getattr(trainer, fn) - with pytest.raises(MisconfigurationException, match="not supported with multiple `ModelCheckpoint` callbacks"): - trainer_fn(BoringModel(), ckpt_path="best") + with pytest.warns(UserWarning, match="best checkpoint path from first checkpoint callback"): + trainer_fn(ckpt_path="best") def test_disabled_training(tmpdir): From e8f50cf6ac5d0d277059955320c9e18d4ba6003a Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Tue, 4 Jan 2022 02:02:39 +0530 Subject: [PATCH 5/9] update to unit test --- pytorch_lightning/trainer/trainer.py | 2 +- tests/trainer/test_trainer.py | 30 +++++++++------------------- 2 files changed, 10 insertions(+), 22 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 87a47b3db5e3c..c50ba8e304359 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1396,7 +1396,7 @@ def __set_ckpt_path(self, ckpt_path: Optional[str], model_provided: bool, model_ if self.fast_dev_run: raise MisconfigurationException( f'You cannot execute `.{fn}(ckpt_path="best")` with `fast_dev_run=True`.' - f" an exact checkpoint path" + f" Please pass an exact checkpoint path in `.{fn}`" ) raise MisconfigurationException( f'`.{fn}(ckpt_path="best")` is set but `ModelCheckpoint` is not configured to save the best model.' diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 2b47302fb54d7..ea571ebf1cc4c 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -774,34 +774,22 @@ def predict_step(self, batch, *_): trainer_fn(model, ckpt_path="best") -@pytest.mark.parametrize("fn", ("validate", "test", "predict")) -def test_best_ckpt_evaluate_raises_error_with_multiple_ckpt_callbacks(tmpdir, fn): - """Test that an error is raised if best ckpt callback is used for evaluation configured with multiple +def test_best_ckpt_evaluate_raises_warning_with_multiple_ckpt_callbacks(tmpdir): + """Test that a warning is raised if best ckpt callback is used for evaluation configured with multiple checkpoints.""" - class TestModel(BoringModel): - def validation_step(self, batch, batch_idx): - self.log("foo", batch_idx) - self.log("bar", batch_idx + 1) - return super().validation_step(batch, batch_idx) - - ckpt_callbacks = [ModelCheckpoint(monitor="foo", save_top_k=1), ModelCheckpoint(monitor="bar", save_top_k=1)] + ckpt_callback1 = ModelCheckpoint(monitor="foo", save_top_k=1) + ckpt_callback1.best_model_path = "foo_best_model.ckpt" + ckpt_callback2 = ModelCheckpoint(monitor="bar", save_top_k=1) + ckpt_callback2.best_model_path = "bar_best_model.ckpt" + ckpt_callbacks = [ckpt_callback1, ckpt_callback2] trainer = Trainer( - default_root_dir=tmpdir, - max_epochs=1, - limit_train_batches=1, callbacks=ckpt_callbacks, - limit_test_batches=1, - limit_val_batches=1, - limit_predict_batches=1, ) + trainer.state.fn = TrainerFn.TESTING - model = TestModel() - trainer.fit(model) - - trainer_fn = getattr(trainer, fn) with pytest.warns(UserWarning, match="best checkpoint path from first checkpoint callback"): - trainer_fn(ckpt_path="best") + trainer._Trainer__set_ckpt_path(ckpt_path="best", model_provided=False, model_connected=True) def test_disabled_training(tmpdir): From dd6c6f23637f2277d2adbd085d82e105565edf20 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Tue, 4 Jan 2022 02:04:48 +0530 Subject: [PATCH 6/9] bad rebase --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 77961508fcc7d..8da3154bbe94b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -179,7 +179,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Raised `UserWarning` if evaluation is triggered with `best` ckpt and trainer is configured with multiple checkpoint callbacks ([#11274](https://github.com/PyTorchLightning/pytorch-lightning/pull/11274)) ->>>>>>> a5ca72d8d (use warning instead) ### Deprecated From 84436be887f22895f6eaaba88fb5769f17daf719 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Tue, 4 Jan 2022 02:14:38 +0530 Subject: [PATCH 7/9] improve test --- pytorch_lightning/trainer/trainer.py | 2 +- tests/trainer/test_trainer.py | 16 ++++++---------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index c50ba8e304359..cebccba907ddb 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1396,7 +1396,7 @@ def __set_ckpt_path(self, ckpt_path: Optional[str], model_provided: bool, model_ if self.fast_dev_run: raise MisconfigurationException( f'You cannot execute `.{fn}(ckpt_path="best")` with `fast_dev_run=True`.' - f" Please pass an exact checkpoint path in `.{fn}`" + f" Please pass an exact checkpoint path in `.{fn}()`" ) raise MisconfigurationException( f'`.{fn}(ckpt_path="best")` is set but `ModelCheckpoint` is not configured to save the best model.' diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index ea571ebf1cc4c..d8cb94d5102c7 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -774,7 +774,7 @@ def predict_step(self, batch, *_): trainer_fn(model, ckpt_path="best") -def test_best_ckpt_evaluate_raises_warning_with_multiple_ckpt_callbacks(tmpdir): +def test_best_ckpt_evaluate_raises_warning_with_multiple_ckpt_callbacks(): """Test that a warning is raised if best ckpt callback is used for evaluation configured with multiple checkpoints.""" @@ -1817,15 +1817,11 @@ def on_fit_start(self, trainer, pl_module: LightningModule) -> None: trainer.fit(model, datamodule=dm) -@pytest.mark.parametrize("fn", ["validate", "test", "predict"]) -def test_exception_when_testing_or_validating_with_fast_dev_run(tmpdir, fn): - trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True) - model = BoringModel() - trainer.fit(model) - - trainer_fn = getattr(trainer, fn) - with pytest.raises(MisconfigurationException, match=fr'\.{fn}\(ckpt_path="best"\)` with `fast_dev_run=True'): - trainer_fn() +def test_exception_when_testing_or_validating_with_fast_dev_run(): + trainer = Trainer(fast_dev_run=True) + trainer.state.fn = TrainerFn.TESTING + with pytest.raises(MisconfigurationException, match=r"with `fast_dev_run=True`. .* pass an exact checkpoint path"): + trainer._Trainer__set_ckpt_path(ckpt_path="best", model_provided=False, model_connected=True) class TrainerStagesModel(BoringModel): From 92c01203e7cebe748ec2c1e78db4f32808b039a0 Mon Sep 17 00:00:00 2001 From: Rohit Gupta Date: Tue, 4 Jan 2022 03:07:23 +0530 Subject: [PATCH 8/9] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Carlos MocholĂ­ --- pytorch_lightning/trainer/trainer.py | 2 +- tests/trainer/test_trainer.py | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index cebccba907ddb..d89ad75411c8c 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1396,7 +1396,7 @@ def __set_ckpt_path(self, ckpt_path: Optional[str], model_provided: bool, model_ if self.fast_dev_run: raise MisconfigurationException( f'You cannot execute `.{fn}(ckpt_path="best")` with `fast_dev_run=True`.' - f" Please pass an exact checkpoint path in `.{fn}()`" + f" Please pass an exact checkpoint path to `.{fn}(ckpt_path=...)`" ) raise MisconfigurationException( f'`.{fn}(ckpt_path="best")` is set but `ModelCheckpoint` is not configured to save the best model.' diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index d8cb94d5102c7..281afae7c30f7 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -778,14 +778,11 @@ def test_best_ckpt_evaluate_raises_warning_with_multiple_ckpt_callbacks(): """Test that a warning is raised if best ckpt callback is used for evaluation configured with multiple checkpoints.""" - ckpt_callback1 = ModelCheckpoint(monitor="foo", save_top_k=1) + ckpt_callback1 = ModelCheckpoint() ckpt_callback1.best_model_path = "foo_best_model.ckpt" - ckpt_callback2 = ModelCheckpoint(monitor="bar", save_top_k=1) + ckpt_callback2 = ModelCheckpoint() ckpt_callback2.best_model_path = "bar_best_model.ckpt" - ckpt_callbacks = [ckpt_callback1, ckpt_callback2] - trainer = Trainer( - callbacks=ckpt_callbacks, - ) + trainer = Trainer(callbacks=[ckpt_callback1, ckpt_callback2]) trainer.state.fn = TrainerFn.TESTING with pytest.warns(UserWarning, match="best checkpoint path from first checkpoint callback"): From 8e49ee47095d8a8fe3432093ba20e10fef98e093 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 4 Jan 2022 13:00:00 +0000 Subject: [PATCH 9/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8439f7edea363..19111ecb3d29a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -186,7 +186,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Raised `UserWarning` if evaluation is triggered with `best` ckpt and trainer is configured with multiple checkpoint callbacks ([#11274](https://github.com/PyTorchLightning/pytorch-lightning/pull/11274)) - + - `Trainer.logged_metrics` now always contains scalar tensors, even when a Python scalar was logged ([#11270](https://github.com/PyTorchLightning/pytorch-lightning/pull/11270))