From 7464aca44e73f72998c411e999e085e4fdfd7568 Mon Sep 17 00:00:00 2001 From: Gianluca Scarpellini Date: Thu, 7 Jan 2021 11:50:08 +0100 Subject: [PATCH 1/3] test_cpu and test_gpu EvalModelTemplate deprecation (#4820) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * test_cpu refactoring - BoringModel and checkpoints; test_gpu refactoring - BoringModel; boring_model refactoring - validation, testing; Fix - run_prediction as dispatcher for testing BoringModel * Removed EvalModelTemplate import from test_cpu and test_gpu * Reverting unintended changes * Issues with checkpointing * Fixed tests for logging and checkpointing * Fix for dispatcher * test_cpu refactoring - BoringModel and checkpoints; test_gpu refactoring - BoringModel; boring_model refactoring - validation, testing; Fix - run_prediction as dispatcher for testing BoringModel * Removed EvalModelTemplate import from test_cpu and test_gpu * Reverting unintended changes * Issues with checkpointing * Fixed tests for logging and checkpointing * Fix for dispatcher * Fixed acc check for stochasticity of seeds * Fixed according to @borda suggestions * Hparams for boring_model * Deprecated RuntimeParamChangeModelAssign (functionality is tested in RuntimeParamChangeModelSaving) * Reduced boring_model parameters to just in and out features, test_cpu models inherit BoringModel to specify additional parameters (e.g., optimizer) * Fix PEP8 * Update tests/base/develop_pipelines.py Co-authored-by: Rohit Gupta * Update tests/base/boring_model.py Co-authored-by: Rohit Gupta * Update tests/base/develop_pipelines.py Co-authored-by: Rohit Gupta * Update tests/models/test_cpu.py Co-authored-by: Rohit Gupta * Update tests/models/test_cpu.py Co-authored-by: Rohit Gupta * Merged test_early_stopping with all_features; added TODO for self.log * Fixed test_all_features trainer options * Ready for review! * Update tests/models/test_cpu.py Thank you! 
:) Co-authored-by: Rohit Gupta * Update tests/models/test_cpu.py Co-authored-by: Rohit Gupta * Update tests/models/test_cpu.py Co-authored-by: Rohit Gupta * Update tests/models/test_cpu.py Co-authored-by: Rohit Gupta * Update tests/models/test_cpu.py Co-authored-by: Rohit Gupta * added optimizer_name, lr, and batch_size as hparams for save_hparameters() * Fixes for reducing PR size * Reverse test_hparams (removed DEPRECATED test for hparams direct assignment) * Changes for in_features * Fixed hparams * Fixed parameters for boring_model * Update tests/models/test_cpu.py Co-authored-by: Carlos Mocholí * Update tests/models/test_cpu.py Co-authored-by: Carlos Mocholí * Update tests/models/test_cpu.py Co-authored-by: Carlos Mocholí * fix for pep8 * Fixed run_predction and TODO * fix min acc for darwin/windows without pl_opt * eval as DEFAULT run_prediction strategy * Updated val_dataloader for running_test_no_val Co-authored-by: Rohit Gupta Co-authored-by: chaton Co-authored-by: Carlos Mocholí --- tests/base/develop_pipelines.py | 46 +++-- .../data/horovod/train_default_model.py | 2 +- tests/models/test_cpu.py | 168 +++++++++--------- tests/models/test_gpu.py | 11 +- tests/models/test_hparams.py | 16 +- tests/models/test_restore.py | 7 +- tests/trainer/test_dataloaders.py | 4 +- 7 files changed, 132 insertions(+), 122 deletions(-) diff --git a/tests/base/develop_pipelines.py b/tests/base/develop_pipelines.py index 24535dc67da8e..b6289079a35ab 100644 --- a/tests/base/develop_pipelines.py +++ b/tests/base/develop_pipelines.py @@ -14,8 +14,8 @@ import torch from pytorch_lightning import Trainer -from tests.base.develop_utils import load_model_from_checkpoint, get_default_logger, \ - reset_seed +from tests.base import BoringModel +from tests.base.develop_utils import get_default_logger, load_model_from_checkpoint, reset_seed def run_model_test_without_loggers(trainer_options, model, min_acc: float = 0.50): @@ -31,6 +31,7 @@ def run_model_test_without_loggers(trainer_options, model, min_acc: float = 0.50 pretrained_model = load_model_from_checkpoint( trainer.logger, trainer.checkpoint_callback.best_model_path, + type(model) ) # test new model accuracy @@ -39,7 +40,7 @@ def run_model_test_without_loggers(trainer_options, model, min_acc: float = 0.50 test_loaders = [test_loaders] for dataloader in test_loaders: - run_prediction(dataloader, pretrained_model, min_acc=min_acc) + run_prediction(pretrained_model, dataloader, min_acc=min_acc) if trainer.use_ddp: # on hpc this would work fine... 
but need to hack it for the purpose of the test @@ -47,7 +48,8 @@ def run_model_test_without_loggers(trainer_options, model, min_acc: float = 0.50 trainer.optimizers, trainer.lr_schedulers = pretrained_model.configure_optimizers() -def run_model_test(trainer_options, model, on_gpu: bool = True, version=None, with_hpc: bool = True): +def run_model_test(trainer_options, model, on_gpu: bool = True, version=None, + with_hpc: bool = True, min_acc: float = 0.25): reset_seed() save_dir = trainer_options['default_root_dir'] @@ -56,9 +58,6 @@ def run_model_test(trainer_options, model, on_gpu: bool = True, version=None, wi logger = get_default_logger(save_dir, version=version) trainer_options.update(logger=logger) - if 'checkpoint_callback' not in trainer_options: - trainer_options.update(checkpoint_callback=True) - trainer = Trainer(**trainer_options) initial_values = torch.tensor([torch.sum(torch.abs(x)) for x in model.parameters()]) result = trainer.fit(model) @@ -66,10 +65,11 @@ def run_model_test(trainer_options, model, on_gpu: bool = True, version=None, wi assert result == 1, 'trainer failed' # Check that the model is actually changed post-training - assert torch.norm(initial_values - post_train_values) > 0.1 + change_ratio = torch.norm(initial_values - post_train_values) + assert change_ratio > 0.1, f"the model is changed of {change_ratio}" # test model loading - pretrained_model = load_model_from_checkpoint(logger, trainer.checkpoint_callback.best_model_path) + pretrained_model = load_model_from_checkpoint(logger, trainer.checkpoint_callback.best_model_path, type(model)) # test new model accuracy test_loaders = model.test_dataloader() @@ -77,14 +77,15 @@ def run_model_test(trainer_options, model, on_gpu: bool = True, version=None, wi test_loaders = [test_loaders] for dataloader in test_loaders: - run_prediction(dataloader, pretrained_model) + run_prediction(pretrained_model, dataloader, min_acc=min_acc) if with_hpc: if trainer.use_ddp or trainer.use_ddp2: # on hpc this would work fine... 
but need to hack it for the purpose of the test trainer.model = pretrained_model - trainer.optimizers, trainer.lr_schedulers, trainer.optimizer_frequencies = \ - trainer.init_optimizers(pretrained_model) + trainer.optimizers, trainer.lr_schedulers, trainer.optimizer_frequencies = trainer.init_optimizers( + pretrained_model + ) # test HPC saving trainer.checkpoint_connector.hpc_save(save_dir, logger) @@ -93,7 +94,14 @@ def run_model_test(trainer_options, model, on_gpu: bool = True, version=None, wi trainer.checkpoint_connector.hpc_load(checkpoint_path, on_gpu=on_gpu) -def run_prediction(dataloader, trained_model, dp=False, min_acc=0.50): +def run_prediction(trained_model, dataloader, dp=False, min_acc=0.25): + if isinstance(trained_model, BoringModel): + return _boring_model_run_prediction(trained_model, dataloader, dp, min_acc) + else: + return _eval_model_template_run_prediction(trained_model, dataloader, dp, min_acc) + + +def _eval_model_template_run_prediction(trained_model, dataloader, dp=False, min_acc=0.50): # run prediction on 1 batch batch = next(iter(dataloader)) x, y = batch @@ -102,7 +110,7 @@ def run_prediction(dataloader, trained_model, dp=False, min_acc=0.50): if dp: with torch.no_grad(): output = trained_model(batch, 0) - acc = output['val_acc'] + acc = output['val_acc'] acc = torch.mean(acc).item() else: @@ -119,3 +127,13 @@ def run_prediction(dataloader, trained_model, dp=False, min_acc=0.50): acc = acc.item() assert acc >= min_acc, f"This model is expected to get > {min_acc} in test set (it got {acc})" + + +def _boring_model_run_prediction(trained_model, dataloader, dp=False, min_acc=0.25): + # run prediction on 1 batch + batch = next(iter(dataloader)) + with torch.no_grad(): + output = trained_model(batch) + acc = trained_model.loss(batch, output) + + assert acc >= min_acc, f"This model is expected to get, {min_acc} in test set but got {acc}" diff --git a/tests/models/data/horovod/train_default_model.py b/tests/models/data/horovod/train_default_model.py index 62f874902b094..c38b5b4efafe8 100644 --- a/tests/models/data/horovod/train_default_model.py +++ b/tests/models/data/horovod/train_default_model.py @@ -72,7 +72,7 @@ def run_test_from_config(trainer_options): test_loaders = [test_loaders] for dataloader in test_loaders: - run_prediction(dataloader, pretrained_model) + run_prediction(pretrained_model, dataloader) # test HPC saving trainer.checkpoint_connector.hpc_save(ckpt_path, trainer.logger) diff --git a/tests/models/test_cpu.py b/tests/models/test_cpu.py index 892077ccdb1be..cc24f6f187502 100644 --- a/tests/models/test_cpu.py +++ b/tests/models/test_cpu.py @@ -21,15 +21,14 @@ import tests.base.develop_pipelines as tpipes import tests.base.develop_utils as tutils from pytorch_lightning import Trainer -from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint -from tests.base import EvalModelTemplate +from pytorch_lightning.callbacks import Callback, EarlyStopping, ModelCheckpoint +from tests.base import BoringModel @pytest.mark.parametrize("enable_pl_optimizer", [False, True]) def test_cpu_slurm_save_load(enable_pl_optimizer, tmpdir): """Verify model save/load/checkpoint on CPU.""" - hparams = EvalModelTemplate.get_default_hparams() - model = EvalModelTemplate(**hparams) + model = BoringModel() # logger file to get meta logger = tutils.get_default_logger(tmpdir) @@ -61,11 +60,8 @@ def test_cpu_slurm_save_load(enable_pl_optimizer, tmpdir): for batch in dataloader: break - x, y = batch - x = x.view(x.size(0), -1) - model.eval() - pred_before_saving = 
model(x) + pred_before_saving = model(batch) # test HPC saving # simulate snapshot on slurm @@ -75,26 +71,26 @@ def test_cpu_slurm_save_load(enable_pl_optimizer, tmpdir): # new logger file to get meta logger = tutils.get_default_logger(tmpdir, version=version) + model = BoringModel() + + class _StartCallback(Callback): + # set the epoch start hook so we can predict before the model does the full training + def on_train_epoch_start(self, trainer, model): + assert trainer.global_step == real_global_step and trainer.global_step > 0 + # predict with loaded model to make sure answers are the same + mode = model.training + model.eval() + new_pred = model(batch) + assert torch.eq(pred_before_saving, new_pred).all() + model.train(mode) + trainer = Trainer( default_root_dir=tmpdir, max_epochs=1, logger=logger, - callbacks=[ModelCheckpoint(dirpath=tmpdir)], enable_pl_optimizer=enable_pl_optimizer, + callbacks=[_StartCallback(), ModelCheckpoint(dirpath=tmpdir)], ) - model = EvalModelTemplate(**hparams) - - # set the epoch start hook so we can predict before the model does the full training - def assert_pred_same(): - assert trainer.global_step == real_global_step and trainer.global_step > 0 - - # predict with loaded model to make sure answers are the same - trainer.model.eval() - new_pred = trainer.model(x) - assert torch.all(torch.eq(pred_before_saving, new_pred)).item() == 1 - - model.on_epoch_start = assert_pred_same - # by calling fit again, we trigger training, loading weights from the cluster # and our hook to predict using current model before any more weight updates trainer.fit(model) @@ -102,21 +98,26 @@ def assert_pred_same(): @pytest.mark.parametrize("enable_pl_optimizer", [False, True]) def test_early_stopping_cpu_model(enable_pl_optimizer, tmpdir): - """Test each of the trainer options.""" - stopping = EarlyStopping(monitor='early_stop_on', min_delta=0.1) + class ModelTrainVal(BoringModel): + def validation_epoch_end(self, outputs) -> None: + val_loss = torch.stack([x["x"] for x in outputs]).mean() + self.log('val_loss', val_loss) + + stopping = EarlyStopping(monitor="val_loss", min_delta=0.1) trainer_options = dict( - default_root_dir=tmpdir, callbacks=[stopping], - max_epochs=2, + default_root_dir=tmpdir, gradient_clip_val=1.0, overfit_batches=0.20, track_grad_norm=2, + enable_pl_optimizer=enable_pl_optimizer, + progress_bar_refresh_rate=0, + accumulate_grad_batches=2, limit_train_batches=0.1, limit_val_batches=0.1, - enable_pl_optimizer=enable_pl_optimizer, ) - model = EvalModelTemplate() + model = ModelTrainVal() tpipes.run_model_test(trainer_options, model, on_gpu=False) # test freeze on cpu @@ -146,26 +147,29 @@ def test_multi_cpu_model_ddp(enable_pl_optimizer, tmpdir): enable_pl_optimizer=enable_pl_optimizer, ) - model = EvalModelTemplate() - tpipes.run_model_test(trainer_options, model, on_gpu=False) + model = BoringModel() + tpipes.run_model_test(trainer_options, model, on_gpu=False, min_acc=0.05) def test_lbfgs_cpu_model(tmpdir): - """Test each of the trainer options.""" + """Test each of the trainer options. 
Testing LBFGS optimizer""" + class ModelSpecifiedOptimizer(BoringModel): + def __init__(self, optimizer_name, learning_rate): + super().__init__() + self.optimizer_name = optimizer_name + self.learning_rate = learning_rate + self.save_hyperparameters() + trainer_options = dict( default_root_dir=tmpdir, max_epochs=1, progress_bar_refresh_rate=0, - weights_summary='top', + weights_summary="top", limit_train_batches=0.2, limit_val_batches=0.2, ) - hparams = EvalModelTemplate.get_default_hparams() - hparams.update(optimizer_name='lbfgs', - learning_rate=0.004) - model = EvalModelTemplate(**hparams) - model.configure_optimizers = model.configure_optimizers__lbfgs + model = ModelSpecifiedOptimizer(optimizer_name="LBFGS", learning_rate=0.004) tpipes.run_model_test_without_loggers(trainer_options, model, min_acc=0.25) @@ -181,8 +185,8 @@ def test_default_logger_callbacks_cpu_model(tmpdir): limit_val_batches=0.01, ) - model = EvalModelTemplate() - tpipes.run_model_test_without_loggers(trainer_options, model) + model = BoringModel() + tpipes.run_model_test_without_loggers(trainer_options, model, min_acc=0.01) # test freeze on cpu model.freeze() @@ -191,7 +195,17 @@ def test_default_logger_callbacks_cpu_model(tmpdir): def test_running_test_after_fitting(tmpdir): """Verify test() on fitted model.""" - model = EvalModelTemplate() + class ModelTrainValTest(BoringModel): + + def validation_epoch_end(self, outputs) -> None: + val_loss = torch.stack([x["x"] for x in outputs]).mean() + self.log('val_loss', val_loss) + + def test_epoch_end(self, outputs) -> None: + test_loss = torch.stack([x["y"] for x in outputs]).mean() + self.log('test_loss', test_loss) + + model = ModelTrainValTest() # logger file to get meta logger = tutils.get_default_logger(tmpdir) @@ -217,12 +231,22 @@ def test_running_test_after_fitting(tmpdir): trainer.test() # test we have good test accuracy - tutils.assert_ok_model_acc(trainer, thr=0.5) + tutils.assert_ok_model_acc(trainer, key='test_loss', thr=0.5) def test_running_test_no_val(tmpdir): - """Verify `test()` works on a model with no `val_loader`.""" - model = EvalModelTemplate() + """Verify `test()` works on a model with no `val_dataloader`. 
It performs + train and test only""" + class ModelTrainTest(BoringModel): + + def val_dataloader(self): + pass + + def test_epoch_end(self, outputs) -> None: + test_loss = torch.stack([x["y"] for x in outputs]).mean() + self.log('test_loss', test_loss) + + model = ModelTrainTest() # logger file to get meta logger = tutils.get_default_logger(tmpdir) @@ -248,12 +272,12 @@ def test_running_test_no_val(tmpdir): trainer.test() # test we have good test accuracy - tutils.assert_ok_model_acc(trainer) + tutils.assert_ok_model_acc(trainer, key='test_loss') def test_simple_cpu(tmpdir): """Verify continue training session on CPU.""" - model = EvalModelTemplate() + model = BoringModel() # fit model trainer = Trainer( @@ -275,32 +299,12 @@ def test_cpu_model(tmpdir): progress_bar_refresh_rate=0, max_epochs=1, limit_train_batches=0.4, - limit_val_batches=0.4 - ) - - model = EvalModelTemplate() - - tpipes.run_model_test(trainer_options, model, on_gpu=False) - - -@pytest.mark.parametrize("enable_pl_optimizer", [False, True]) -def test_all_features_cpu_model(enable_pl_optimizer, tmpdir): - """Test each of the trainer options.""" - trainer_options = dict( - default_root_dir=tmpdir, - gradient_clip_val=1.0, - overfit_batches=0.20, - track_grad_norm=2, - progress_bar_refresh_rate=0, - accumulate_grad_batches=2, - max_epochs=1, - limit_train_batches=0.4, limit_val_batches=0.4, - enable_pl_optimizer=enable_pl_optimizer, ) - model = EvalModelTemplate() - tpipes.run_model_test(trainer_options, model, on_gpu=False) + model = BoringModel() + + tpipes.run_model_test(trainer_options, model, on_gpu=False, min_acc=0.01) def test_tbptt_cpu_model(tmpdir): @@ -319,10 +323,12 @@ def __getitem__(self, i): def __len__(self): return 1 - class BpttTestModel(EvalModelTemplate): - def __init__(self, *args, **kwargs): + class BpttTestModel(BoringModel): + def __init__(self, batch_size, in_features, out_features, *args, **kwargs): super().__init__(*args, **kwargs) self.test_hidden = None + self.batch_size = batch_size + self.layer = torch.nn.Linear(in_features, out_features) def training_step(self, batch, batch_idx, hiddens): assert hiddens == self.test_hidden, "Hidden state not persistent between tbptt steps" @@ -335,18 +341,17 @@ def training_step(self, batch, batch_idx, hiddens): assert y_tensor.shape[1] == truncated_bptt_steps, "tbptt split list failed" pred = self(x_tensor.view(batch_size, truncated_bptt_steps)) - loss_val = torch.nn.functional.mse_loss( - pred, y_tensor.view(batch_size, truncated_bptt_steps)) + loss_val = torch.nn.functional.mse_loss(pred, y_tensor.view(batch_size, truncated_bptt_steps)) return { - 'loss': loss_val, - 'hiddens': self.test_hidden, + "loss": loss_val, + "hiddens": self.test_hidden, } def training_epoch_end(self, training_step_outputs): training_step_outputs = training_step_outputs[0] assert len(training_step_outputs) == (sequence_size / truncated_bptt_steps) - loss = torch.stack([x['loss'] for x in training_step_outputs]).mean() - self.log('train_loss', loss) + loss = torch.stack([x["loss"] for x in training_step_outputs]).mean() + self.log("train_loss", loss) def train_dataloader(self): return torch.utils.data.DataLoader( @@ -356,15 +361,8 @@ def train_dataloader(self): sampler=None, ) - hparams = EvalModelTemplate.get_default_hparams() - hparams.update( - batch_size=batch_size, - in_features=truncated_bptt_steps, - hidden_dim=truncated_bptt_steps, - out_features=truncated_bptt_steps - ) - - model = BpttTestModel(**hparams) + model = BpttTestModel(batch_size=batch_size, + 
in_features=truncated_bptt_steps, out_features=truncated_bptt_steps) model.example_input_array = torch.randn(5, truncated_bptt_steps) # fit model diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py index 169552ce1bd75..7cfeb8f0ae53e 100644 --- a/tests/models/test_gpu.py +++ b/tests/models/test_gpu.py @@ -21,11 +21,10 @@ import tests.base.develop_pipelines as tpipes import tests.base.develop_utils as tutils from pytorch_lightning import Trainer +from pytorch_lightning.accelerators.gpu_accelerator import GPUAccelerator from pytorch_lightning.utilities import device_parser from pytorch_lightning.utilities.exceptions import MisconfigurationException -from tests.base import EvalModelTemplate -from pytorch_lightning.accelerators.gpu_accelerator import GPUAccelerator - +from tests.base import BoringModel PRETEND_N_OF_GPUS = 16 @@ -43,8 +42,8 @@ def test_multi_gpu_none_backend(tmpdir): gpus=2, ) - model = EvalModelTemplate() - tpipes.run_model_test(trainer_options, model) + model = BoringModel() + tpipes.run_model_test(trainer_options, model, min_acc=0.20) @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") @@ -60,7 +59,7 @@ def test_single_gpu_model(tmpdir, gpus): gpus=gpus ) - model = EvalModelTemplate() + model = BoringModel() tpipes.run_model_test(trainer_options, model) diff --git a/tests/models/test_hparams.py b/tests/models/test_hparams.py index 5e5fab7d0a0b4..7081d450ee256 100644 --- a/tests/models/test_hparams.py +++ b/tests/models/test_hparams.py @@ -20,14 +20,14 @@ import pytest import torch from fsspec.implementations.local import LocalFileSystem -from omegaconf import OmegaConf, Container +from omegaconf import Container, OmegaConf from torch.nn import functional as F from torch.utils.data import DataLoader -from pytorch_lightning import Trainer, LightningModule -from pytorch_lightning.core.saving import save_hparams_to_yaml, load_hparams_from_yaml +from pytorch_lightning import LightningModule, Trainer +from pytorch_lightning.core.saving import load_hparams_from_yaml, save_hparams_to_yaml from pytorch_lightning.utilities import AttributeDict, is_picklable -from tests.base import EvalModelTemplate, TrialMNIST, BoringModel +from tests.base import BoringModel, EvalModelTemplate, TrialMNIST class SaveHparamsModel(BoringModel): @@ -595,13 +595,7 @@ def __init__(self, **kwargs): self.save_hyperparameters() -class RuntimeParamChangeModelAssign(BoringModel): - def __init__(self, **kwargs): - super().__init__() - self.hparams = kwargs - - -@pytest.mark.parametrize("cls", [RuntimeParamChangeModelSaving, RuntimeParamChangeModelAssign]) +@pytest.mark.parametrize("cls", [RuntimeParamChangeModelSaving]) def test_init_arg_with_runtime_change(tmpdir, cls): """Test that we save/export only the initial hparams, no other runtime change allowed""" model = cls(running_arg=123) diff --git a/tests/models/test_restore.py b/tests/models/test_restore.py index a2a9aa6b9042c..6ee5d362ffcaa 100644 --- a/tests/models/test_restore.py +++ b/tests/models/test_restore.py @@ -161,6 +161,7 @@ def test_callbacks_references_resume_from_checkpoint(enable_pl_optimizer, tmpdir @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") def test_running_test_pretrained_model_distrib_dp(tmpdir): """Verify `test()` on pretrained model.""" + tutils.set_random_master_port() model = EvalModelTemplate() @@ -205,7 +206,7 @@ def test_running_test_pretrained_model_distrib_dp(tmpdir): dataloaders = [dataloaders] for dataloader in dataloaders: - 
tpipes.run_prediction(dataloader, pretrained_model) + tpipes.run_prediction(pretrained_model, dataloader) @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") @@ -256,7 +257,7 @@ def test_running_test_pretrained_model_distrib_ddp_spawn(tmpdir): dataloaders = [dataloaders] for dataloader in dataloaders: - tpipes.run_prediction(dataloader, pretrained_model) + tpipes.run_prediction(pretrained_model, dataloader) def test_running_test_pretrained_model_cpu(tmpdir): @@ -398,7 +399,7 @@ def assert_good_acc(): dp_model.eval() dataloader = trainer.train_dataloader - tpipes.run_prediction(dataloader, dp_model, dp=True) + tpipes.run_prediction(dp_model, dataloader, dp=True) # new model model = EvalModelTemplate(**hparams) diff --git a/tests/trainer/test_dataloaders.py b/tests/trainer/test_dataloaders.py index 9b42aa98c9dd0..614b2a8e66ab8 100644 --- a/tests/trainer/test_dataloaders.py +++ b/tests/trainer/test_dataloaders.py @@ -128,7 +128,7 @@ def test_multiple_val_dataloader(tmpdir): # make sure predictions are good for each val set for dataloader in trainer.val_dataloaders: - tpipes.run_prediction(dataloader, trainer.model) + tpipes.run_prediction(trainer.model, dataloader) @pytest.mark.parametrize('ckpt_path', [None, 'best', 'specific']) @@ -164,7 +164,7 @@ def test_step(self, batch, batch_idx, *args, **kwargs): # make sure predictions are good for each test set for dataloader in trainer.test_dataloaders: - tpipes.run_prediction(dataloader, trainer.model) + tpipes.run_prediction(trainer.model, dataloader) # run the test method trainer.test(ckpt_path=ckpt_path) From 5ae6926a520ecaa21fd96f3ebd15b9069dbd880a Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Thu, 7 Jan 2021 14:01:52 +0100 Subject: [PATCH 2/3] fix some minor typos in docs (#5369) * fix docs typos * Apply suggestions from code review Co-authored-by: Wansoo Kim * flake8 Co-authored-by: Wansoo Kim --- .../accelerators/ddp_accelerator.py | 1 - .../accelerators/ddp_hpc_accelerator.py | 1 - .../accelerators/ddp_spawn_accelerator.py | 2 +- .../accelerators/tpu_accelerator.py | 1 - .../metrics/functional/average_precision.py | 2 +- .../metrics/functional/explained_variance.py | 2 +- pytorch_lightning/metrics/functional/f_beta.py | 4 ++-- .../metrics/functional/mean_squared_error.py | 2 +- .../functional/mean_squared_log_error.py | 2 +- .../functional/precision_recall_curve.py | 2 +- pytorch_lightning/metrics/functional/psnr.py | 2 -- pytorch_lightning/metrics/functional/r2score.py | 2 +- pytorch_lightning/metrics/functional/roc.py | 2 +- pytorch_lightning/metrics/functional/ssim.py | 2 +- pytorch_lightning/metrics/utils.py | 2 +- pytorch_lightning/overrides/data_parallel.py | 17 ++++++++++++----- .../trainer/connectors/checkpoint_connector.py | 3 ++- pytorch_lightning/utilities/argparse.py | 2 ++ 18 files changed, 28 insertions(+), 23 deletions(-) diff --git a/pytorch_lightning/accelerators/ddp_accelerator.py b/pytorch_lightning/accelerators/ddp_accelerator.py index e34a7183a5334..56f6eaa2223a3 100644 --- a/pytorch_lightning/accelerators/ddp_accelerator.py +++ b/pytorch_lightning/accelerators/ddp_accelerator.py @@ -223,7 +223,6 @@ def ddp_train(self, process_idx, model): Args: process_idx: - mp_queue: multiprocessing queue model: Returns: diff --git a/pytorch_lightning/accelerators/ddp_hpc_accelerator.py b/pytorch_lightning/accelerators/ddp_hpc_accelerator.py index 47c1b736fd8b4..cf6aad9999223 100644 --- a/pytorch_lightning/accelerators/ddp_hpc_accelerator.py +++ 
b/pytorch_lightning/accelerators/ddp_hpc_accelerator.py @@ -112,7 +112,6 @@ def ddp_train(self, process_idx, model): Args: process_idx: - mp_queue: multiprocessing queue model: Returns: diff --git a/pytorch_lightning/accelerators/ddp_spawn_accelerator.py b/pytorch_lightning/accelerators/ddp_spawn_accelerator.py index 9410984df20fc..e23943e9262f8 100644 --- a/pytorch_lightning/accelerators/ddp_spawn_accelerator.py +++ b/pytorch_lightning/accelerators/ddp_spawn_accelerator.py @@ -86,7 +86,7 @@ def train(self): self.__recover_child_process_weights(model, best_path, last_path) return results - def ddp_train(self, process_idx, mp_queue, model, is_master=False, proc_offset=0): + def ddp_train(self, process_idx, mp_queue, model, is_master: bool = False, proc_offset: int = 0): """ Entry point for ddp diff --git a/pytorch_lightning/accelerators/tpu_accelerator.py b/pytorch_lightning/accelerators/tpu_accelerator.py index 6a3263095ee67..66fc236a2a775 100644 --- a/pytorch_lightning/accelerators/tpu_accelerator.py +++ b/pytorch_lightning/accelerators/tpu_accelerator.py @@ -171,7 +171,6 @@ def to_device(self, batch): Args: batch: A tensor or collection of tensors. - tpu_id: The id of the TPU core. If omitted, the first available core is chosen. Return: the tensor on the TPU device. diff --git a/pytorch_lightning/metrics/functional/average_precision.py b/pytorch_lightning/metrics/functional/average_precision.py index da4f37b073206..20317b81d5265 100644 --- a/pytorch_lightning/metrics/functional/average_precision.py +++ b/pytorch_lightning/metrics/functional/average_precision.py @@ -67,7 +67,7 @@ def average_precision( which for binary problem is translate to 1. For multiclass problems this argument should not be set as we iteratively change it in the range [0,num_classes-1] - sample_weight: sample weights for each data point + sample_weights: sample weights for each data point Returns: tensor with average precision. If multiclass will return list diff --git a/pytorch_lightning/metrics/functional/explained_variance.py b/pytorch_lightning/metrics/functional/explained_variance.py index 20b38c58a2a6b..20550435ee370 100644 --- a/pytorch_lightning/metrics/functional/explained_variance.py +++ b/pytorch_lightning/metrics/functional/explained_variance.py @@ -62,7 +62,7 @@ def explained_variance( Computes explained variance. Args: - pred: estimated labels + preds: estimated labels target: ground truth labels multioutput: Defines aggregation in the case of multiple output scores. Can be one of the following strings (default is `'uniform_average'`.): diff --git a/pytorch_lightning/metrics/functional/f_beta.py b/pytorch_lightning/metrics/functional/f_beta.py index 2b0ba194d56f0..c294d29805a6f 100755 --- a/pytorch_lightning/metrics/functional/f_beta.py +++ b/pytorch_lightning/metrics/functional/f_beta.py @@ -75,7 +75,7 @@ def fbeta( If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. Args: - pred: estimated probabilities + preds: estimated probabilities target: ground-truth labels num_classes: Number of classes in the dataset. beta: Beta coefficient in the F measure. @@ -128,7 +128,7 @@ def f1( If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. Args: - pred: estimated probabilities + preds: estimated probabilities target: ground-truth labels num_classes: Number of classes in the dataset. 
threshold: diff --git a/pytorch_lightning/metrics/functional/mean_squared_error.py b/pytorch_lightning/metrics/functional/mean_squared_error.py index e418536b26973..f8407531e9057 100644 --- a/pytorch_lightning/metrics/functional/mean_squared_error.py +++ b/pytorch_lightning/metrics/functional/mean_squared_error.py @@ -34,7 +34,7 @@ def mean_squared_error(preds: torch.Tensor, target: torch.Tensor) -> torch.Tenso Computes mean squared error Args: - pred: estimated labels + preds: estimated labels target: ground truth labels Return: diff --git a/pytorch_lightning/metrics/functional/mean_squared_log_error.py b/pytorch_lightning/metrics/functional/mean_squared_log_error.py index 1b96e1a7abc10..59012a607bba3 100644 --- a/pytorch_lightning/metrics/functional/mean_squared_log_error.py +++ b/pytorch_lightning/metrics/functional/mean_squared_log_error.py @@ -34,7 +34,7 @@ def mean_squared_log_error(preds: torch.Tensor, target: torch.Tensor) -> torch.T Computes mean squared log error Args: - pred: estimated labels + preds: estimated labels target: ground truth labels Return: diff --git a/pytorch_lightning/metrics/functional/precision_recall_curve.py b/pytorch_lightning/metrics/functional/precision_recall_curve.py index e497c5f7b37c7..0d562f8d6c3ae 100644 --- a/pytorch_lightning/metrics/functional/precision_recall_curve.py +++ b/pytorch_lightning/metrics/functional/precision_recall_curve.py @@ -173,7 +173,7 @@ def precision_recall_curve( which for binary problem is translate to 1. For multiclass problems this argument should not be set as we iteratively change it in the range [0,num_classes-1] - sample_weight: sample weights for each data point + sample_weights: sample weights for each data point Returns: 3-element tuple containing diff --git a/pytorch_lightning/metrics/functional/psnr.py b/pytorch_lightning/metrics/functional/psnr.py index 4aec3d902b418..40d3b16e538dc 100644 --- a/pytorch_lightning/metrics/functional/psnr.py +++ b/pytorch_lightning/metrics/functional/psnr.py @@ -46,8 +46,6 @@ def psnr( - ``'elementwise_mean'``: takes the mean (default) - ``'sum'``: takes the sum - ``'none'``: no reduction will be applied - return_state: returns a internal state that can be ddp reduced - before doing the final calculation Return: Tensor with PSNR score diff --git a/pytorch_lightning/metrics/functional/r2score.py b/pytorch_lightning/metrics/functional/r2score.py index f689e3ac9cac1..82117dd688064 100644 --- a/pytorch_lightning/metrics/functional/r2score.py +++ b/pytorch_lightning/metrics/functional/r2score.py @@ -98,7 +98,7 @@ def r2score( be provided as the ``adjusted`` argument. Args: - pred: estimated labels + preds: estimated labels target: ground truth labels adjusted: number of independent regressors for calculating adjusted r2 score. Default 0 (standard r2 score). diff --git a/pytorch_lightning/metrics/functional/roc.py b/pytorch_lightning/metrics/functional/roc.py index ffd5f9f0ac79c..26fa6d07d4f61 100644 --- a/pytorch_lightning/metrics/functional/roc.py +++ b/pytorch_lightning/metrics/functional/roc.py @@ -98,7 +98,7 @@ def roc( which for binary problem is translate to 1. 
For multiclass problems this argument should not be set as we iteratively change it in the range [0,num_classes-1] - sample_weight: sample weights for each data point + sample_weights: sample weights for each data point Returns: 3-element tuple containing diff --git a/pytorch_lightning/metrics/functional/ssim.py b/pytorch_lightning/metrics/functional/ssim.py index b52744421aef2..a978ce8268161 100644 --- a/pytorch_lightning/metrics/functional/ssim.py +++ b/pytorch_lightning/metrics/functional/ssim.py @@ -125,7 +125,7 @@ def ssim( Computes Structual Similarity Index Measure Args: - pred: estimated image + preds: estimated image target: ground truth image kernel_size: size of the gaussian kernel (default: (11, 11)) sigma: Standard deviation of the gaussian kernel (default: (1.5, 1.5)) diff --git a/pytorch_lightning/metrics/utils.py b/pytorch_lightning/metrics/utils.py index d79d1a355db1e..e324dad33a6f1 100644 --- a/pytorch_lightning/metrics/utils.py +++ b/pytorch_lightning/metrics/utils.py @@ -232,7 +232,7 @@ def class_reduce( Args: num: numerator tensor - decom: denominator tensor + denom: denominator tensor weights: weights for each class class_reduction: reduction method for multiclass problems diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index b7249dfd99980..1943a83644e29 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -16,10 +16,12 @@ import threading from collections.abc import Iterable, Mapping from itertools import chain +from typing import Optional import torch +from torch import Tensor from torch.cuda._utils import _get_device_index -from torch.nn import DataParallel +from torch.nn import DataParallel, Module from torch.nn.parallel import DistributedDataParallel from torch.nn.parallel._functions import Gather @@ -222,15 +224,20 @@ def warn_missing_output(fx_called): warning_cache.warn("Your training_step returned None. Make sure that was your intention!") -def parallel_apply(modules, inputs, kwargs_tup=None, devices=None): # pragma: no-cover +def parallel_apply( + modules: Module, + inputs: Tensor, + kwargs_tup: Optional[tuple] = None, + devices: Optional[list] = None, +): # pragma: no-cover r"""Applies each `module` in :attr:`modules` in parallel on arguments contained in :attr:`inputs` (positional) and :attr:`kwargs_tup` (keyword) on each of :attr:`devices`. Args: - modules (Module): modules to be parallelized - inputs (tensor): inputs to the modules - devices (list of int or torch.device): CUDA devices + modules: modules to be parallelized + inputs: inputs to the modules + devices: CUDA devices :attr:`modules`, :attr:`inputs`, :attr:`kwargs_tup` (if given), and :attr:`devices` (if given) should all have same length. Moreover, each diff --git a/pytorch_lightning/trainer/connectors/checkpoint_connector.py b/pytorch_lightning/trainer/connectors/checkpoint_connector.py index f13765ac28ce4..d46e0e4cf3503 100644 --- a/pytorch_lightning/trainer/connectors/checkpoint_connector.py +++ b/pytorch_lightning/trainer/connectors/checkpoint_connector.py @@ -345,10 +345,11 @@ def hpc_load(self, checkpoint_path: str, on_gpu: bool): model.on_hpc_load(checkpoint) def max_ckpt_in_folder(self, dir_path: Union[str, Path], name_key: str = 'ckpt_') -> Optional[int]: - """List up files in `dir_path` with name_key, then yield maximum suffix number. + """List up files in `dir_path` with `name_key`, then yield maximum suffix number. 
Args: dir_path: path of directory which may contain files whose name include `name_key` + name_key: file name prefix Returns: None if no-corresponding-file else maximum suffix number diff --git a/pytorch_lightning/utilities/argparse.py b/pytorch_lightning/utilities/argparse.py index ff800802fef19..70d36e9dccccb 100644 --- a/pytorch_lightning/utilities/argparse.py +++ b/pytorch_lightning/utilities/argparse.py @@ -25,6 +25,7 @@ def from_argparse_args(cls, args: Union[Namespace, ArgumentParser], **kwargs): Eventually use varibles from OS environement which are defined as "PL__" Args: + cls: Lightning class args: The parser or namespace to take arguments from. Only known arguments will be parsed and passed to the :class:`Trainer`. **kwargs: Additional keyword arguments that may override ones in the parser or namespace. @@ -139,6 +140,7 @@ def add_argparse_args(cls, parent_parser: ArgumentParser) -> ArgumentParser: r"""Extends existing argparse by default `Trainer` attributes. Args: + cls: Lightning class parent_parser: The custom cli arguments parser, which will be extended by the Trainer default arguments. From 5f94900361ed53bf55caee66ad6d7eac5230b573 Mon Sep 17 00:00:00 2001 From: chaton Date: Thu, 7 Jan 2021 16:57:26 +0100 Subject: [PATCH 3/3] [Feat] Cleanup ModelCheckpoint / EarlyStopping by moving logic to LoggerConnector (#5218) * [bug-fix] Metric reduction with Logging (#5150) * add test * resolve bug * udpate test * wrongly copy / paste * update test * resolve a second bug Co-authored-by: Ubuntu * iupdate * resolve bugs * add test back * correct flake8 * resolve flake8 * update on comments * update tests * add a test * add test * update to Callable Co-authored-by: Ubuntu --- CHANGELOG.md | 3 + pytorch_lightning/callbacks/early_stopping.py | 13 +-- .../callbacks/model_checkpoint.py | 10 +-- .../logger_connector/epoch_result_store.py | 10 +-- .../logger_connector/logger_connector.py | 71 +++++++++++++--- .../logger_connector/metrics_holder.py | 80 +++++++++++++++++++ .../trainer/logging/test_logger_connector.py | 35 +++++++- 7 files changed, 180 insertions(+), 42 deletions(-) create mode 100644 pytorch_lightning/trainer/connectors/logger_connector/metrics_holder.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 928144320394a..8efcd6f06c2d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Changed +- Changed `automatic casting` for LoggerConnector `metrics` ([#5218](https://github.com/PyTorchLightning/pytorch-lightning/pull/5218)) + + - `stat_scores` metric now calculates stat scores over all classes and gains new parameters, in line with the new `StatScores` metric ([#4839](https://github.com/PyTorchLightning/pytorch-lightning/pull/4839)) diff --git a/pytorch_lightning/callbacks/early_stopping.py b/pytorch_lightning/callbacks/early_stopping.py index c8725f4cde6fd..fca39036c9404 100644 --- a/pytorch_lightning/callbacks/early_stopping.py +++ b/pytorch_lightning/callbacks/early_stopping.py @@ -19,14 +19,12 @@ Monitor a metric and stop training when it stops improving. 
""" -import numbers import numpy as np import torch from pytorch_lightning.callbacks.base import Callback -from pytorch_lightning.metrics.metric import Metric -from pytorch_lightning.utilities import _TPU_AVAILABLE, rank_zero_info, rank_zero_warn +from pytorch_lightning.utilities import rank_zero_info, rank_zero_warn class EarlyStopping(Callback): @@ -196,15 +194,6 @@ def _run_early_stopping_check(self, trainer, pl_module): # when in dev debugging trainer.dev_debugger.track_early_stopping_history(self, current) - if current is not None: - if isinstance(current, Metric): - current = current.compute() - elif isinstance(current, numbers.Number): - current = torch.tensor(current, device=pl_module.device, dtype=torch.float) - - if trainer.use_tpu and _TPU_AVAILABLE: - current = current.cpu() - if self.monitor_op(current - self.min_delta, self.best_score): self.best_score = current self.wait_count = 0 diff --git a/pytorch_lightning/callbacks/model_checkpoint.py b/pytorch_lightning/callbacks/model_checkpoint.py index 7fd7a571a47ce..3fc2b54d98162 100644 --- a/pytorch_lightning/callbacks/model_checkpoint.py +++ b/pytorch_lightning/callbacks/model_checkpoint.py @@ -20,7 +20,6 @@ """ -import numbers import os import re from copy import deepcopy @@ -33,7 +32,6 @@ from pytorch_lightning import _logger as log from pytorch_lightning.callbacks.base import Callback -from pytorch_lightning.metrics.metric import Metric from pytorch_lightning.utilities import rank_zero_info, rank_zero_only, rank_zero_warn from pytorch_lightning.utilities.cloud_io import get_filesystem from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -554,12 +552,6 @@ def _save_top_k_checkpoints(self, trainer, pl_module, metrics): epoch = metrics.get("epoch") step = metrics.get("step") - if current is not None: - if isinstance(current, Metric): - current = current.compute() - elif isinstance(current, numbers.Number): - current = torch.tensor(current, device=pl_module.device, dtype=torch.float) - if self.check_monitor_top_k(current): self._update_best_and_save(current, epoch, step, trainer, pl_module, metrics) elif self.verbose: @@ -587,7 +579,7 @@ def _update_best_and_save( self.best_k_models.pop(del_filepath) # do not save nan, replace with +/- inf - if torch.isnan(current): + if isinstance(current, torch.Tensor) and torch.isnan(current): current = torch.tensor(float('inf' if self.mode == "min" else '-inf')) filepath = self._get_metric_interpolated_filepath_name(ckpt_name_metrics, epoch, step, del_filepath) diff --git a/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py b/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py index dd12a2970727a..2796a61ee5c83 100644 --- a/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py +++ b/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py @@ -379,7 +379,7 @@ def update_logger_connector(self) -> None: if is_train: # Only log and add to callback epoch step during evaluation, test. 
- logger_connector.logged_metrics.update(batch_log_metrics) + logger_connector._logged_metrics.update(batch_log_metrics) callback_metrics.update(batch_pbar_metrics) callback_metrics.update(batch_log_metrics) else: @@ -389,8 +389,8 @@ def update_logger_connector(self) -> None: # get logged_metrics epoch_log_metrics = self.get_epoch_log_metrics() - logger_connector.logged_metrics.update(epoch_log_metrics) - logger_connector.logged_metrics.update(epoch=self.trainer.current_epoch) + logger_connector._logged_metrics.update(epoch_log_metrics) + logger_connector._logged_metrics.update({"epoch": self.trainer.current_epoch}) # get forked_metrics forked_metrics = self.get_forked_metrics() @@ -403,8 +403,8 @@ def update_logger_connector(self) -> None: logger_connector.evaluation_callback_metrics.update(callback_metrics) # update callback_metrics - logger_connector.callback_metrics.update(callback_metrics) - logger_connector.callback_metrics.pop("epoch", None) + logger_connector._callback_metrics.update(callback_metrics) + logger_connector._callback_metrics.pop("epoch", None) batch_pbar_metrics.pop("debug_epoch", None) return batch_pbar_metrics, batch_log_metrics diff --git a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py index 6b55b3bce1b9a..73e9223fb7d0f 100644 --- a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py +++ b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py @@ -14,7 +14,7 @@ from copy import deepcopy import os from pprint import pprint -from typing import Iterable, Union +from typing import Any, Iterable, Union, Dict import torch @@ -23,6 +23,7 @@ from pytorch_lightning.loggers import LoggerCollection, TensorBoardLogger from pytorch_lightning.trainer.connectors.logger_connector.callback_hook_validator import CallbackHookNameValidator from pytorch_lightning.trainer.connectors.logger_connector.epoch_result_store import EpochResultStore, LoggerStages +from pytorch_lightning.trainer.connectors.logger_connector.metrics_holder import MetricsHolder from pytorch_lightning.utilities import flatten_dict from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.model_helpers import is_overridden @@ -31,19 +32,64 @@ class LoggerConnector: def __init__(self, trainer): self.trainer = trainer - self.callback_metrics = {} - self.evaluation_callback_metrics = {} - self.logged_metrics = {} - self.progress_bar_metrics = {} + self._callback_metrics = MetricsHolder() + self._evaluation_callback_metrics = MetricsHolder(to_float=True) + self._logged_metrics = MetricsHolder() + self._progress_bar_metrics = MetricsHolder() self.eval_loop_results = [] self._cached_results = {stage: EpochResultStore(trainer, stage) for stage in LoggerStages} self._callback_hook_validator = CallbackHookNameValidator() self._current_stage = None + @property + def callback_metrics(self) -> Dict: + return self.get_metrics("callback_metrics") + + @callback_metrics.setter + def callback_metrics(self, callback_metrics: Dict) -> None: + self.set_metrics("callback_metrics", callback_metrics) + + @property + def evaluation_callback_metrics(self) -> Dict: + return self.get_metrics("evaluation_callback_metrics") + + @evaluation_callback_metrics.setter + def evaluation_callback_metrics(self, evaluation_callback_metrics: Dict) -> None: + self.set_metrics("evaluation_callback_metrics", evaluation_callback_metrics) + + @property + def 
logged_metrics(self) -> Dict: + return self.get_metrics("logged_metrics") + + @logged_metrics.setter + def logged_metrics(self, logged_metrics: Dict) -> None: + self.set_metrics("logged_metrics", logged_metrics) + + @property + def progress_bar_metrics(self) -> Dict: + return self.get_metrics("progress_bar_metrics") + + @progress_bar_metrics.setter + def progress_bar_metrics(self, progress_bar_metrics: Dict) -> None: + self.set_metrics("progress_bar_metrics", progress_bar_metrics) + @property def cached_results(self) -> Union[EpochResultStore, None]: return self._cached_results.get(self._current_stage) # type: ignore + def get_metrics(self, key: str) -> Dict: + metrics_holder = getattr(self, f"_{key}", None) + model_ref = self.trainer.get_model() + metrics_holder.convert( + self.trainer.use_tpu, + model_ref.device if model_ref is not None else model_ref + ) + return metrics_holder.metrics + + def set_metrics(self, key: str, val: Any) -> None: + metrics_holder = getattr(self, f"_{key}", None) + metrics_holder.reset(val) + def set_stage(self, stage_or_testing: Union[str, bool], reset: bool = False) -> None: self._current_stage = LoggerStages.determine_stage(stage_or_testing) if reset: @@ -153,10 +199,10 @@ def cache_training_step_metrics(self, opt_closure_result): if len(pbar_metrics_tmp) > 0: self.add_progress_bar_metrics(pbar_metrics_tmp) - self.callback_metrics.update(callback_metrics_tmp) + self._callback_metrics.update(callback_metrics_tmp) # save legacy log metrics - self.logged_metrics.update(logged_metrics_tmp) + self._logged_metrics.update(logged_metrics_tmp) self.cached_results.legacy_batch_log_metrics.update(logged_metrics_tmp) def log_metrics(self, metrics, grad_norm_dic, step=None, log_train_step_metrics=False): @@ -209,7 +255,7 @@ def add_progress_bar_metrics(self, metrics): if isinstance(v, torch.Tensor): v = v.item() - self.progress_bar_metrics[k] = v + self._progress_bar_metrics.metrics[k] = v self.trainer.dev_debugger.track_pbar_metrics_history(metrics) @@ -311,6 +357,7 @@ def _track_callback_metrics(self, eval_results, using_eval_result): if 'val_loss' in flat: flat['checkpoint_on'] = flat['val_loss'] flat['early_stop_on'] = flat['val_loss'] + self.trainer.logger_connector.callback_metrics.update(flat) if self.trainer.testing: self.trainer.logger_connector.evaluation_callback_metrics.update(flat) @@ -441,15 +488,15 @@ def log_train_epoch_end_metrics( # add the metrics to the loggers and callbacks if epoch_log_metrics and len(epoch_log_metrics) > 0: self.log_metrics(epoch_log_metrics, {}) - self.callback_metrics.update(epoch_log_metrics) + self._callback_metrics.update(epoch_log_metrics) # add metrics to callbacks - self.callback_metrics.update(epoch_callback_metrics) + self._callback_metrics.update(epoch_callback_metrics) # add metrics to progress_bar and callbacks if len(epoch_progress_bar_metrics) > 0: self.add_progress_bar_metrics(epoch_progress_bar_metrics) - self.callback_metrics.update(epoch_progress_bar_metrics) + self._callback_metrics.update(epoch_progress_bar_metrics) # reset epoch loop result for next epoch self.cached_results.reset() @@ -605,4 +652,4 @@ def log_train_step_metrics(self, batch_output): grad_norm_dic = {} if len(batch_log_metrics) > 0 or len(grad_norm_dic) > 0: self.log_metrics(batch_log_metrics, grad_norm_dic, log_train_step_metrics=True) - self.callback_metrics.update(batch_log_metrics) + self._callback_metrics.update(batch_log_metrics) diff --git a/pytorch_lightning/trainer/connectors/logger_connector/metrics_holder.py 
b/pytorch_lightning/trainer/connectors/logger_connector/metrics_holder.py new file mode 100644 index 0000000000000..d2e2c9b7870cf --- /dev/null +++ b/pytorch_lightning/trainer/connectors/logger_connector/metrics_holder.py @@ -0,0 +1,80 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numbers +from typing import Any + +import torch + +from pytorch_lightning.metrics.metric import Metric +from pytorch_lightning.utilities import _TPU_AVAILABLE + + +class MetricsHolder: + + """ + This class acts as a dictonary holder. + It holds metrics and implements conversion functions. + Those functions will be triggered within LoggerConnector + when the property is being requested from the user. + """ + + def __init__(self, to_float: bool = False): + self.metrics = {} + self._to_float = to_float + + def update(self, metrics): + self.metrics.update(metrics) + + def pop(self, key, default): + return self.metrics.pop(key, default) + + def reset(self, metrics): + self.metrics = metrics + + def convert(self, use_tpu: bool, device: torch.device): + for key, value in self.metrics.items(): + self.metrics[key] = self._convert(value, use_tpu, device) + + def _convert(self, current: Any, use_tpu: bool, device: torch.device): + if self._to_float: + return self._convert_to_float(current, use_tpu, device) + return self._convert_to_tensor(current, use_tpu, device) + + def _convert_to_float(self, current, use_tpu: bool, device: torch.device): + if isinstance(current, Metric): + current = current.compute().detach() + + if isinstance(current, torch.Tensor): + current = float(current.item()) + + elif isinstance(current, int): + current = float(current) + + return current + + def _convert_to_tensor(self, current: Any, use_tpu: bool, device: torch.device): + if current is not None: + if isinstance(current, Metric): + current = current.compute().detach() + + elif isinstance(current, numbers.Number): + if device is None: + current = torch.tensor(current, dtype=torch.float) + else: + current = torch.tensor(current, device=device, dtype=torch.float) + + if use_tpu and _TPU_AVAILABLE: + current = current.cpu() + + return current diff --git a/tests/trainer/logging/test_logger_connector.py b/tests/trainer/logging/test_logger_connector.py index 56e5765c7f4b8..f911c793b0707 100644 --- a/tests/trainer/logging/test_logger_connector.py +++ b/tests/trainer/logging/test_logger_connector.py @@ -15,6 +15,7 @@ Tests to ensure that the training loop works with a dict (1.0) """ from copy import deepcopy +from typing import Any, Callable import pytest import torch @@ -22,15 +23,17 @@ from pytorch_lightning.callbacks.base import Callback from pytorch_lightning.core.step_result import Result +from pytorch_lightning.metrics import Accuracy from pytorch_lightning.trainer import Trainer from pytorch_lightning.trainer.connectors.logger_connector.callback_hook_validator import CallbackHookNameValidator +from pytorch_lightning.trainer.connectors.logger_connector.metrics_holder import MetricsHolder from 
pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.base.boring_model import BoringModel, RandomDataset -def decorator_with_arguments(fx_name='', hook_fx_name=None): - def decorator(func): - def wrapper(self, *args, **kwargs): +def decorator_with_arguments(fx_name: str = '', hook_fx_name: str = None) -> Callable: + def decorator(func: Callable) -> Callable: + def wrapper(self, *args, **kwargs) -> Any: # Set information self._current_fx_name = fx_name self._current_hook_fx_name = hook_fx_name @@ -43,7 +46,6 @@ def wrapper(self, *args, **kwargs): return result return wrapper - return decorator @@ -425,3 +427,28 @@ def test_dataloader(self): ) trainer.fit(model) trainer.test(model, ckpt_path=None) + + +@pytest.mark.parametrize('to_float', [False, True]) +def test_metrics_holder(to_float, tmpdir): + + device = "cuda" if torch.cuda.is_available() else "cpu" + preds = torch.tensor([[0.9, 0.1]], device=device) + + def is_float(value: Any) -> bool: + return isinstance(value, float) + + excepted_function = is_float if to_float else torch.is_tensor + targets = torch.tensor([1], device=device) + acc = Accuracy().to(device) + metric_holder = MetricsHolder(to_float=to_float) + metric_holder.update({ + "x": 1, + "y": torch.tensor(2), + "z": acc(preds, targets), + }) + metric_holder.convert(False, device) + metrics = metric_holder.metrics + assert excepted_function(metrics["x"]) + assert excepted_function(metrics["y"]) + assert excepted_function(metrics["z"])
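
For reference, a minimal usage sketch of the MetricsHolder introduced in PATCH 3/3, mirroring the behaviour exercised by test_metrics_holder above. This is an illustration only: it assumes the patched source tree is importable, the import path is the one created by the diff, and the metric names and values are made up.

import torch

from pytorch_lightning.trainer.connectors.logger_connector.metrics_holder import MetricsHolder

# Default holder: plain numbers are promoted to float tensors on convert().
holder = MetricsHolder()
holder.update({"loss": 0.25, "step": 3})
holder.convert(use_tpu=False, device=torch.device("cpu"))
assert torch.is_tensor(holder.metrics["loss"])

# With to_float=True (as used for evaluation_callback_metrics in the patch),
# values are converted to Python floats instead.
float_holder = MetricsHolder(to_float=True)
float_holder.update({"val_loss": torch.tensor(0.1)})
float_holder.convert(use_tpu=False, device=torch.device("cpu"))
assert isinstance(float_holder.metrics["val_loss"], float)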