From 7464aca44e73f72998c411e999e085e4fdfd7568 Mon Sep 17 00:00:00 2001 From: Gianluca Scarpellini Date: Thu, 7 Jan 2021 11:50:08 +0100 Subject: [PATCH 1/3] test_cpu and test_gpu EvalModelTemplate deprecation (#4820) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * test_cpu refactoring - BoringModel and checkpoints; test_gpu refactoring - BoringModel; boring_model refactoring - validation, testing; Fix - run_prediction as dispatcher for testing BoringModel * Removed EvalModelTemplate import from test_cpu and test_gpu * Reverting unintended changes * Issues with checkpointing * Fixed tests for logging and checkpointing * Fix for dispatcher * test_cpu refactoring - BoringModel and checkpoints; test_gpu refactoring - BoringModel; boring_model refactoring - validation, testing; Fix - run_prediction as dispatcher for testing BoringModel * Removed EvalModelTemplate import from test_cpu and test_gpu * Reverting unintended changes * Issues with checkpointing * Fixed tests for logging and checkpointing * Fix for dispatcher * Fixed acc check for stochasticity of seeds * Fixed according to @borda suggestions * Hparams for boring_model * Deprecated RuntimeParamChangeModelAssign (functionality is tested in RuntimeParamChangeModelSaving) * Reduced boring_model parameters to just in and out features, test_cpu models inherit BoringModel to specify additional parameters (e.g., optimizer) * Fix PEP8 * Update tests/base/develop_pipelines.py Co-authored-by: Rohit Gupta * Update tests/base/boring_model.py Co-authored-by: Rohit Gupta * Update tests/base/develop_pipelines.py Co-authored-by: Rohit Gupta * Update tests/models/test_cpu.py Co-authored-by: Rohit Gupta * Update tests/models/test_cpu.py Co-authored-by: Rohit Gupta * Merged test_early_stopping with all_features; added TODO for self.log * Fixed test_all_features trainer options * Ready for review! * Update tests/models/test_cpu.py Thank you! 
:) Co-authored-by: Rohit Gupta * Update tests/models/test_cpu.py Co-authored-by: Rohit Gupta * Update tests/models/test_cpu.py Co-authored-by: Rohit Gupta * Update tests/models/test_cpu.py Co-authored-by: Rohit Gupta * Update tests/models/test_cpu.py Co-authored-by: Rohit Gupta * added optimizer_name, lr, and batch_size as hparams for save_hparameters() * Fixes for reducing PR size * Reverse test_hparams (removed DEPRECATED test for hparams direct assignment) * Changes for in_features * Fixed hparams * Fixed parameters for boring_model * Update tests/models/test_cpu.py Co-authored-by: Carlos Mocholí * Update tests/models/test_cpu.py Co-authored-by: Carlos Mocholí * Update tests/models/test_cpu.py Co-authored-by: Carlos Mocholí * fix for pep8 * Fixed run_predction and TODO * fix min acc for darwin/windows without pl_opt * eval as DEFAULT run_prediction strategy * Updated val_dataloader for running_test_no_val Co-authored-by: Rohit Gupta Co-authored-by: chaton Co-authored-by: Carlos Mocholí --- tests/base/develop_pipelines.py | 46 +++-- .../data/horovod/train_default_model.py | 2 +- tests/models/test_cpu.py | 168 +++++++++--------- tests/models/test_gpu.py | 11 +- tests/models/test_hparams.py | 16 +- tests/models/test_restore.py | 7 +- tests/trainer/test_dataloaders.py | 4 +- 7 files changed, 132 insertions(+), 122 deletions(-) diff --git a/tests/base/develop_pipelines.py b/tests/base/develop_pipelines.py index 24535dc67da8e..b6289079a35ab 100644 --- a/tests/base/develop_pipelines.py +++ b/tests/base/develop_pipelines.py @@ -14,8 +14,8 @@ import torch from pytorch_lightning import Trainer -from tests.base.develop_utils import load_model_from_checkpoint, get_default_logger, \ - reset_seed +from tests.base import BoringModel +from tests.base.develop_utils import get_default_logger, load_model_from_checkpoint, reset_seed def run_model_test_without_loggers(trainer_options, model, min_acc: float = 0.50): @@ -31,6 +31,7 @@ def run_model_test_without_loggers(trainer_options, model, min_acc: float = 0.50 pretrained_model = load_model_from_checkpoint( trainer.logger, trainer.checkpoint_callback.best_model_path, + type(model) ) # test new model accuracy @@ -39,7 +40,7 @@ def run_model_test_without_loggers(trainer_options, model, min_acc: float = 0.50 test_loaders = [test_loaders] for dataloader in test_loaders: - run_prediction(dataloader, pretrained_model, min_acc=min_acc) + run_prediction(pretrained_model, dataloader, min_acc=min_acc) if trainer.use_ddp: # on hpc this would work fine... 
but need to hack it for the purpose of the test @@ -47,7 +48,8 @@ def run_model_test_without_loggers(trainer_options, model, min_acc: float = 0.50 trainer.optimizers, trainer.lr_schedulers = pretrained_model.configure_optimizers() -def run_model_test(trainer_options, model, on_gpu: bool = True, version=None, with_hpc: bool = True): +def run_model_test(trainer_options, model, on_gpu: bool = True, version=None, + with_hpc: bool = True, min_acc: float = 0.25): reset_seed() save_dir = trainer_options['default_root_dir'] @@ -56,9 +58,6 @@ def run_model_test(trainer_options, model, on_gpu: bool = True, version=None, wi logger = get_default_logger(save_dir, version=version) trainer_options.update(logger=logger) - if 'checkpoint_callback' not in trainer_options: - trainer_options.update(checkpoint_callback=True) - trainer = Trainer(**trainer_options) initial_values = torch.tensor([torch.sum(torch.abs(x)) for x in model.parameters()]) result = trainer.fit(model) @@ -66,10 +65,11 @@ def run_model_test(trainer_options, model, on_gpu: bool = True, version=None, wi assert result == 1, 'trainer failed' # Check that the model is actually changed post-training - assert torch.norm(initial_values - post_train_values) > 0.1 + change_ratio = torch.norm(initial_values - post_train_values) + assert change_ratio > 0.1, f"the model is changed of {change_ratio}" # test model loading - pretrained_model = load_model_from_checkpoint(logger, trainer.checkpoint_callback.best_model_path) + pretrained_model = load_model_from_checkpoint(logger, trainer.checkpoint_callback.best_model_path, type(model)) # test new model accuracy test_loaders = model.test_dataloader() @@ -77,14 +77,15 @@ def run_model_test(trainer_options, model, on_gpu: bool = True, version=None, wi test_loaders = [test_loaders] for dataloader in test_loaders: - run_prediction(dataloader, pretrained_model) + run_prediction(pretrained_model, dataloader, min_acc=min_acc) if with_hpc: if trainer.use_ddp or trainer.use_ddp2: # on hpc this would work fine... 
but need to hack it for the purpose of the test trainer.model = pretrained_model - trainer.optimizers, trainer.lr_schedulers, trainer.optimizer_frequencies = \ - trainer.init_optimizers(pretrained_model) + trainer.optimizers, trainer.lr_schedulers, trainer.optimizer_frequencies = trainer.init_optimizers( + pretrained_model + ) # test HPC saving trainer.checkpoint_connector.hpc_save(save_dir, logger) @@ -93,7 +94,14 @@ def run_model_test(trainer_options, model, on_gpu: bool = True, version=None, wi trainer.checkpoint_connector.hpc_load(checkpoint_path, on_gpu=on_gpu) -def run_prediction(dataloader, trained_model, dp=False, min_acc=0.50): +def run_prediction(trained_model, dataloader, dp=False, min_acc=0.25): + if isinstance(trained_model, BoringModel): + return _boring_model_run_prediction(trained_model, dataloader, dp, min_acc) + else: + return _eval_model_template_run_prediction(trained_model, dataloader, dp, min_acc) + + +def _eval_model_template_run_prediction(trained_model, dataloader, dp=False, min_acc=0.50): # run prediction on 1 batch batch = next(iter(dataloader)) x, y = batch @@ -102,7 +110,7 @@ def run_prediction(dataloader, trained_model, dp=False, min_acc=0.50): if dp: with torch.no_grad(): output = trained_model(batch, 0) - acc = output['val_acc'] + acc = output['val_acc'] acc = torch.mean(acc).item() else: @@ -119,3 +127,13 @@ def run_prediction(dataloader, trained_model, dp=False, min_acc=0.50): acc = acc.item() assert acc >= min_acc, f"This model is expected to get > {min_acc} in test set (it got {acc})" + + +def _boring_model_run_prediction(trained_model, dataloader, dp=False, min_acc=0.25): + # run prediction on 1 batch + batch = next(iter(dataloader)) + with torch.no_grad(): + output = trained_model(batch) + acc = trained_model.loss(batch, output) + + assert acc >= min_acc, f"This model is expected to get, {min_acc} in test set but got {acc}" diff --git a/tests/models/data/horovod/train_default_model.py b/tests/models/data/horovod/train_default_model.py index 62f874902b094..c38b5b4efafe8 100644 --- a/tests/models/data/horovod/train_default_model.py +++ b/tests/models/data/horovod/train_default_model.py @@ -72,7 +72,7 @@ def run_test_from_config(trainer_options): test_loaders = [test_loaders] for dataloader in test_loaders: - run_prediction(dataloader, pretrained_model) + run_prediction(pretrained_model, dataloader) # test HPC saving trainer.checkpoint_connector.hpc_save(ckpt_path, trainer.logger) diff --git a/tests/models/test_cpu.py b/tests/models/test_cpu.py index 892077ccdb1be..cc24f6f187502 100644 --- a/tests/models/test_cpu.py +++ b/tests/models/test_cpu.py @@ -21,15 +21,14 @@ import tests.base.develop_pipelines as tpipes import tests.base.develop_utils as tutils from pytorch_lightning import Trainer -from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint -from tests.base import EvalModelTemplate +from pytorch_lightning.callbacks import Callback, EarlyStopping, ModelCheckpoint +from tests.base import BoringModel @pytest.mark.parametrize("enable_pl_optimizer", [False, True]) def test_cpu_slurm_save_load(enable_pl_optimizer, tmpdir): """Verify model save/load/checkpoint on CPU.""" - hparams = EvalModelTemplate.get_default_hparams() - model = EvalModelTemplate(**hparams) + model = BoringModel() # logger file to get meta logger = tutils.get_default_logger(tmpdir) @@ -61,11 +60,8 @@ def test_cpu_slurm_save_load(enable_pl_optimizer, tmpdir): for batch in dataloader: break - x, y = batch - x = x.view(x.size(0), -1) - model.eval() - pred_before_saving = 
model(x) + pred_before_saving = model(batch) # test HPC saving # simulate snapshot on slurm @@ -75,26 +71,26 @@ def test_cpu_slurm_save_load(enable_pl_optimizer, tmpdir): # new logger file to get meta logger = tutils.get_default_logger(tmpdir, version=version) + model = BoringModel() + + class _StartCallback(Callback): + # set the epoch start hook so we can predict before the model does the full training + def on_train_epoch_start(self, trainer, model): + assert trainer.global_step == real_global_step and trainer.global_step > 0 + # predict with loaded model to make sure answers are the same + mode = model.training + model.eval() + new_pred = model(batch) + assert torch.eq(pred_before_saving, new_pred).all() + model.train(mode) + trainer = Trainer( default_root_dir=tmpdir, max_epochs=1, logger=logger, - callbacks=[ModelCheckpoint(dirpath=tmpdir)], enable_pl_optimizer=enable_pl_optimizer, + callbacks=[_StartCallback(), ModelCheckpoint(dirpath=tmpdir)], ) - model = EvalModelTemplate(**hparams) - - # set the epoch start hook so we can predict before the model does the full training - def assert_pred_same(): - assert trainer.global_step == real_global_step and trainer.global_step > 0 - - # predict with loaded model to make sure answers are the same - trainer.model.eval() - new_pred = trainer.model(x) - assert torch.all(torch.eq(pred_before_saving, new_pred)).item() == 1 - - model.on_epoch_start = assert_pred_same - # by calling fit again, we trigger training, loading weights from the cluster # and our hook to predict using current model before any more weight updates trainer.fit(model) @@ -102,21 +98,26 @@ def assert_pred_same(): @pytest.mark.parametrize("enable_pl_optimizer", [False, True]) def test_early_stopping_cpu_model(enable_pl_optimizer, tmpdir): - """Test each of the trainer options.""" - stopping = EarlyStopping(monitor='early_stop_on', min_delta=0.1) + class ModelTrainVal(BoringModel): + def validation_epoch_end(self, outputs) -> None: + val_loss = torch.stack([x["x"] for x in outputs]).mean() + self.log('val_loss', val_loss) + + stopping = EarlyStopping(monitor="val_loss", min_delta=0.1) trainer_options = dict( - default_root_dir=tmpdir, callbacks=[stopping], - max_epochs=2, + default_root_dir=tmpdir, gradient_clip_val=1.0, overfit_batches=0.20, track_grad_norm=2, + enable_pl_optimizer=enable_pl_optimizer, + progress_bar_refresh_rate=0, + accumulate_grad_batches=2, limit_train_batches=0.1, limit_val_batches=0.1, - enable_pl_optimizer=enable_pl_optimizer, ) - model = EvalModelTemplate() + model = ModelTrainVal() tpipes.run_model_test(trainer_options, model, on_gpu=False) # test freeze on cpu @@ -146,26 +147,29 @@ def test_multi_cpu_model_ddp(enable_pl_optimizer, tmpdir): enable_pl_optimizer=enable_pl_optimizer, ) - model = EvalModelTemplate() - tpipes.run_model_test(trainer_options, model, on_gpu=False) + model = BoringModel() + tpipes.run_model_test(trainer_options, model, on_gpu=False, min_acc=0.05) def test_lbfgs_cpu_model(tmpdir): - """Test each of the trainer options.""" + """Test each of the trainer options. 
Testing LBFGS optimizer""" + class ModelSpecifiedOptimizer(BoringModel): + def __init__(self, optimizer_name, learning_rate): + super().__init__() + self.optimizer_name = optimizer_name + self.learning_rate = learning_rate + self.save_hyperparameters() + trainer_options = dict( default_root_dir=tmpdir, max_epochs=1, progress_bar_refresh_rate=0, - weights_summary='top', + weights_summary="top", limit_train_batches=0.2, limit_val_batches=0.2, ) - hparams = EvalModelTemplate.get_default_hparams() - hparams.update(optimizer_name='lbfgs', - learning_rate=0.004) - model = EvalModelTemplate(**hparams) - model.configure_optimizers = model.configure_optimizers__lbfgs + model = ModelSpecifiedOptimizer(optimizer_name="LBFGS", learning_rate=0.004) tpipes.run_model_test_without_loggers(trainer_options, model, min_acc=0.25) @@ -181,8 +185,8 @@ def test_default_logger_callbacks_cpu_model(tmpdir): limit_val_batches=0.01, ) - model = EvalModelTemplate() - tpipes.run_model_test_without_loggers(trainer_options, model) + model = BoringModel() + tpipes.run_model_test_without_loggers(trainer_options, model, min_acc=0.01) # test freeze on cpu model.freeze() @@ -191,7 +195,17 @@ def test_default_logger_callbacks_cpu_model(tmpdir): def test_running_test_after_fitting(tmpdir): """Verify test() on fitted model.""" - model = EvalModelTemplate() + class ModelTrainValTest(BoringModel): + + def validation_epoch_end(self, outputs) -> None: + val_loss = torch.stack([x["x"] for x in outputs]).mean() + self.log('val_loss', val_loss) + + def test_epoch_end(self, outputs) -> None: + test_loss = torch.stack([x["y"] for x in outputs]).mean() + self.log('test_loss', test_loss) + + model = ModelTrainValTest() # logger file to get meta logger = tutils.get_default_logger(tmpdir) @@ -217,12 +231,22 @@ def test_running_test_after_fitting(tmpdir): trainer.test() # test we have good test accuracy - tutils.assert_ok_model_acc(trainer, thr=0.5) + tutils.assert_ok_model_acc(trainer, key='test_loss', thr=0.5) def test_running_test_no_val(tmpdir): - """Verify `test()` works on a model with no `val_loader`.""" - model = EvalModelTemplate() + """Verify `test()` works on a model with no `val_dataloader`. 
It performs + train and test only""" + class ModelTrainTest(BoringModel): + + def val_dataloader(self): + pass + + def test_epoch_end(self, outputs) -> None: + test_loss = torch.stack([x["y"] for x in outputs]).mean() + self.log('test_loss', test_loss) + + model = ModelTrainTest() # logger file to get meta logger = tutils.get_default_logger(tmpdir) @@ -248,12 +272,12 @@ def test_running_test_no_val(tmpdir): trainer.test() # test we have good test accuracy - tutils.assert_ok_model_acc(trainer) + tutils.assert_ok_model_acc(trainer, key='test_loss') def test_simple_cpu(tmpdir): """Verify continue training session on CPU.""" - model = EvalModelTemplate() + model = BoringModel() # fit model trainer = Trainer( @@ -275,32 +299,12 @@ def test_cpu_model(tmpdir): progress_bar_refresh_rate=0, max_epochs=1, limit_train_batches=0.4, - limit_val_batches=0.4 - ) - - model = EvalModelTemplate() - - tpipes.run_model_test(trainer_options, model, on_gpu=False) - - -@pytest.mark.parametrize("enable_pl_optimizer", [False, True]) -def test_all_features_cpu_model(enable_pl_optimizer, tmpdir): - """Test each of the trainer options.""" - trainer_options = dict( - default_root_dir=tmpdir, - gradient_clip_val=1.0, - overfit_batches=0.20, - track_grad_norm=2, - progress_bar_refresh_rate=0, - accumulate_grad_batches=2, - max_epochs=1, - limit_train_batches=0.4, limit_val_batches=0.4, - enable_pl_optimizer=enable_pl_optimizer, ) - model = EvalModelTemplate() - tpipes.run_model_test(trainer_options, model, on_gpu=False) + model = BoringModel() + + tpipes.run_model_test(trainer_options, model, on_gpu=False, min_acc=0.01) def test_tbptt_cpu_model(tmpdir): @@ -319,10 +323,12 @@ def __getitem__(self, i): def __len__(self): return 1 - class BpttTestModel(EvalModelTemplate): - def __init__(self, *args, **kwargs): + class BpttTestModel(BoringModel): + def __init__(self, batch_size, in_features, out_features, *args, **kwargs): super().__init__(*args, **kwargs) self.test_hidden = None + self.batch_size = batch_size + self.layer = torch.nn.Linear(in_features, out_features) def training_step(self, batch, batch_idx, hiddens): assert hiddens == self.test_hidden, "Hidden state not persistent between tbptt steps" @@ -335,18 +341,17 @@ def training_step(self, batch, batch_idx, hiddens): assert y_tensor.shape[1] == truncated_bptt_steps, "tbptt split list failed" pred = self(x_tensor.view(batch_size, truncated_bptt_steps)) - loss_val = torch.nn.functional.mse_loss( - pred, y_tensor.view(batch_size, truncated_bptt_steps)) + loss_val = torch.nn.functional.mse_loss(pred, y_tensor.view(batch_size, truncated_bptt_steps)) return { - 'loss': loss_val, - 'hiddens': self.test_hidden, + "loss": loss_val, + "hiddens": self.test_hidden, } def training_epoch_end(self, training_step_outputs): training_step_outputs = training_step_outputs[0] assert len(training_step_outputs) == (sequence_size / truncated_bptt_steps) - loss = torch.stack([x['loss'] for x in training_step_outputs]).mean() - self.log('train_loss', loss) + loss = torch.stack([x["loss"] for x in training_step_outputs]).mean() + self.log("train_loss", loss) def train_dataloader(self): return torch.utils.data.DataLoader( @@ -356,15 +361,8 @@ def train_dataloader(self): sampler=None, ) - hparams = EvalModelTemplate.get_default_hparams() - hparams.update( - batch_size=batch_size, - in_features=truncated_bptt_steps, - hidden_dim=truncated_bptt_steps, - out_features=truncated_bptt_steps - ) - - model = BpttTestModel(**hparams) + model = BpttTestModel(batch_size=batch_size, + 
in_features=truncated_bptt_steps, out_features=truncated_bptt_steps) model.example_input_array = torch.randn(5, truncated_bptt_steps) # fit model diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py index 169552ce1bd75..7cfeb8f0ae53e 100644 --- a/tests/models/test_gpu.py +++ b/tests/models/test_gpu.py @@ -21,11 +21,10 @@ import tests.base.develop_pipelines as tpipes import tests.base.develop_utils as tutils from pytorch_lightning import Trainer +from pytorch_lightning.accelerators.gpu_accelerator import GPUAccelerator from pytorch_lightning.utilities import device_parser from pytorch_lightning.utilities.exceptions import MisconfigurationException -from tests.base import EvalModelTemplate -from pytorch_lightning.accelerators.gpu_accelerator import GPUAccelerator - +from tests.base import BoringModel PRETEND_N_OF_GPUS = 16 @@ -43,8 +42,8 @@ def test_multi_gpu_none_backend(tmpdir): gpus=2, ) - model = EvalModelTemplate() - tpipes.run_model_test(trainer_options, model) + model = BoringModel() + tpipes.run_model_test(trainer_options, model, min_acc=0.20) @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") @@ -60,7 +59,7 @@ def test_single_gpu_model(tmpdir, gpus): gpus=gpus ) - model = EvalModelTemplate() + model = BoringModel() tpipes.run_model_test(trainer_options, model) diff --git a/tests/models/test_hparams.py b/tests/models/test_hparams.py index 5e5fab7d0a0b4..7081d450ee256 100644 --- a/tests/models/test_hparams.py +++ b/tests/models/test_hparams.py @@ -20,14 +20,14 @@ import pytest import torch from fsspec.implementations.local import LocalFileSystem -from omegaconf import OmegaConf, Container +from omegaconf import Container, OmegaConf from torch.nn import functional as F from torch.utils.data import DataLoader -from pytorch_lightning import Trainer, LightningModule -from pytorch_lightning.core.saving import save_hparams_to_yaml, load_hparams_from_yaml +from pytorch_lightning import LightningModule, Trainer +from pytorch_lightning.core.saving import load_hparams_from_yaml, save_hparams_to_yaml from pytorch_lightning.utilities import AttributeDict, is_picklable -from tests.base import EvalModelTemplate, TrialMNIST, BoringModel +from tests.base import BoringModel, EvalModelTemplate, TrialMNIST class SaveHparamsModel(BoringModel): @@ -595,13 +595,7 @@ def __init__(self, **kwargs): self.save_hyperparameters() -class RuntimeParamChangeModelAssign(BoringModel): - def __init__(self, **kwargs): - super().__init__() - self.hparams = kwargs - - -@pytest.mark.parametrize("cls", [RuntimeParamChangeModelSaving, RuntimeParamChangeModelAssign]) +@pytest.mark.parametrize("cls", [RuntimeParamChangeModelSaving]) def test_init_arg_with_runtime_change(tmpdir, cls): """Test that we save/export only the initial hparams, no other runtime change allowed""" model = cls(running_arg=123) diff --git a/tests/models/test_restore.py b/tests/models/test_restore.py index a2a9aa6b9042c..6ee5d362ffcaa 100644 --- a/tests/models/test_restore.py +++ b/tests/models/test_restore.py @@ -161,6 +161,7 @@ def test_callbacks_references_resume_from_checkpoint(enable_pl_optimizer, tmpdir @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") def test_running_test_pretrained_model_distrib_dp(tmpdir): """Verify `test()` on pretrained model.""" + tutils.set_random_master_port() model = EvalModelTemplate() @@ -205,7 +206,7 @@ def test_running_test_pretrained_model_distrib_dp(tmpdir): dataloaders = [dataloaders] for dataloader in dataloaders: - 
tpipes.run_prediction(dataloader, pretrained_model) + tpipes.run_prediction(pretrained_model, dataloader) @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") @@ -256,7 +257,7 @@ def test_running_test_pretrained_model_distrib_ddp_spawn(tmpdir): dataloaders = [dataloaders] for dataloader in dataloaders: - tpipes.run_prediction(dataloader, pretrained_model) + tpipes.run_prediction(pretrained_model, dataloader) def test_running_test_pretrained_model_cpu(tmpdir): @@ -398,7 +399,7 @@ def assert_good_acc(): dp_model.eval() dataloader = trainer.train_dataloader - tpipes.run_prediction(dataloader, dp_model, dp=True) + tpipes.run_prediction(dp_model, dataloader, dp=True) # new model model = EvalModelTemplate(**hparams) diff --git a/tests/trainer/test_dataloaders.py b/tests/trainer/test_dataloaders.py index 9b42aa98c9dd0..614b2a8e66ab8 100644 --- a/tests/trainer/test_dataloaders.py +++ b/tests/trainer/test_dataloaders.py @@ -128,7 +128,7 @@ def test_multiple_val_dataloader(tmpdir): # make sure predictions are good for each val set for dataloader in trainer.val_dataloaders: - tpipes.run_prediction(dataloader, trainer.model) + tpipes.run_prediction(trainer.model, dataloader) @pytest.mark.parametrize('ckpt_path', [None, 'best', 'specific']) @@ -164,7 +164,7 @@ def test_step(self, batch, batch_idx, *args, **kwargs): # make sure predictions are good for each test set for dataloader in trainer.test_dataloaders: - tpipes.run_prediction(dataloader, trainer.model) + tpipes.run_prediction(trainer.model, dataloader) # run the test method trainer.test(ckpt_path=ckpt_path) From 5ae6926a520ecaa21fd96f3ebd15b9069dbd880a Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Thu, 7 Jan 2021 14:01:52 +0100 Subject: [PATCH 2/3] fix some minor typos in docs (#5369) * fix docs typos * Apply suggestions from code review Co-authored-by: Wansoo Kim * flake8 Co-authored-by: Wansoo Kim --- .../accelerators/ddp_accelerator.py | 1 - .../accelerators/ddp_hpc_accelerator.py | 1 - .../accelerators/ddp_spawn_accelerator.py | 2 +- .../accelerators/tpu_accelerator.py | 1 - .../metrics/functional/average_precision.py | 2 +- .../metrics/functional/explained_variance.py | 2 +- pytorch_lightning/metrics/functional/f_beta.py | 4 ++-- .../metrics/functional/mean_squared_error.py | 2 +- .../functional/mean_squared_log_error.py | 2 +- .../functional/precision_recall_curve.py | 2 +- pytorch_lightning/metrics/functional/psnr.py | 2 -- pytorch_lightning/metrics/functional/r2score.py | 2 +- pytorch_lightning/metrics/functional/roc.py | 2 +- pytorch_lightning/metrics/functional/ssim.py | 2 +- pytorch_lightning/metrics/utils.py | 2 +- pytorch_lightning/overrides/data_parallel.py | 17 ++++++++++++----- .../trainer/connectors/checkpoint_connector.py | 3 ++- pytorch_lightning/utilities/argparse.py | 2 ++ 18 files changed, 28 insertions(+), 23 deletions(-) diff --git a/pytorch_lightning/accelerators/ddp_accelerator.py b/pytorch_lightning/accelerators/ddp_accelerator.py index e34a7183a5334..56f6eaa2223a3 100644 --- a/pytorch_lightning/accelerators/ddp_accelerator.py +++ b/pytorch_lightning/accelerators/ddp_accelerator.py @@ -223,7 +223,6 @@ def ddp_train(self, process_idx, model): Args: process_idx: - mp_queue: multiprocessing queue model: Returns: diff --git a/pytorch_lightning/accelerators/ddp_hpc_accelerator.py b/pytorch_lightning/accelerators/ddp_hpc_accelerator.py index 47c1b736fd8b4..cf6aad9999223 100644 --- a/pytorch_lightning/accelerators/ddp_hpc_accelerator.py +++ 
b/pytorch_lightning/accelerators/ddp_hpc_accelerator.py @@ -112,7 +112,6 @@ def ddp_train(self, process_idx, model): Args: process_idx: - mp_queue: multiprocessing queue model: Returns: diff --git a/pytorch_lightning/accelerators/ddp_spawn_accelerator.py b/pytorch_lightning/accelerators/ddp_spawn_accelerator.py index 9410984df20fc..e23943e9262f8 100644 --- a/pytorch_lightning/accelerators/ddp_spawn_accelerator.py +++ b/pytorch_lightning/accelerators/ddp_spawn_accelerator.py @@ -86,7 +86,7 @@ def train(self): self.__recover_child_process_weights(model, best_path, last_path) return results - def ddp_train(self, process_idx, mp_queue, model, is_master=False, proc_offset=0): + def ddp_train(self, process_idx, mp_queue, model, is_master: bool = False, proc_offset: int = 0): """ Entry point for ddp diff --git a/pytorch_lightning/accelerators/tpu_accelerator.py b/pytorch_lightning/accelerators/tpu_accelerator.py index 6a3263095ee67..66fc236a2a775 100644 --- a/pytorch_lightning/accelerators/tpu_accelerator.py +++ b/pytorch_lightning/accelerators/tpu_accelerator.py @@ -171,7 +171,6 @@ def to_device(self, batch): Args: batch: A tensor or collection of tensors. - tpu_id: The id of the TPU core. If omitted, the first available core is chosen. Return: the tensor on the TPU device. diff --git a/pytorch_lightning/metrics/functional/average_precision.py b/pytorch_lightning/metrics/functional/average_precision.py index da4f37b073206..20317b81d5265 100644 --- a/pytorch_lightning/metrics/functional/average_precision.py +++ b/pytorch_lightning/metrics/functional/average_precision.py @@ -67,7 +67,7 @@ def average_precision( which for binary problem is translate to 1. For multiclass problems this argument should not be set as we iteratively change it in the range [0,num_classes-1] - sample_weight: sample weights for each data point + sample_weights: sample weights for each data point Returns: tensor with average precision. If multiclass will return list diff --git a/pytorch_lightning/metrics/functional/explained_variance.py b/pytorch_lightning/metrics/functional/explained_variance.py index 20b38c58a2a6b..20550435ee370 100644 --- a/pytorch_lightning/metrics/functional/explained_variance.py +++ b/pytorch_lightning/metrics/functional/explained_variance.py @@ -62,7 +62,7 @@ def explained_variance( Computes explained variance. Args: - pred: estimated labels + preds: estimated labels target: ground truth labels multioutput: Defines aggregation in the case of multiple output scores. Can be one of the following strings (default is `'uniform_average'`.): diff --git a/pytorch_lightning/metrics/functional/f_beta.py b/pytorch_lightning/metrics/functional/f_beta.py index 2b0ba194d56f0..c294d29805a6f 100755 --- a/pytorch_lightning/metrics/functional/f_beta.py +++ b/pytorch_lightning/metrics/functional/f_beta.py @@ -75,7 +75,7 @@ def fbeta( If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. Args: - pred: estimated probabilities + preds: estimated probabilities target: ground-truth labels num_classes: Number of classes in the dataset. beta: Beta coefficient in the F measure. @@ -128,7 +128,7 @@ def f1( If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. Args: - pred: estimated probabilities + preds: estimated probabilities target: ground-truth labels num_classes: Number of classes in the dataset. 
threshold: diff --git a/pytorch_lightning/metrics/functional/mean_squared_error.py b/pytorch_lightning/metrics/functional/mean_squared_error.py index e418536b26973..f8407531e9057 100644 --- a/pytorch_lightning/metrics/functional/mean_squared_error.py +++ b/pytorch_lightning/metrics/functional/mean_squared_error.py @@ -34,7 +34,7 @@ def mean_squared_error(preds: torch.Tensor, target: torch.Tensor) -> torch.Tenso Computes mean squared error Args: - pred: estimated labels + preds: estimated labels target: ground truth labels Return: diff --git a/pytorch_lightning/metrics/functional/mean_squared_log_error.py b/pytorch_lightning/metrics/functional/mean_squared_log_error.py index 1b96e1a7abc10..59012a607bba3 100644 --- a/pytorch_lightning/metrics/functional/mean_squared_log_error.py +++ b/pytorch_lightning/metrics/functional/mean_squared_log_error.py @@ -34,7 +34,7 @@ def mean_squared_log_error(preds: torch.Tensor, target: torch.Tensor) -> torch.T Computes mean squared log error Args: - pred: estimated labels + preds: estimated labels target: ground truth labels Return: diff --git a/pytorch_lightning/metrics/functional/precision_recall_curve.py b/pytorch_lightning/metrics/functional/precision_recall_curve.py index e497c5f7b37c7..0d562f8d6c3ae 100644 --- a/pytorch_lightning/metrics/functional/precision_recall_curve.py +++ b/pytorch_lightning/metrics/functional/precision_recall_curve.py @@ -173,7 +173,7 @@ def precision_recall_curve( which for binary problem is translate to 1. For multiclass problems this argument should not be set as we iteratively change it in the range [0,num_classes-1] - sample_weight: sample weights for each data point + sample_weights: sample weights for each data point Returns: 3-element tuple containing diff --git a/pytorch_lightning/metrics/functional/psnr.py b/pytorch_lightning/metrics/functional/psnr.py index 4aec3d902b418..40d3b16e538dc 100644 --- a/pytorch_lightning/metrics/functional/psnr.py +++ b/pytorch_lightning/metrics/functional/psnr.py @@ -46,8 +46,6 @@ def psnr( - ``'elementwise_mean'``: takes the mean (default) - ``'sum'``: takes the sum - ``'none'``: no reduction will be applied - return_state: returns a internal state that can be ddp reduced - before doing the final calculation Return: Tensor with PSNR score diff --git a/pytorch_lightning/metrics/functional/r2score.py b/pytorch_lightning/metrics/functional/r2score.py index f689e3ac9cac1..82117dd688064 100644 --- a/pytorch_lightning/metrics/functional/r2score.py +++ b/pytorch_lightning/metrics/functional/r2score.py @@ -98,7 +98,7 @@ def r2score( be provided as the ``adjusted`` argument. Args: - pred: estimated labels + preds: estimated labels target: ground truth labels adjusted: number of independent regressors for calculating adjusted r2 score. Default 0 (standard r2 score). diff --git a/pytorch_lightning/metrics/functional/roc.py b/pytorch_lightning/metrics/functional/roc.py index ffd5f9f0ac79c..26fa6d07d4f61 100644 --- a/pytorch_lightning/metrics/functional/roc.py +++ b/pytorch_lightning/metrics/functional/roc.py @@ -98,7 +98,7 @@ def roc( which for binary problem is translate to 1. 
For multiclass problems this argument should not be set as we iteratively change it in the range [0,num_classes-1] - sample_weight: sample weights for each data point + sample_weights: sample weights for each data point Returns: 3-element tuple containing diff --git a/pytorch_lightning/metrics/functional/ssim.py b/pytorch_lightning/metrics/functional/ssim.py index b52744421aef2..a978ce8268161 100644 --- a/pytorch_lightning/metrics/functional/ssim.py +++ b/pytorch_lightning/metrics/functional/ssim.py @@ -125,7 +125,7 @@ def ssim( Computes Structual Similarity Index Measure Args: - pred: estimated image + preds: estimated image target: ground truth image kernel_size: size of the gaussian kernel (default: (11, 11)) sigma: Standard deviation of the gaussian kernel (default: (1.5, 1.5)) diff --git a/pytorch_lightning/metrics/utils.py b/pytorch_lightning/metrics/utils.py index d79d1a355db1e..e324dad33a6f1 100644 --- a/pytorch_lightning/metrics/utils.py +++ b/pytorch_lightning/metrics/utils.py @@ -232,7 +232,7 @@ def class_reduce( Args: num: numerator tensor - decom: denominator tensor + denom: denominator tensor weights: weights for each class class_reduction: reduction method for multiclass problems diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index b7249dfd99980..1943a83644e29 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -16,10 +16,12 @@ import threading from collections.abc import Iterable, Mapping from itertools import chain +from typing import Optional import torch +from torch import Tensor from torch.cuda._utils import _get_device_index -from torch.nn import DataParallel +from torch.nn import DataParallel, Module from torch.nn.parallel import DistributedDataParallel from torch.nn.parallel._functions import Gather @@ -222,15 +224,20 @@ def warn_missing_output(fx_called): warning_cache.warn("Your training_step returned None. Make sure that was your intention!") -def parallel_apply(modules, inputs, kwargs_tup=None, devices=None): # pragma: no-cover +def parallel_apply( + modules: Module, + inputs: Tensor, + kwargs_tup: Optional[tuple] = None, + devices: Optional[list] = None, +): # pragma: no-cover r"""Applies each `module` in :attr:`modules` in parallel on arguments contained in :attr:`inputs` (positional) and :attr:`kwargs_tup` (keyword) on each of :attr:`devices`. Args: - modules (Module): modules to be parallelized - inputs (tensor): inputs to the modules - devices (list of int or torch.device): CUDA devices + modules: modules to be parallelized + inputs: inputs to the modules + devices: CUDA devices :attr:`modules`, :attr:`inputs`, :attr:`kwargs_tup` (if given), and :attr:`devices` (if given) should all have same length. Moreover, each diff --git a/pytorch_lightning/trainer/connectors/checkpoint_connector.py b/pytorch_lightning/trainer/connectors/checkpoint_connector.py index f13765ac28ce4..d46e0e4cf3503 100644 --- a/pytorch_lightning/trainer/connectors/checkpoint_connector.py +++ b/pytorch_lightning/trainer/connectors/checkpoint_connector.py @@ -345,10 +345,11 @@ def hpc_load(self, checkpoint_path: str, on_gpu: bool): model.on_hpc_load(checkpoint) def max_ckpt_in_folder(self, dir_path: Union[str, Path], name_key: str = 'ckpt_') -> Optional[int]: - """List up files in `dir_path` with name_key, then yield maximum suffix number. + """List up files in `dir_path` with `name_key`, then yield maximum suffix number. 
Args: dir_path: path of directory which may contain files whose name include `name_key` + name_key: file name prefix Returns: None if no-corresponding-file else maximum suffix number diff --git a/pytorch_lightning/utilities/argparse.py b/pytorch_lightning/utilities/argparse.py index ff800802fef19..70d36e9dccccb 100644 --- a/pytorch_lightning/utilities/argparse.py +++ b/pytorch_lightning/utilities/argparse.py @@ -25,6 +25,7 @@ def from_argparse_args(cls, args: Union[Namespace, ArgumentParser], **kwargs): Eventually use varibles from OS environement which are defined as "PL__" Args: + cls: Lightning class args: The parser or namespace to take arguments from. Only known arguments will be parsed and passed to the :class:`Trainer`. **kwargs: Additional keyword arguments that may override ones in the parser or namespace. @@ -139,6 +140,7 @@ def add_argparse_args(cls, parent_parser: ArgumentParser) -> ArgumentParser: r"""Extends existing argparse by default `Trainer` attributes. Args: + cls: Lightning class parent_parser: The custom cli arguments parser, which will be extended by the Trainer default arguments. From 5f94900361ed53bf55caee66ad6d7eac5230b573 Mon Sep 17 00:00:00 2001 From: chaton Date: Thu, 7 Jan 2021 16:57:26 +0100 Subject: [PATCH 3/3] [Feat] Cleanup ModelCheckpoint / EarlyStopping by moving logic to LoggerConnector (#5218) * [bug-fix] Metric reduction with Logging (#5150) * add test * resolve bug * udpate test * wrongly copy / paste * update test * resolve a second bug Co-authored-by: Ubuntu * iupdate * resolve bugs * add test back * correct flake8 * resolve flake8 * update on comments * update tests * add a test * add test * update to Callable Co-authored-by: Ubuntu --- CHANGELOG.md | 3 + pytorch_lightning/callbacks/early_stopping.py | 13 +-- .../callbacks/model_checkpoint.py | 10 +-- .../logger_connector/epoch_result_store.py | 10 +-- .../logger_connector/logger_connector.py | 71 +++++++++++++--- .../logger_connector/metrics_holder.py | 80 +++++++++++++++++++ .../trainer/logging/test_logger_connector.py | 35 +++++++- 7 files changed, 180 insertions(+), 42 deletions(-) create mode 100644 pytorch_lightning/trainer/connectors/logger_connector/metrics_holder.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 928144320394a..8efcd6f06c2d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Changed +- Changed `automatic casting` for LoggerConnector `metrics` ([#5218](https://github.com/PyTorchLightning/pytorch-lightning/pull/5218)) + + - `stat_scores` metric now calculates stat scores over all classes and gains new parameters, in line with the new `StatScores` metric ([#4839](https://github.com/PyTorchLightning/pytorch-lightning/pull/4839)) diff --git a/pytorch_lightning/callbacks/early_stopping.py b/pytorch_lightning/callbacks/early_stopping.py index c8725f4cde6fd..fca39036c9404 100644 --- a/pytorch_lightning/callbacks/early_stopping.py +++ b/pytorch_lightning/callbacks/early_stopping.py @@ -19,14 +19,12 @@ Monitor a metric and stop training when it stops improving. 
""" -import numbers import numpy as np import torch from pytorch_lightning.callbacks.base import Callback -from pytorch_lightning.metrics.metric import Metric -from pytorch_lightning.utilities import _TPU_AVAILABLE, rank_zero_info, rank_zero_warn +from pytorch_lightning.utilities import rank_zero_info, rank_zero_warn class EarlyStopping(Callback): @@ -196,15 +194,6 @@ def _run_early_stopping_check(self, trainer, pl_module): # when in dev debugging trainer.dev_debugger.track_early_stopping_history(self, current) - if current is not None: - if isinstance(current, Metric): - current = current.compute() - elif isinstance(current, numbers.Number): - current = torch.tensor(current, device=pl_module.device, dtype=torch.float) - - if trainer.use_tpu and _TPU_AVAILABLE: - current = current.cpu() - if self.monitor_op(current - self.min_delta, self.best_score): self.best_score = current self.wait_count = 0 diff --git a/pytorch_lightning/callbacks/model_checkpoint.py b/pytorch_lightning/callbacks/model_checkpoint.py index 7fd7a571a47ce..3fc2b54d98162 100644 --- a/pytorch_lightning/callbacks/model_checkpoint.py +++ b/pytorch_lightning/callbacks/model_checkpoint.py @@ -20,7 +20,6 @@ """ -import numbers import os import re from copy import deepcopy @@ -33,7 +32,6 @@ from pytorch_lightning import _logger as log from pytorch_lightning.callbacks.base import Callback -from pytorch_lightning.metrics.metric import Metric from pytorch_lightning.utilities import rank_zero_info, rank_zero_only, rank_zero_warn from pytorch_lightning.utilities.cloud_io import get_filesystem from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -554,12 +552,6 @@ def _save_top_k_checkpoints(self, trainer, pl_module, metrics): epoch = metrics.get("epoch") step = metrics.get("step") - if current is not None: - if isinstance(current, Metric): - current = current.compute() - elif isinstance(current, numbers.Number): - current = torch.tensor(current, device=pl_module.device, dtype=torch.float) - if self.check_monitor_top_k(current): self._update_best_and_save(current, epoch, step, trainer, pl_module, metrics) elif self.verbose: @@ -587,7 +579,7 @@ def _update_best_and_save( self.best_k_models.pop(del_filepath) # do not save nan, replace with +/- inf - if torch.isnan(current): + if isinstance(current, torch.Tensor) and torch.isnan(current): current = torch.tensor(float('inf' if self.mode == "min" else '-inf')) filepath = self._get_metric_interpolated_filepath_name(ckpt_name_metrics, epoch, step, del_filepath) diff --git a/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py b/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py index dd12a2970727a..2796a61ee5c83 100644 --- a/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py +++ b/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py @@ -379,7 +379,7 @@ def update_logger_connector(self) -> None: if is_train: # Only log and add to callback epoch step during evaluation, test. 
- logger_connector.logged_metrics.update(batch_log_metrics) + logger_connector._logged_metrics.update(batch_log_metrics) callback_metrics.update(batch_pbar_metrics) callback_metrics.update(batch_log_metrics) else: @@ -389,8 +389,8 @@ def update_logger_connector(self) -> None: # get logged_metrics epoch_log_metrics = self.get_epoch_log_metrics() - logger_connector.logged_metrics.update(epoch_log_metrics) - logger_connector.logged_metrics.update(epoch=self.trainer.current_epoch) + logger_connector._logged_metrics.update(epoch_log_metrics) + logger_connector._logged_metrics.update({"epoch": self.trainer.current_epoch}) # get forked_metrics forked_metrics = self.get_forked_metrics() @@ -403,8 +403,8 @@ def update_logger_connector(self) -> None: logger_connector.evaluation_callback_metrics.update(callback_metrics) # update callback_metrics - logger_connector.callback_metrics.update(callback_metrics) - logger_connector.callback_metrics.pop("epoch", None) + logger_connector._callback_metrics.update(callback_metrics) + logger_connector._callback_metrics.pop("epoch", None) batch_pbar_metrics.pop("debug_epoch", None) return batch_pbar_metrics, batch_log_metrics diff --git a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py index 6b55b3bce1b9a..73e9223fb7d0f 100644 --- a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py +++ b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py @@ -14,7 +14,7 @@ from copy import deepcopy import os from pprint import pprint -from typing import Iterable, Union +from typing import Any, Iterable, Union, Dict import torch @@ -23,6 +23,7 @@ from pytorch_lightning.loggers import LoggerCollection, TensorBoardLogger from pytorch_lightning.trainer.connectors.logger_connector.callback_hook_validator import CallbackHookNameValidator from pytorch_lightning.trainer.connectors.logger_connector.epoch_result_store import EpochResultStore, LoggerStages +from pytorch_lightning.trainer.connectors.logger_connector.metrics_holder import MetricsHolder from pytorch_lightning.utilities import flatten_dict from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.model_helpers import is_overridden @@ -31,19 +32,64 @@ class LoggerConnector: def __init__(self, trainer): self.trainer = trainer - self.callback_metrics = {} - self.evaluation_callback_metrics = {} - self.logged_metrics = {} - self.progress_bar_metrics = {} + self._callback_metrics = MetricsHolder() + self._evaluation_callback_metrics = MetricsHolder(to_float=True) + self._logged_metrics = MetricsHolder() + self._progress_bar_metrics = MetricsHolder() self.eval_loop_results = [] self._cached_results = {stage: EpochResultStore(trainer, stage) for stage in LoggerStages} self._callback_hook_validator = CallbackHookNameValidator() self._current_stage = None + @property + def callback_metrics(self) -> Dict: + return self.get_metrics("callback_metrics") + + @callback_metrics.setter + def callback_metrics(self, callback_metrics: Dict) -> None: + self.set_metrics("callback_metrics", callback_metrics) + + @property + def evaluation_callback_metrics(self) -> Dict: + return self.get_metrics("evaluation_callback_metrics") + + @evaluation_callback_metrics.setter + def evaluation_callback_metrics(self, evaluation_callback_metrics: Dict) -> None: + self.set_metrics("evaluation_callback_metrics", evaluation_callback_metrics) + + @property + def 
logged_metrics(self) -> Dict: + return self.get_metrics("logged_metrics") + + @logged_metrics.setter + def logged_metrics(self, logged_metrics: Dict) -> None: + self.set_metrics("logged_metrics", logged_metrics) + + @property + def progress_bar_metrics(self) -> Dict: + return self.get_metrics("progress_bar_metrics") + + @progress_bar_metrics.setter + def progress_bar_metrics(self, progress_bar_metrics: Dict) -> None: + self.set_metrics("progress_bar_metrics", progress_bar_metrics) + @property def cached_results(self) -> Union[EpochResultStore, None]: return self._cached_results.get(self._current_stage) # type: ignore + def get_metrics(self, key: str) -> Dict: + metrics_holder = getattr(self, f"_{key}", None) + model_ref = self.trainer.get_model() + metrics_holder.convert( + self.trainer.use_tpu, + model_ref.device if model_ref is not None else model_ref + ) + return metrics_holder.metrics + + def set_metrics(self, key: str, val: Any) -> None: + metrics_holder = getattr(self, f"_{key}", None) + metrics_holder.reset(val) + def set_stage(self, stage_or_testing: Union[str, bool], reset: bool = False) -> None: self._current_stage = LoggerStages.determine_stage(stage_or_testing) if reset: @@ -153,10 +199,10 @@ def cache_training_step_metrics(self, opt_closure_result): if len(pbar_metrics_tmp) > 0: self.add_progress_bar_metrics(pbar_metrics_tmp) - self.callback_metrics.update(callback_metrics_tmp) + self._callback_metrics.update(callback_metrics_tmp) # save legacy log metrics - self.logged_metrics.update(logged_metrics_tmp) + self._logged_metrics.update(logged_metrics_tmp) self.cached_results.legacy_batch_log_metrics.update(logged_metrics_tmp) def log_metrics(self, metrics, grad_norm_dic, step=None, log_train_step_metrics=False): @@ -209,7 +255,7 @@ def add_progress_bar_metrics(self, metrics): if isinstance(v, torch.Tensor): v = v.item() - self.progress_bar_metrics[k] = v + self._progress_bar_metrics.metrics[k] = v self.trainer.dev_debugger.track_pbar_metrics_history(metrics) @@ -311,6 +357,7 @@ def _track_callback_metrics(self, eval_results, using_eval_result): if 'val_loss' in flat: flat['checkpoint_on'] = flat['val_loss'] flat['early_stop_on'] = flat['val_loss'] + self.trainer.logger_connector.callback_metrics.update(flat) if self.trainer.testing: self.trainer.logger_connector.evaluation_callback_metrics.update(flat) @@ -441,15 +488,15 @@ def log_train_epoch_end_metrics( # add the metrics to the loggers and callbacks if epoch_log_metrics and len(epoch_log_metrics) > 0: self.log_metrics(epoch_log_metrics, {}) - self.callback_metrics.update(epoch_log_metrics) + self._callback_metrics.update(epoch_log_metrics) # add metrics to callbacks - self.callback_metrics.update(epoch_callback_metrics) + self._callback_metrics.update(epoch_callback_metrics) # add metrics to progress_bar and callbacks if len(epoch_progress_bar_metrics) > 0: self.add_progress_bar_metrics(epoch_progress_bar_metrics) - self.callback_metrics.update(epoch_progress_bar_metrics) + self._callback_metrics.update(epoch_progress_bar_metrics) # reset epoch loop result for next epoch self.cached_results.reset() @@ -605,4 +652,4 @@ def log_train_step_metrics(self, batch_output): grad_norm_dic = {} if len(batch_log_metrics) > 0 or len(grad_norm_dic) > 0: self.log_metrics(batch_log_metrics, grad_norm_dic, log_train_step_metrics=True) - self.callback_metrics.update(batch_log_metrics) + self._callback_metrics.update(batch_log_metrics) diff --git a/pytorch_lightning/trainer/connectors/logger_connector/metrics_holder.py 
b/pytorch_lightning/trainer/connectors/logger_connector/metrics_holder.py new file mode 100644 index 0000000000000..d2e2c9b7870cf --- /dev/null +++ b/pytorch_lightning/trainer/connectors/logger_connector/metrics_holder.py @@ -0,0 +1,80 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numbers +from typing import Any + +import torch + +from pytorch_lightning.metrics.metric import Metric +from pytorch_lightning.utilities import _TPU_AVAILABLE + + +class MetricsHolder: + + """ + This class acts as a dictonary holder. + It holds metrics and implements conversion functions. + Those functions will be triggered within LoggerConnector + when the property is being requested from the user. + """ + + def __init__(self, to_float: bool = False): + self.metrics = {} + self._to_float = to_float + + def update(self, metrics): + self.metrics.update(metrics) + + def pop(self, key, default): + return self.metrics.pop(key, default) + + def reset(self, metrics): + self.metrics = metrics + + def convert(self, use_tpu: bool, device: torch.device): + for key, value in self.metrics.items(): + self.metrics[key] = self._convert(value, use_tpu, device) + + def _convert(self, current: Any, use_tpu: bool, device: torch.device): + if self._to_float: + return self._convert_to_float(current, use_tpu, device) + return self._convert_to_tensor(current, use_tpu, device) + + def _convert_to_float(self, current, use_tpu: bool, device: torch.device): + if isinstance(current, Metric): + current = current.compute().detach() + + if isinstance(current, torch.Tensor): + current = float(current.item()) + + elif isinstance(current, int): + current = float(current) + + return current + + def _convert_to_tensor(self, current: Any, use_tpu: bool, device: torch.device): + if current is not None: + if isinstance(current, Metric): + current = current.compute().detach() + + elif isinstance(current, numbers.Number): + if device is None: + current = torch.tensor(current, dtype=torch.float) + else: + current = torch.tensor(current, device=device, dtype=torch.float) + + if use_tpu and _TPU_AVAILABLE: + current = current.cpu() + + return current diff --git a/tests/trainer/logging/test_logger_connector.py b/tests/trainer/logging/test_logger_connector.py index 56e5765c7f4b8..f911c793b0707 100644 --- a/tests/trainer/logging/test_logger_connector.py +++ b/tests/trainer/logging/test_logger_connector.py @@ -15,6 +15,7 @@ Tests to ensure that the training loop works with a dict (1.0) """ from copy import deepcopy +from typing import Any, Callable import pytest import torch @@ -22,15 +23,17 @@ from pytorch_lightning.callbacks.base import Callback from pytorch_lightning.core.step_result import Result +from pytorch_lightning.metrics import Accuracy from pytorch_lightning.trainer import Trainer from pytorch_lightning.trainer.connectors.logger_connector.callback_hook_validator import CallbackHookNameValidator +from pytorch_lightning.trainer.connectors.logger_connector.metrics_holder import MetricsHolder from 
pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.base.boring_model import BoringModel, RandomDataset -def decorator_with_arguments(fx_name='', hook_fx_name=None): - def decorator(func): - def wrapper(self, *args, **kwargs): +def decorator_with_arguments(fx_name: str = '', hook_fx_name: str = None) -> Callable: + def decorator(func: Callable) -> Callable: + def wrapper(self, *args, **kwargs) -> Any: # Set information self._current_fx_name = fx_name self._current_hook_fx_name = hook_fx_name @@ -43,7 +46,6 @@ def wrapper(self, *args, **kwargs): return result return wrapper - return decorator @@ -425,3 +427,28 @@ def test_dataloader(self): ) trainer.fit(model) trainer.test(model, ckpt_path=None) + + +@pytest.mark.parametrize('to_float', [False, True]) +def test_metrics_holder(to_float, tmpdir): + + device = "cuda" if torch.cuda.is_available() else "cpu" + preds = torch.tensor([[0.9, 0.1]], device=device) + + def is_float(value: Any) -> bool: + return isinstance(value, float) + + excepted_function = is_float if to_float else torch.is_tensor + targets = torch.tensor([1], device=device) + acc = Accuracy().to(device) + metric_holder = MetricsHolder(to_float=to_float) + metric_holder.update({ + "x": 1, + "y": torch.tensor(2), + "z": acc(preds, targets), + }) + metric_holder.convert(False, device) + metrics = metric_holder.metrics + assert excepted_function(metrics["x"]) + assert excepted_function(metrics["y"]) + assert excepted_function(metrics["z"])
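
For reference, a minimal usage sketch of the MetricsHolder introduced in PATCH 3/3, mirroring the behaviour exercised by test_metrics_holder above. This is an illustration only: it assumes the patched source tree is importable, the import path is the one created by the diff, and the metric names and values are made up.

import torch

from pytorch_lightning.trainer.connectors.logger_connector.metrics_holder import MetricsHolder

# Default holder: plain numbers are promoted to float tensors on convert().
holder = MetricsHolder()
holder.update({"loss": 0.25, "step": 3})
holder.convert(use_tpu=False, device=torch.device("cpu"))
assert torch.is_tensor(holder.metrics["loss"])

# With to_float=True (as used for evaluation_callback_metrics in the patch),
# values are converted to Python floats instead.
float_holder = MetricsHolder(to_float=True)
float_holder.update({"val_loss": torch.tensor(0.1)})
float_holder.convert(use_tpu=False, device=torch.device("cpu"))
assert isinstance(float_holder.metrics["val_loss"], float)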