feature: Add logging test / val callback functions #4351

Closed · wants to merge 80 commits
Commits (80)
ac40d1b
release only training callback
tchaton Oct 20, 2020
8231ae3
update for flake8
tchaton Oct 20, 2020
d6eb8d8
resolve `test_multiple_optimizers_manual_return_and_log`
tchaton Oct 20, 2020
7ae6f79
resolve `test_multiple_optimizers_manual_return`
tchaton Oct 20, 2020
9434d11
release only training callback
tchaton Oct 20, 2020
4741258
update for flake8
tchaton Oct 20, 2020
77fae0e
resolve `test_multiple_optimizers_manual_return_and_log`
tchaton Oct 20, 2020
b47b390
resolve `test_multiple_optimizers_manual_return`
tchaton Oct 20, 2020
2a3c72d
Update pytorch_lightning/core/step_result.py
tchaton Oct 21, 2020
542d8d3
Update pytorch_lightning/core/step_result.py
tchaton Oct 21, 2020
290a160
Update pytorch_lightning/core/step_result.py
tchaton Oct 21, 2020
0dfe8c9
Update pytorch_lightning/core/step_result.py
tchaton Oct 21, 2020
104c9f5
remove mixin
tchaton Oct 21, 2020
abe57d3
Merge branch 'FEATURE/logging_in_train_callbacks' of https://github.c…
tchaton Oct 21, 2020
f4e2477
remove explicit mixin
tchaton Oct 21, 2020
974e742
add support for logging within val / test
tchaton Oct 22, 2020
1313c2e
release only training callback
tchaton Oct 20, 2020
bb6554b
update for flake8
tchaton Oct 20, 2020
a8ca9f4
resolve `test_multiple_optimizers_manual_return_and_log`
tchaton Oct 20, 2020
cc3b0be
resolve `test_multiple_optimizers_manual_return`
tchaton Oct 20, 2020
559705b
remove mixin
tchaton Oct 21, 2020
455cbe3
release only training callback
tchaton Oct 20, 2020
468f235
update for flake8
tchaton Oct 20, 2020
6b33625
Update pytorch_lightning/core/step_result.py
tchaton Oct 21, 2020
bea52b9
Update pytorch_lightning/core/step_result.py
tchaton Oct 21, 2020
442f8fc
Update pytorch_lightning/core/step_result.py
tchaton Oct 21, 2020
b08497d
Update pytorch_lightning/core/step_result.py
tchaton Oct 21, 2020
8f125b5
remove explicit mixin
tchaton Oct 21, 2020
9074473
add support for logging within val / test
tchaton Oct 22, 2020
f59d10d
Merge branch 'master' into FEATURE/logging_in_train_callbacks
tchaton Oct 22, 2020
bddd61d
Merge branch 'master' into FEATURE/logging_in_train_callbacks
tchaton Oct 22, 2020
0740f1e
resolve logging bug
tchaton Oct 22, 2020
82fc4fe
repair bug
tchaton Oct 22, 2020
075d5bf
resolve pep8
tchaton Oct 22, 2020
f71e588
Merge branch 'master' into FEATURE/logging_in_train_callbacks
tchaton Oct 22, 2020
36dbc96
resolve formatting bug
tchaton Oct 22, 2020
81ca911
Merge branch 'FEATURE/logging_in_train_callbacks' of https://github.c…
tchaton Oct 22, 2020
25242fb
Merge branch 'master' into FEATURE/logging_in_train_callbacks
tchaton Oct 22, 2020
1a6b172
check if metric and grad_norm_dic is defined
tchaton Oct 22, 2020
4690ff5
Merge branch 'FEATURE/logging_in_train_callbacks' of https://github.c…
tchaton Oct 22, 2020
c7c1e7d
resolve pep8
tchaton Oct 22, 2020
1dbe60c
resolve typo
tchaton Oct 22, 2020
54e2799
convert metris and grad_norm_dic to dict when None
tchaton Oct 22, 2020
8a8b54a
resolve pep8
tchaton Oct 22, 2020
3b19cb7
Merge branch 'master' into feat/logging_in_val_test_callbacks
tchaton Oct 22, 2020
2c9397d
Merge branch 'master' of https://github.com/PyTorchLightning/pytorch-…
tchaton Oct 22, 2020
04ef001
Merge branch 'feat/logging_in_val_test_callbacks' of https://github.c…
tchaton Oct 22, 2020
ff46b30
remove previous test
tchaton Oct 23, 2020
2d4bda1
Merge branch 'master' into feat/logging_in_val_test_callbacks
tchaton Oct 23, 2020
e1eef34
cleanup
tchaton Oct 23, 2020
82ec5e8
resolve flake8
tchaton Oct 23, 2020
de16fee
resolve bug
tchaton Oct 23, 2020
43da42a
Merge branch 'master' into FEATURE/logging_in_train_callbacks
tchaton Oct 23, 2020
1549735
add type
tchaton Oct 23, 2020
e1652bf
Merge branch 'master' into FEATURE/logging_in_train_callbacks
tchaton Oct 23, 2020
536f85f
Merge branch 'FEATURE/logging_in_train_callbacks' of https://github.c…
tchaton Oct 23, 2020
e8c2fb7
Merge branch 'master' into feat/logging_in_val_test_callbacks
tchaton Oct 23, 2020
192d543
remove wrong merge
tchaton Oct 23, 2020
5756008
remove `debug_epoch` is `dev_debugger` is enabled
tchaton Oct 23, 2020
9118d6b
try to find bug
tchaton Oct 23, 2020
cff9644
update main eval_loop_results
tchaton Oct 23, 2020
a50d873
add ChainMap
tchaton Oct 23, 2020
c0ef818
try out
tchaton Oct 23, 2020
f7f514a
resolve pep8
tchaton Oct 23, 2020
740a107
reduce lenght
tchaton Oct 23, 2020
f8e1d49
update message error
tchaton Oct 23, 2020
18247fa
Merge branch 'master' into FEATURE/logging_in_train_callbacks
tchaton Oct 23, 2020
0f28e03
Merge branch 'master' into feat/logging_in_val_test_callbacks
tchaton Oct 23, 2020
d0e1e81
Merge branch 'master' into feat/logging_in_val_test_callbacks
SeanNaren Oct 23, 2020
1058e5e
Merge branch 'master' into FEATURE/logging_in_train_callbacks
tchaton Oct 23, 2020
6504731
Merge branch 'master' into FEATURE/logging_in_train_callbacks
tchaton Oct 24, 2020
d8678ff
Merge branch 'master' into feat/logging_in_val_test_callbacks
tchaton Oct 24, 2020
e90f2c0
move files ar
tchaton Oct 25, 2020
292af7d
create connector_logger_utils
tchaton Oct 25, 2020
abdbd9f
resolve flake8
tchaton Oct 25, 2020
8d1c924
Merge branch 'master' into FEATURE/logging_in_train_callbacks
tchaton Oct 25, 2020
39e0bdf
Merge branch 'FEATURE/logging_in_train_callbacks' into feat/logging_i…
tchaton Oct 25, 2020
54b9a93
Merge branch 'FEATURE/logging_in_train_callbacks' into feat/logging_i…
tchaton Oct 25, 2020
77b88e5
add dataloder_idx to meta
tchaton Oct 25, 2020
e327fc6
Merge branch 'master' into feat/logging_in_val_test_callbacks_2
tchaton Oct 25, 2020
33 changes: 24 additions & 9 deletions pytorch_lightning/core/lightning.py
@@ -11,33 +11,33 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import tempfile
import collections
import copy
import inspect
import os
import re
import tempfile
from abc import ABC
from argparse import Namespace
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import torch
from pytorch_lightning import _logger as log
from pytorch_lightning.core.grads import GradInformation
from pytorch_lightning.core.hooks import CheckpointHooks, DataHooks, ModelHooks
from pytorch_lightning.core.memory import ModelSummary
from pytorch_lightning.core.saving import ALLOWED_CONFIG_TYPES, PRIMITIVE_TYPES, ModelIO
from pytorch_lightning.core.step_result import Result
from pytorch_lightning.utilities import rank_zero_warn, AMPType
from pytorch_lightning.utilities.device_dtype_mixin import DeviceDtypeModuleMixin
from pytorch_lightning.utilities.xla_device_utils import XLADeviceUtils
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.core.step_result import Result
from pytorch_lightning.utilities.parsing import (
AttributeDict,
collect_init_args,
get_init_args,
)
from pytorch_lightning.callbacks import Callback
from torch import ScriptModule, Tensor
from torch.nn import Module
from torch.optim.optimizer import Optimizer
@@ -111,6 +111,8 @@ def __init__(self, *args, **kwargs):
self._datamodule = None
self._results: Optional[Result] = None
self._current_fx_name = ''
self._current_hook_fx_name = ''
self._current_dataloader_idx = None

def optimizers(self):
opts = self.trainer.optimizers
@@ -244,6 +246,17 @@ def log(
on_step = self.__auto_choose_log_on_step(on_step)
on_epoch = self.__auto_choose_log_on_epoch(on_epoch)

if self._current_hook_fx_name != '':
self.trainer.logger_connector.callback_logging_validator\
.validate_callback_logging_arguments(self._current_hook_fx_name,
on_step=on_step,
on_epoch=on_epoch)

# make sure user doesn't introduce logic for multi-dataloaders
if "/dataloader_idx_" in name:
raise MisconfigurationException(
f"Logged key: {name} should not contain information about dataloader_idx.")

self._results.log(
name,
value,
@@ -257,7 +270,8 @@ def log(
enable_graph,
sync_dist,
sync_dist_op,
sync_dist_group
sync_dist_group,
self._current_dataloader_idx,
)

def log_dict(
@@ -950,7 +964,8 @@ def configure_optimizers(
- Single optimizer.
- List or Tuple - List of optimizers.
- Two lists - The first list has multiple optimizers, the second a list of LR schedulers (or lr_dict).
- Dictionary, with an 'optimizer' key, and (optionally) a 'lr_scheduler' key which value is a single LR scheduler or lr_dict.
- Dictionary, with an 'optimizer' key, and (optionally) a 'lr_scheduler' key which value is a single LR
scheduler or lr_dict.
- Tuple of dictionaries as described, with an optional 'frequency' key.
- None - Fit will run without any optimizer.

@@ -1278,11 +1293,11 @@ def tbptt_split_batch(self, batch, split_size):
batch_split = []
for i, x in enumerate(batch):
if isinstance(x, torch.Tensor):
split_x = x[:, t : t + split_size]
split_x = x[:, t: t + split_size]
elif isinstance(x, collections.Sequence):
split_x = [None] * len(x)
for batch_idx in range(len(x)):
split_x[batch_idx] = x[batch_idx][t : t + split_size]
split_x[batch_idx] = x[batch_idx][t: t + split_size]

batch_split.append(split_x)

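For context, a minimal sketch of what the `lightning.py` changes above enable on the user side: logging from a validation/test callback hook through `pl_module.log`, which now validates the calling hook and the logged key. The callback class and metric name below are illustrative assumptions, not part of this diff.

```python
import torch
from pytorch_lightning.callbacks import Callback


class ValLoggingCallback(Callback):
    """Hypothetical callback (sketch only): logs a metric from a validation hook."""

    def on_validation_epoch_end(self, trainer, pl_module):
        # The key must NOT already contain "/dataloader_idx_": with multiple val/test
        # dataloaders the suffix is appended automatically from _current_dataloader_idx.
        pl_module.log("val_callback_metric", torch.tensor(1.0), on_epoch=True)
```

Passing a key such as `"metric/dataloader_idx_0"` would raise the `MisconfigurationException` added above, since the trainer now appends that suffix itself.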
63 changes: 44 additions & 19 deletions pytorch_lightning/core/step_result.py
@@ -124,6 +124,7 @@ def log(
sync_dist: bool = False,
sync_dist_op: Union[Any, str] = 'mean',
sync_dist_group: Optional[Any] = None,
dataloader_idx: Optional[int] = None,
):
# no metrics should be logged with graphs
if not enable_graph and isinstance(value, torch.Tensor):
@@ -144,6 +145,7 @@

# set step version
step_name = f'{name}_step'

self.__set_meta(
step_name,
value,
@@ -154,12 +156,15 @@
reduce_fx=reduce_fx,
tbptt_reduce_fx=tbptt_reduce_fx,
tbptt_pad_token=tbptt_pad_token,
forked=False
forked=False,
dataloader_idx=dataloader_idx,
)

self.__setitem__(step_name, value)

# set epoch version
epoch_name = f'{name}_epoch'

self.__set_meta(
epoch_name,
value,
@@ -170,7 +175,8 @@
reduce_fx=reduce_fx,
tbptt_reduce_fx=tbptt_reduce_fx,
tbptt_pad_token=tbptt_pad_token,
forked=False
forked=False,
dataloader_idx=dataloader_idx,
)
self.__setitem__(epoch_name, value)

@@ -185,7 +191,8 @@
reduce_fx,
tbptt_reduce_fx=tbptt_reduce_fx,
tbptt_pad_token=tbptt_pad_token,
forked=was_forked
forked=was_forked,
dataloader_idx=dataloader_idx,
)

# set the value
@@ -202,7 +209,8 @@ def __set_meta(
reduce_fx: Callable,
tbptt_pad_token: int,
tbptt_reduce_fx: Callable,
forked: bool
forked: bool,
dataloader_idx: Union[int, None]
):
# set the meta for the item
meta_value = value
@@ -215,7 +223,8 @@
value=meta_value,
tbptt_reduce_fx=tbptt_reduce_fx,
tbptt_pad_token=tbptt_pad_token,
forked=forked
forked=forked,
dataloader_idx=dataloader_idx,
)

self['meta'][name] = meta
@@ -242,7 +251,13 @@ def get_callback_metrics(self) -> dict:

return result

def get_batch_log_metrics(self, include_forked_originals=True) -> dict:
def _add_dataloader_idx(self, k: str, dataloader_idx: Union[int, None], add_dataloader_idx: bool) -> str:
if dataloader_idx is not None and add_dataloader_idx:
return f"{k}/dataloader_idx_{dataloader_idx}"
else:
return k

def get_batch_log_metrics(self, include_forked_originals=True, add_dataloader_idx=False) -> dict:
"""
Gets the metrics to log at the end of the batch step

@@ -257,15 +272,17 @@ def get_batch_log_metrics(self, include_forked_originals=True) -> dict:
if options['forked'] and not include_forked_originals:
continue

dl_key = self._add_dataloader_idx(k, options["dataloader_idx"], add_dataloader_idx)

if options['logger'] and options['on_step']:
if isinstance(self[k], Metric):
result[k] = self[k]._forward_cache
result[dl_key] = self[k]._forward_cache
else:
result[k] = self[k]
result[dl_key] = self[k]

return result

def get_epoch_log_metrics(self) -> dict:
def get_epoch_log_metrics(self, add_dataloader_idx=False) -> dict:
"""
Gets the metrics to log at the end of epoch
"""
@@ -279,19 +296,21 @@ def get_epoch_log_metrics(self) -> dict:
if options['forked']:
continue

dl_key = self._add_dataloader_idx(k, options["dataloader_idx"], add_dataloader_idx)

if options['logger'] and options['on_epoch']:
if isinstance(self[k], Metric):
result[k] = self[k].compute()
result[dl_key] = self[k].compute()
else:
result[k] = self[k]
result[dl_key] = self[k]

if k in self and not options['on_epoch'] and isinstance(self[k], Metric):
# compute metric on epoch anyway so state does not accumulate
self[k].compute()

return result

def get_epoch_pbar_metrics(self):
def get_epoch_pbar_metrics(self, add_dataloader_idx=False):
"""
Gets the metrics to log at the end of epoch
"""
@@ -305,19 +324,21 @@ def get_epoch_pbar_metrics(self):
if options['forked']:
continue

dl_key = self._add_dataloader_idx(k, options["dataloader_idx"], add_dataloader_idx)

if options['prog_bar'] and options['on_epoch']:
if isinstance(self[k], Metric):
result[k] = self[k].compute()
result[dl_key] = self[k].compute()
else:
result[k] = self[k]
result[dl_key] = self[k]

if k in self and not options['on_epoch'] and isinstance(self[k], Metric):
# compute metric on epoch anyway so state does not accumulate
self[k].compute()

return result

def get_forked_metrics(self):
def get_forked_metrics(self, add_dataloader_idx=False):
"""
Gets the metrics to log at the end of epoch
"""
@@ -328,12 +349,14 @@ def get_forked_metrics(self):
if k == '_internal':
continue

dl_key = self._add_dataloader_idx(k, options["dataloader_idx"], add_dataloader_idx)

if options['forked']:
result[k] = self[k]
result[dl_key] = self[k]

return result

def get_batch_pbar_metrics(self, include_forked_originals=True):
def get_batch_pbar_metrics(self, include_forked_originals=True, add_dataloader_idx=False):
"""
Gets the metrics to log at the end of the batch step
"""
@@ -347,11 +370,13 @@ def get_batch_pbar_metrics(self, include_forked_originals=True):
if options['forked'] and not include_forked_originals:
continue

dl_key = self._add_dataloader_idx(k, options["dataloader_idx"], add_dataloader_idx)

if options['prog_bar'] and options['on_step']:
if isinstance(self[k], Metric):
result[k] = self[k]._forward_cache
result[dl_key] = self[k]._forward_cache
else:
result[k] = self[k]
result[dl_key] = self[k]

return result

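The `dataloader_idx` plumbing added to `step_result.py` above boils down to suffixing metric keys per dataloader. A standalone re-creation of `_add_dataloader_idx` (a sketch mirroring the method above, not the class itself) shows the resulting key format:

```python
from typing import Optional


def add_dataloader_idx(key: str, dataloader_idx: Optional[int], add_dataloader_idx: bool) -> str:
    """Mirror of Result._add_dataloader_idx: append the dataloader index to a metric key."""
    if dataloader_idx is not None and add_dataloader_idx:
        return f"{key}/dataloader_idx_{dataloader_idx}"
    return key


# With several val/test dataloaders, "val_loss" logged from dataloader 1 becomes:
assert add_dataloader_idx("val_loss", 1, True) == "val_loss/dataloader_idx_1"
# With a single dataloader, or when suffixing is disabled, the key is unchanged:
assert add_dataloader_idx("val_loss", None, True) == "val_loss"
```

This is also why `LightningModule.log` now rejects user keys that already contain `/dataloader_idx_`.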
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from abc import ABC
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, ProgressBarBase, ProgressBar
from pytorch_lightning.utilities import rank_zero_warn
from pytorch_lightning.utilities.exceptions import MisconfigurationException
@@ -61,6 +62,8 @@ def init_default_checkpoint_callback(self, checkpoint_callback):
checkpoint_callback = ModelCheckpoint(dirpath=None, filename=None)
elif checkpoint_callback is False:
checkpoint_callback = None
if checkpoint_callback:
checkpoint_callback.save_function = self.trainer.save_checkpoint

return checkpoint_callback

@@ -81,5 +84,4 @@ def configure_progress_bar(self, refresh_rate=1, process_position=0):
self.trainer.callbacks.append(progress_bar_callback)
else:
progress_bar_callback = None

return progress_bar_callback
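The checkpoint hunk above attaches the trainer's `save_checkpoint` to the freshly created default `ModelCheckpoint`, so the callback can delegate file writing back to the trainer. A toy, self-contained sketch of that injection pattern (the class names here are made up; only the `save_function` assignment mirrors the diff):

```python
class TinyCheckpoint:
    """Toy stand-in for a checkpoint callback; illustrates the save_function injection only."""

    def __init__(self):
        self.save_function = None  # injected by the trainer/connector

    def save(self, filepath):
        if self.save_function is None:
            raise RuntimeError("save_function was never attached by the trainer")
        self.save_function(filepath)


class TinyTrainer:
    def save_checkpoint(self, filepath):
        print(f"writing checkpoint to {filepath}")


trainer = TinyTrainer()
ckpt = TinyCheckpoint()
ckpt.save_function = trainer.save_checkpoint  # the line added in the hunk above, in miniature
ckpt.save("epoch=0.ckpt")
```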