diff --git a/CHANGELOG.md b/CHANGELOG.md index f5fa80353cab8..89ab2d7d3cee7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -81,6 +81,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed mistake in parameters' grad norm tracking ([#2012](https://github.com/PyTorchLightning/pytorch-lightning/pull/2012)) - Fixed CPU and hanging GPU crash ([#2118](https://github.com/PyTorchLightning/pytorch-lightning/pull/2118)) +- Fixed an issue with the model summary and `example_input_array` depending on a specific ordering of the submodules in a LightningModule ([#1773](https://github.com/PyTorchLightning/pytorch-lightning/pull/1773)) + ## [0.7.6] - 2020-05-16 ### Added diff --git a/benchmarks/parity_modules.py b/benchmarks/parity_modules.py deleted file mode 100644 index 344debe46a3c4..0000000000000 --- a/benchmarks/parity_modules.py +++ /dev/null @@ -1,77 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch.utils.data import Dataset, DataLoader - -from pytorch_lightning import LightningModule -from tests.base.datasets import MNIST - - -class AverageDataset(Dataset): - def __init__(self, dataset_len=300, sequence_len=100): - self.dataset_len = dataset_len - self.sequence_len = sequence_len - self.input_seq = torch.randn(dataset_len, sequence_len, 10) - top, bottom = self.input_seq.chunk(2, -1) - self.output_seq = top + bottom.roll(shifts=1, dims=-1) - - def __len__(self): - return self.dataset_len - - def __getitem__(self, item): - return self.input_seq[item], self.output_seq[item] - - -class ParityModuleRNN(LightningModule): - def __init__(self): - super().__init__() - self.rnn = nn.LSTM(10, 20, batch_first=True) - self.linear_out = nn.Linear(in_features=20, out_features=5) - - def forward(self, x): - seq, last = self.rnn(x) - return self.linear_out(seq) - - def training_step(self, batch, batch_nb): - x, y = batch - y_hat = self(x) - loss = F.mse_loss(y_hat, y) - return {'loss': loss} - - def configure_optimizers(self): - return torch.optim.Adam(self.parameters(), lr=0.02) - - def train_dataloader(self): - return DataLoader(AverageDataset(), batch_size=30) - - -class ParityModuleMNIST(LightningModule): - - def __init__(self): - super().__init__() - self.c_d1 = nn.Linear(in_features=28 * 28, out_features=128) - self.c_d1_bn = nn.BatchNorm1d(128) - self.c_d1_drop = nn.Dropout(0.3) - self.c_d2 = nn.Linear(in_features=128, out_features=10) - - def forward(self, x): - x = x.view(x.size(0), -1) - x = self.c_d1(x) - x = torch.tanh(x) - x = self.c_d1_bn(x) - x = self.c_d1_drop(x) - x = self.c_d2(x) - return x - - def training_step(self, batch, batch_nb): - x, y = batch - y_hat = self(x) - loss = F.cross_entropy(y_hat, y) - return {'loss': loss} - - def configure_optimizers(self): - return torch.optim.Adam(self.parameters(), lr=0.02) - - def train_dataloader(self): - return DataLoader(MNIST(train=True, download=True,), - batch_size=128) diff --git a/benchmarks/test_parity.py b/benchmarks/test_parity.py index 186dc57dac3e9..f38cab56dcb17 100644 --- a/benchmarks/test_parity.py +++ b/benchmarks/test_parity.py @@ -5,8 +5,8 @@ import torch import tests.base.utils as tutils -from benchmarks.parity_modules import ParityModuleRNN, ParityModuleMNIST from pytorch_lightning import Trainer, seed_everything +from tests.base.models import ParityModuleRNN, ParityModuleMNIST @pytest.mark.parametrize('cls_model,max_diff', [ diff --git a/docs/source/debugging.rst b/docs/source/debugging.rst index 412b6d613ecc6..741f94c524fee 100644 --- a/docs/source/debugging.rst +++ b/docs/source/debugging.rst @@ -55,17 +55,32 @@ argument of :class:`~pytorch_lightning.trainer.trainer.Trainer`) trainer = Trainer(overfit_pct=0.01) -Print the parameter count by layer ----------------------------------- -Whenever the .fit() function gets called, the Trainer will print the weights summary for the lightningModule. -To disable this behavior, turn off this flag: - -(See: :paramref:`~pytorch_lightning.trainer.trainer.Trainer.weights_summary` -argument of :class:`~pytorch_lightning.trainer.trainer.Trainer`) +Print a summary of your LightningModule +--------------------------------------- +Whenever the ``.fit()`` function gets called, the Trainer will print the weights summary for the LightningModule. +By default it only prints the top-level modules. If you want to show all submodules in your network, use the +`'full'` option: .. testcode:: - trainer = Trainer(weights_summary=None) + trainer = Trainer(weights_summary='full') + +You can also display the intermediate input- and output sizes of all your layers by setting the +``example_input_array`` attribute in your LightningModule. It will print a table like this + +.. code-block:: text + + | Name | Type | Params | In sizes | Out sizes + -------------------------------------------------------------- + 0 | net | Sequential | 132 K | [10, 256] | [10, 512] + 1 | net.0 | Linear | 131 K | [10, 256] | [10, 512] + 2 | net.1 | BatchNorm1d | 1 K | [10, 512] | [10, 512] + +when you call ``.fit()`` on the Trainer. This can help you find bugs in the composition of your layers. + +See Also: + - :paramref:`~pytorch_lightning.trainer.trainer.Trainer.weights_summary` Trainer argument + - :class:`~pytorch_lightning.core.memory.ModelSummary` Set the number of validation sanity steps diff --git a/pl_examples/domain_templates/generative_adversarial_net.py b/pl_examples/domain_templates/generative_adversarial_net.py index 4417e5e02c2be..427312ecaf942 100644 --- a/pl_examples/domain_templates/generative_adversarial_net.py +++ b/pl_examples/domain_templates/generative_adversarial_net.py @@ -93,6 +93,8 @@ def __init__(self, self.validation_z = torch.randn(8, self.latent_dim) + self.example_input_array = torch.zeros(2, hparams.latent_dim) + def forward(self, z): return self.generator(z) diff --git a/pl_examples/models/lightning_template.py b/pl_examples/models/lightning_template.py index b309094254118..dba605f5caae9 100644 --- a/pl_examples/models/lightning_template.py +++ b/pl_examples/models/lightning_template.py @@ -66,6 +66,8 @@ def __init__(self, self.c_d2 = nn.Linear(in_features=self.hidden_dim, out_features=self.out_features) + self.example_input_array = torch.zeros(2, 1, 28, 28) + def forward(self, x): """ No special modification required for Lightning, define it as you normally would diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 75b08ddc7ac1b..e5c470e3cf378 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -52,7 +52,6 @@ def __init__(self, *args, **kwargs): #: Pointer to the logger object self.logger = None - self.example_input_array = None #: True if using dp self.use_dp = False @@ -75,6 +74,17 @@ def __init__(self, *args, **kwargs): #: device reference self._device = torch.device('cpu') + # optionally can be set by user + self._example_input_array = None + + @property + def example_input_array(self) -> Any: + return self._example_input_array + + @example_input_array.setter + def example_input_array(self, example: Any) -> None: + self._example_input_array = example + @property def on_gpu(self): """ @@ -1445,9 +1455,10 @@ def val_dataloader(self): will have an argument ``dataset_idx`` which matches the order here. """ - def summarize(self, mode: str) -> None: + def summarize(self, mode: str = ModelSummary.MODE_DEFAULT) -> ModelSummary: model_summary = ModelSummary(self, mode=mode) - log.info('\n' + model_summary.__str__()) + log.info('\n' + str(model_summary)) + return model_summary def freeze(self) -> None: r""" diff --git a/pytorch_lightning/core/memory.py b/pytorch_lightning/core/memory.py index 8cbd281d6b024..738184b9c091a 100644 --- a/pytorch_lightning/core/memory.py +++ b/pytorch_lightning/core/memory.py @@ -1,164 +1,243 @@ -""" -Generates a summary of a model's layers and dimensionality -""" - -import gc import os import subprocess +from collections import OrderedDict from subprocess import PIPE -from typing import Tuple, Dict, Union, List +from typing import Tuple, Dict, Union, List, Any import numpy as np import torch -from torch.nn import Module +import torch.nn as nn import pytorch_lightning as pl +from pytorch_lightning.utilities.apply_func import apply_to_collection -from pytorch_lightning import _logger as log +PARAMETER_NUM_UNITS = [" ", "K", "M", "B", "T"] +UNKNOWN_SIZE = "?" -class ModelSummary(object): +class LayerSummary(object): + """ + Summary class for a single layer in a :class:`~pytorch_lightning.core.lightning.LightningModule`. + It collects the following information: + + - Type of the layer (e.g. Linear, BatchNorm1d, ...) + - Input shape + - Output shape + - Number of parameters + + The input and output shapes are only known after the example input array was + passed through the model. + + Example:: + + >>> model = torch.nn.Conv2d(3, 8, 3) + >>> summary = LayerSummary(model) + >>> summary.num_parameters + 224 + >>> summary.layer_type + 'Conv2d' + >>> output = model(torch.rand(1, 3, 5, 5)) + >>> summary.in_size + [1, 3, 5, 5] + >>> summary.out_size + [1, 8, 3, 3] - def __init__(self, model: 'pl.LightningModule', mode: str = 'full'): - """ Generates summaries of model layers and dimensions. """ - self.model = model - self.mode = mode - self.in_sizes = [] - self.out_sizes = [] + Args: + module: A module to summarize - self.summarize() + """ - def __str__(self): - return self.summary.__str__() + def __init__(self, module: nn.Module): + super().__init__() + self._module = module + self._hook_handle = self._register_hook() + self._in_size = None + self._out_size = None - def __repr__(self): - return self.summary.__str__() + def _register_hook(self): + """ + Registers a hook on the module that computes the input- and output size(s) + on the first forward pass. The hook will remove itself from the module, meaning that + recursive models will only record their input- and output shapes once. + """ + + def hook(module, inp, out): + if len(inp) == 1: + inp = inp[0] + self._in_size = parse_batch_shape(inp) + self._out_size = parse_batch_shape(out) + self._hook_handle.remove() # hook detaches itself from module + + return self._module.register_forward_hook(hook) + + @property + def in_size(self): + return self._in_size or UNKNOWN_SIZE + + @property + def out_size(self): + return self._out_size or UNKNOWN_SIZE + + @property + def layer_type(self) -> str: + """ Returns the class name of the module. """ + return str(self._module.__class__.__name__) + + @property + def num_parameters(self) -> int: + """ Returns the number of parameters in this module. """ + return sum(np.prod(p.shape) for p in self._module.parameters()) + + +class ModelSummary(object): + """ + Generates a summary of all layers in a :class:`~pytorch_lightning.core.lightning.LightningModule`. + + Args: + model: The model to summarize (also referred to as the root module) + mode: Can be one of + + - `top` (default): only the top-level modules will be recorded (the children of the root module) + - `full`: summarizes all layers and their submodules in the root module + + The string representation of this summary prints a table with columns containing + the name, type and number of parameters for each layer. + + The root module may also have an attribute ``example_input_array`` as shown in the example below. + If present, the root module will be called with it as input to determine the + intermediate input- and output shapes of all layers. Supported are tensors and + nested lists and tuples of tensors. All other types of inputs will be skipped and show as `?` + in the summary table. The summary will also display `?` for layers not used in the forward pass. + + Example:: + + >>> class LitModel(pl.LightningModule): + ... + ... def __init__(self): + ... super().__init__() + ... self.net = nn.Sequential(nn.Linear(256, 512), nn.BatchNorm1d(512)) + ... self.example_input_array = torch.zeros(10, 256) # optional + ... + ... def forward(self, x): + ... return self.net(x) + ... + >>> model = LitModel() + >>> ModelSummary(model, mode='top') # doctest: +NORMALIZE_WHITESPACE + | Name | Type | Params | In sizes | Out sizes + ------------------------------------------------------------ + 0 | net | Sequential | 132 K | [10, 256] | [10, 512] + >>> ModelSummary(model, mode='full') # doctest: +NORMALIZE_WHITESPACE + | Name | Type | Params | In sizes | Out sizes + -------------------------------------------------------------- + 0 | net | Sequential | 132 K | [10, 256] | [10, 512] + 1 | net.0 | Linear | 131 K | [10, 256] | [10, 512] + 2 | net.1 | BatchNorm1d | 1 K | [10, 512] | [10, 512] + """ + + MODE_TOP = "top" + MODE_FULL = "full" + MODE_DEFAULT = MODE_TOP + MODES = [MODE_FULL, MODE_TOP] + + def __init__(self, model: "pl.LightningModule", mode: str = MODE_DEFAULT): + self._model = model + self._mode = mode + self._layer_summary = self.summarize() - def named_modules(self) -> List[Tuple[str, Module]]: - if self.mode == 'full': - mods = self.model.named_modules() + @property + def named_modules(self) -> List[Tuple[str, nn.Module]]: + if self._mode == ModelSummary.MODE_FULL: + mods = self._model.named_modules() mods = list(mods)[1:] # do not include root module (LightningModule) - elif self.mode == 'top': + elif self._mode == ModelSummary.MODE_TOP: # the children are the top-level modules - mods = self.model.named_children() + mods = self._model.named_children() else: mods = [] return list(mods) - def get_variable_sizes(self) -> None: - """ Run sample input through each layer to get output sizes. """ - mods = self.named_modules() - in_sizes = [] - out_sizes = [] - input_ = self.model.example_input_array + @property + def layer_names(self) -> List[str]: + return list(self._layer_summary.keys()) - if self.model.on_gpu: - device = next(self.model.parameters()).get_device() - # test if input is a list or a tuple - if isinstance(input_, (list, tuple)): - input_ = [input_i.cuda(device) if torch.is_tensor(input_i) else input_i - for input_i in input_] - else: - input_ = input_.cuda(device) + @property + def layer_types(self) -> List[str]: + return [layer.layer_type for layer in self._layer_summary.values()] - if self.model.trainer.use_amp: - # test if it is not a list or a tuple - if isinstance(input_, (list, tuple)): - input_ = [input_i.half() if torch.is_tensor(input_i) else input_i - for input_i in input_] - else: - input_ = input_.half() + @property + def in_sizes(self) -> List: + return [layer.in_size for layer in self._layer_summary.values()] + + @property + def out_sizes(self) -> List: + return [layer.out_size for layer in self._layer_summary.values()] + + @property + def param_nums(self) -> List[int]: + return [layer.num_parameters for layer in self._layer_summary.values()] + def summarize(self) -> Dict[str, LayerSummary]: + summary = OrderedDict((name, LayerSummary(module)) for name, module in self.named_modules) + if self._model.example_input_array is not None: + self._forward_example_input() + return summary + + def _forward_example_input(self) -> None: + """ Run the example input through each layer to get input- and output sizes. """ + model = self._model + trainer = self._model.trainer + + input_ = model.example_input_array + input_ = model.transfer_batch_to_device(input_, model.device) + input_ = apply_to_collection(input_, torch.Tensor, lambda x: x.type(model.dtype)) + + if trainer is not None and trainer.use_amp: + if model.use_native_amp: + model.forward = torch.cuda.amp.autocast()(model.forward) + + mode = model.training + model.eval() with torch.no_grad(): + # let the model hooks collect the input- and output shapes + if isinstance(input_, (list, tuple)): + model(*input_) + elif isinstance(input_, dict): + model(**input_) + else: + model(input_) + model.train(mode) # restore mode of module - for _, m in mods: - if isinstance(input_, (list, tuple)): # pragma: no-cover - out = m(*input_) - else: - out = m(input_) - - if isinstance(input_, (list, tuple)): # pragma: no-cover - in_size = [] - for x in input_: - if isinstance(x, list): - in_size.append(len(x)) - else: - in_size.append(x.size()) - else: - in_size = np.array(input_.size()) - - in_sizes.append(in_size) - - if isinstance(out, (list, tuple)): # pragma: no-cover - out_size = np.asarray([x.size() for x in out]) - else: - out_size = np.array(out.size()) - - out_sizes.append(out_size) - input_ = out - - self.in_sizes = in_sizes - self.out_sizes = out_sizes - assert len(in_sizes) == len(out_sizes) - - def get_layer_names(self) -> None: - """ Collect Layer Names """ - mods = self.named_modules() - names = [] - layers = [] - for name, m in mods: - names += [name] - layers += [str(m.__class__)] - - layer_types = [x.split('.')[-1][:-2] for x in layers] - - self.layer_names = names - self.layer_types = layer_types - - def get_parameter_sizes(self) -> None: - """ Get sizes of all parameters in `model`. """ - mods = self.named_modules() - sizes = [] - for _, m in mods: - p = list(m.parameters()) - modsz = [np.array(param.size()) for param in p] - sizes.append(modsz) - - self.param_sizes = sizes - - def get_parameter_nums(self) -> None: - """ Get number of parameters in each layer. """ - param_nums = [] - for mod in self.param_sizes: - all_params = 0 - for p in mod: - all_params += np.prod(p) - param_nums.append(all_params) - self.param_nums = param_nums - - def make_summary(self) -> None: + def __str__(self): """ Makes a summary listing with: - Layer Name, Layer Type, Input Size, Output Size, Number of Parameters + Layer Name, Layer Type, Number of Parameters, Input Sizes, Output Sizes """ - arrays = [['Name', self.layer_names], - ['Type', self.layer_types], - ['Params', list(map(get_human_readable_count, self.param_nums))]] - if self.model.example_input_array is not None: - arrays.append(['In sizes', self.in_sizes]) - arrays.append(['Out sizes', self.out_sizes]) + arrays = [ + [" ", list(map(str, range(len(self._layer_summary))))], + ["Name", self.layer_names], + ["Type", self.layer_types], + ["Params", list(map(get_human_readable_count, self.param_nums))], + ] + if self._model.example_input_array is not None: + arrays.append(["In sizes", self.in_sizes]) + arrays.append(["Out sizes", self.out_sizes]) + + return _format_summary_table(*arrays) + + def __repr__(self): + return str(self) - self.summary = _format_summary_table(*arrays) - def summarize(self) -> None: - self.get_layer_names() - self.get_parameter_sizes() - self.get_parameter_nums() +def parse_batch_shape(batch: Any) -> Union[str, List]: + if hasattr(batch, "shape"): + return list(batch.shape) - if self.model.example_input_array is not None: - self.get_variable_sizes() - self.make_summary() + if isinstance(batch, (list, tuple)): + shape = [parse_batch_shape(el) for el in batch] + return shape + + return UNKNOWN_SIZE def _format_summary_table(*cols) -> str: @@ -170,68 +249,29 @@ def _format_summary_table(*cols) -> str: n_rows = len(cols[0][1]) n_cols = 1 + len(cols) - # Layer counter - counter = list(map(str, list(range(n_rows)))) - counter_len = max([len(c) for c in counter]) - - # Get formatting length of each column - length = [] + # Get formatting width of each column + col_widths = [] for c in cols: - str_l = len(c[0]) # default length is header length - for a in c[1]: - if isinstance(a, np.ndarray): - array_string = '[' + ', '.join([str(j) for j in a]) + ']' - str_l = max(len(array_string), str_l) - else: - str_l = max(len(a), str_l) - length.append(str_l) + col_width = max(len(str(a)) for a in c[1]) if n_rows else 0 + col_width = max(col_width, len(c[0])) # minimum length is header length + col_widths.append(col_width) # Formatting - s = '{:<{}}' - full_length = sum(length) + 3 * n_cols - header = [s.format(' ', counter_len)] + [s.format(c[0], l) for c, l in zip(cols, length)] + s = "{:<{}}" + total_width = sum(col_widths) + 3 * n_cols + header = [s.format(c[0], l) for c, l in zip(cols, col_widths)] # Summary = header + divider + Rest of table - summary = ' | '.join(header) + '\n' + '-' * full_length + summary = " | ".join(header) + "\n" + "-" * total_width for i in range(n_rows): - line = s.format(counter[i], counter_len) - for c, l in zip(cols, length): - if isinstance(c[1][i], np.ndarray): - array_string = '[' + ', '.join([str(j) for j in c[1][i]]) + ']' - line += ' | ' + array_string + ' ' * (l - len(array_string)) - else: - line += ' | ' + s.format(c[1][i], l) - summary += '\n' + line + line = [] + for c, l in zip(cols, col_widths): + line.append(s.format(str(c[1][i]), l)) + summary += "\n" + " | ".join(line) return summary -def print_mem_stack() -> None: # pragma: no-cover - for obj in gc.get_objects(): - try: - if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)): - log.info(type(obj), obj.size()) - except Exception: - pass - - -def count_mem_items() -> Tuple[int, int]: # pragma: no-cover - num_params = 0 - num_tensors = 0 - for obj in gc.get_objects(): - try: - if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)): - obj_type = str(type(obj)) - if 'parameter' in obj_type: - num_params += 1 - else: - num_tensors += 1 - except Exception: - pass - - return num_params, num_tensors - - def get_memory_profile(mode: str) -> Union[Dict[str, int], Dict[int, int]]: """ Get a profile of the current memory usage. @@ -251,11 +291,11 @@ def get_memory_profile(mode: str) -> Union[Dict[str, int], Dict[int, int]]: """ memory_map = get_gpu_memory_map() - if mode == 'min_max': + if mode == "min_max": min_index, min_memory = min(memory_map.items(), key=lambda item: item[1]) max_index, max_memory = max(memory_map.items(), key=lambda item: item[1]) - memory_map = {'min_gpu_mem': min_memory, 'max_gpu_mem': max_memory} + memory_map = {"min_gpu_mem": min_memory, "max_gpu_mem": max_memory} return memory_map @@ -268,18 +308,16 @@ def get_gpu_memory_map() -> Dict[str, int]: values are memory usage as integers in MB. """ result = subprocess.run( - [ - 'nvidia-smi', - '--query-gpu=memory.used', - '--format=csv,nounits,noheader', - ], - encoding='utf-8', + ["nvidia-smi", "--query-gpu=memory.used", "--format=csv,nounits,noheader",], + encoding="utf-8", # capture_output=True, # valid for python version >=3.7 - stdout=PIPE, stderr=PIPE, # for backward compatibility with python version 3.6 - check=True) + stdout=PIPE, + stderr=PIPE, # for backward compatibility with python version 3.6 + check=True, + ) # Convert lines into a dictionary gpu_memory = [int(x) for x in result.stdout.strip().split(os.linesep)] - gpu_memory_map = {f'gpu_{index}': memory for index, memory in enumerate(gpu_memory)} + gpu_memory_map = {f"gpu_{index}": memory for index, memory in enumerate(gpu_memory)} return gpu_memory_map @@ -310,11 +348,11 @@ def get_human_readable_count(number: int) -> str: """ assert number >= 0 - labels = [' ', 'K', 'M', 'B', 'T'] + labels = PARAMETER_NUM_UNITS num_digits = int(np.floor(np.log10(number)) + 1 if number > 0 else 1) num_groups = int(np.ceil(num_digits / 3)) num_groups = min(num_groups, len(labels)) # don't abbreviate beyond trillions shift = -3 * (num_groups - 1) number = number * (10 ** shift) index = num_groups - 1 - return f'{int(number):,d} {labels[index]}' + return f"{int(number):,d} {labels[index]}" diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 36035bbf9766c..3112cc305460e 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -9,8 +9,9 @@ from torch.utils.data import DataLoader from pytorch_lightning import _logger as log -from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, Callback, ProgressBarBase +from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, Callback from pytorch_lightning.core.lightning import LightningModule +from pytorch_lightning.core.memory import ModelSummary from pytorch_lightning.loggers import LightningLoggerBase from pytorch_lightning.profiler import SimpleProfiler, PassThroughProfiler, BaseProfiler from pytorch_lightning.trainer.auto_mix_precision import TrainerAMPMixin @@ -110,7 +111,7 @@ def __init__( distributed_backend: Optional[str] = None, precision: int = 32, print_nan_grads: bool = False, # backward compatible, todo: remove in v0.9.0 - weights_summary: Optional[str] = 'top', + weights_summary: Optional[str] = ModelSummary.MODE_DEFAULT, weights_save_path: Optional[str] = None, num_sanity_val_steps: int = 2, truncated_bptt_steps: Optional[int] = None, @@ -945,12 +946,13 @@ def run_pretrain_routine(self, model: LightningModule): self.register_slurm_signal_handlers() # print model summary - # TODO: remove self.testing condition because model.summarize() is wiping out the weights if self.is_global_zero and self.weights_summary is not None and not self.testing: - if self.weights_summary in ['full', 'top']: + if self.weights_summary in ModelSummary.MODES: ref_model.summarize(mode=self.weights_summary) else: - raise MisconfigurationException("weights_summary can be None, 'full' or 'top'") + raise MisconfigurationException( + "weights_summary can be None, " + ", ".join(ModelSummary.MODES) + ) # track model now. # if cluster resets state, the model will update with the saved weights diff --git a/tests/base/datasets.py b/tests/base/datasets.py index af6cb062dc80e..35edbfc948497 100644 --- a/tests/base/datasets.py +++ b/tests/base/datasets.py @@ -184,3 +184,19 @@ def prepare_data(self, download: bool) -> None: data, targets = torch.load(path_fname) data, targets = self._prepare_subset(data, targets, self.num_samples, self.digits) torch.save((data, targets), os.path.join(self.cached_folder_path, fname)) + + +class AverageDataset(Dataset): + + def __init__(self, dataset_len=300, sequence_len=100): + self.dataset_len = dataset_len + self.sequence_len = sequence_len + self.input_seq = torch.randn(dataset_len, sequence_len, 10) + top, bottom = self.input_seq.chunk(2, -1) + self.output_seq = top + bottom.roll(shifts=1, dims=-1) + + def __len__(self): + return self.dataset_len + + def __getitem__(self, item): + return self.input_seq[item], self.output_seq[item] diff --git a/tests/base/models.py b/tests/base/models.py index 77deb0766b9b6..7f295da59fd65 100644 --- a/tests/base/models.py +++ b/tests/base/models.py @@ -6,7 +6,7 @@ import torch.nn.functional as F from torch.utils.data import DataLoader -from tests.base.datasets import TrialMNIST +from tests.base.datasets import TrialMNIST, AverageDataset, MNIST try: from test_tube import HyperOptArgumentParser @@ -83,6 +83,8 @@ def __init__(self, hidden_dim, learning_rate, b1, b2, **kwargs): self.generated_imgs = None self.last_imgs = None + self.example_input_array = torch.rand(2, self.hidden_dim) + def forward(self, z): return self.generator(z) @@ -154,3 +156,58 @@ def configure_optimizers(self): def train_dataloader(self): return DataLoader(TrialMNIST(train=True, download=True), batch_size=16) + + +class ParityModuleRNN(LightningModule): + def __init__(self): + super().__init__() + self.rnn = nn.LSTM(10, 20, batch_first=True) + self.linear_out = nn.Linear(in_features=20, out_features=5) + + def forward(self, x): + seq, last = self.rnn(x) + return self.linear_out(seq) + + def training_step(self, batch, batch_nb): + x, y = batch + y_hat = self(x) + loss = F.mse_loss(y_hat, y) + return {'loss': loss} + + def configure_optimizers(self): + return torch.optim.Adam(self.parameters(), lr=0.02) + + def train_dataloader(self): + return DataLoader(AverageDataset(), batch_size=30) + + +class ParityModuleMNIST(LightningModule): + + def __init__(self): + super().__init__() + self.c_d1 = nn.Linear(in_features=28 * 28, out_features=128) + self.c_d1_bn = nn.BatchNorm1d(128) + self.c_d1_drop = nn.Dropout(0.3) + self.c_d2 = nn.Linear(in_features=128, out_features=10) + + def forward(self, x): + x = x.view(x.size(0), -1) + x = self.c_d1(x) + x = torch.tanh(x) + x = self.c_d1_bn(x) + x = self.c_d1_drop(x) + x = self.c_d2(x) + return x + + def training_step(self, batch, batch_nb): + x, y = batch + y_hat = self(x) + loss = F.cross_entropy(y_hat, y) + return {'loss': loss} + + def configure_optimizers(self): + return torch.optim.Adam(self.parameters(), lr=0.02) + + def train_dataloader(self): + return DataLoader(MNIST(train=True, download=True,), + batch_size=128) diff --git a/tests/core/__init__.py b/tests/core/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/tests/core/test_memory.py b/tests/core/test_memory.py new file mode 100644 index 0000000000000..6ddbb18ecf548 --- /dev/null +++ b/tests/core/test_memory.py @@ -0,0 +1,186 @@ +import pytest +import torch +import torch.nn as nn + +from pytorch_lightning import LightningModule +from pytorch_lightning.core.memory import UNKNOWN_SIZE, ModelSummary +from tests.base.models import ParityModuleRNN + + +class EmptyModule(LightningModule): + """ A module that has no layers """ + + def __init__(self): + super().__init__() + self.parameter = torch.rand(3, 3, requires_grad=True) + self.example_input_array = torch.zeros(1, 2, 3, 4, 5) + + def forward(self, *args, **kwargs): + return {'loss': self.parameter.sum()} + + +class UnorderedModel(LightningModule): + """ A model in which the layers not defined in order of execution """ + + def __init__(self): + super().__init__() + # note: the definition order is intentionally scrambled for this test + self.layer2 = nn.Linear(10, 2) + self.combine = nn.Linear(7, 9) + self.layer1 = nn.Linear(3, 5) + self.relu = nn.ReLU() + # this layer is unused, therefore input-/output shapes are unknown + self.unused = nn.Conv2d(1, 1, 1) + + self.example_input_array = (torch.rand(2, 3), torch.rand(2, 10)) + + def forward(self, x, y): + out1 = self.layer1(x) + out2 = self.layer2(y) + out = self.relu(torch.cat((out1, out2), 1)) + out = self.combine(out) + return out + + +@pytest.mark.parametrize(['mode'], [ + pytest.param(ModelSummary.MODE_FULL), + pytest.param(ModelSummary.MODE_TOP), +]) +def test_empty_model_summary_shapes(mode): + """ Test that the summary works for models that have no submodules. """ + model = EmptyModule() + summary = model.summarize(mode=mode) + assert summary.in_sizes == [] + assert summary.out_sizes == [] + assert summary.param_nums == [] + + +@pytest.mark.parametrize(['mode'], [ + pytest.param(ModelSummary.MODE_FULL), + pytest.param(ModelSummary.MODE_TOP), +]) +@pytest.mark.parametrize(['device', 'dtype'], [ + pytest.param(torch.device('cpu'), torch.double), + pytest.param(torch.device('cuda', 0), torch.float), + pytest.param(torch.device('cuda', 0), torch.float16), +]) +@pytest.mark.skipif(not torch.cuda.is_available(), reason="Test requires GPU.") +def test_linear_model_summary_shapes(device, dtype, mode): + """ Test that the model summary correctly computes the input- and output shapes. """ + model = UnorderedModel().type(dtype).to(device) + model.train() + summary = model.summarize(mode=mode) + assert summary.in_sizes == [ + [2, 10], # layer 2 + [2, 7], # combine + [2, 3], # layer 1 + [2, 7], # relu + UNKNOWN_SIZE, + ] + assert summary.out_sizes == [ + [2, 2], # layer 2 + [2, 9], # combine + [2, 5], # layer 1 + [2, 7], # relu + UNKNOWN_SIZE, + ] + assert model.training + assert model.dtype == dtype + assert model.device == device + + +@pytest.mark.parametrize(['mode'], [ + pytest.param(ModelSummary.MODE_FULL), + pytest.param(ModelSummary.MODE_TOP), +]) +def test_rnn_summary_shapes(mode): + """ Test that the model summary works for RNNs. """ + model = ParityModuleRNN() + + b = 3 + t = 5 + i = model.rnn.input_size + h = model.rnn.hidden_size + o = model.linear_out.out_features + + model.example_input_array = torch.zeros(b, t, 10) + + summary = model.summarize(mode=mode) + assert summary.in_sizes == [ + [b, t, i], # rnn + [b, t, h], # linear + ] + assert summary.out_sizes == [ + [[b, t, h], [[1, b, h], [1, b, h]]], # rnn + [b, t, o] # linear + ] + + +@pytest.mark.parametrize(['mode'], [ + pytest.param(ModelSummary.MODE_FULL), + pytest.param(ModelSummary.MODE_TOP), +]) +def test_summary_parameter_count(mode): + """ Test that the summary counts the number of parameters in every submodule. """ + model = UnorderedModel() + summary = model.summarize(mode=mode) + assert summary.param_nums == [ + model.layer2.weight.numel() + model.layer2.bias.numel(), + model.combine.weight.numel() + model.combine.bias.numel(), + model.layer1.weight.numel() + model.layer1.bias.numel(), + 0, # ReLU + model.unused.weight.numel() + model.unused.bias.numel(), + ] + + +@pytest.mark.parametrize(['mode'], [ + pytest.param(ModelSummary.MODE_FULL), + pytest.param(ModelSummary.MODE_TOP), +]) +def test_summary_layer_types(mode): + """ Test that the summary displays the layer names correctly. """ + model = UnorderedModel() + summary = model.summarize(mode=mode) + assert summary.layer_types == [ + 'Linear', + 'Linear', + 'Linear', + 'ReLU', + 'Conv2d', + ] + + +@pytest.mark.parametrize(['mode'], [ + pytest.param(ModelSummary.MODE_FULL), + pytest.param(ModelSummary.MODE_TOP), +]) +@pytest.mark.parametrize(['example_input', 'expected_size'], [ + pytest.param([], UNKNOWN_SIZE), + pytest.param((1, 2, 3), [UNKNOWN_SIZE] * 3), + pytest.param(torch.tensor(0), UNKNOWN_SIZE), + pytest.param(dict(tensor=torch.zeros(1, 2, 3)), UNKNOWN_SIZE), + pytest.param(torch.zeros(2, 3, 4), [2, 3, 4]), + pytest.param([torch.zeros(2, 3), torch.zeros(4, 5)], [[2, 3], [4, 5]]), + pytest.param((torch.zeros(2, 3), torch.zeros(4, 5)), [[2, 3], [4, 5]]), +]) +def test_example_input_array_types(example_input, expected_size, mode): + """ Test the types of example inputs supported for display in the summary. """ + + class DummyModule(nn.Module): + def forward(self, *args, **kwargs): + return None + + class DummyLightningModule(LightningModule): + + def __init__(self): + super().__init__() + self.layer = DummyModule() + + # this LightningModule and submodule accept any type of input + def forward(self, *args, **kwargs): + return self.layer(*args, **kwargs) + + model = DummyLightningModule() + model.example_input_array = example_input + summary = model.summarize(mode=mode) + assert summary.in_sizes == [expected_size]