From 195ee327ee020bb3edb21e2a8905e14bc3980b08 Mon Sep 17 00:00:00 2001 From: justusschock Date: Sat, 30 Jan 2021 14:51:36 +0100 Subject: [PATCH 01/22] add basic accelerator class. Co-Authored with @awaelchi --- pytorch_lightning/accelerators/accelerator.py | 364 ++++++++++++++++++ 1 file changed, 364 insertions(+) create mode 100644 pytorch_lightning/accelerators/accelerator.py diff --git a/pytorch_lightning/accelerators/accelerator.py b/pytorch_lightning/accelerators/accelerator.py new file mode 100644 index 0000000000000..7dced06f5e6b7 --- /dev/null +++ b/pytorch_lightning/accelerators/accelerator.py @@ -0,0 +1,364 @@ +from pytorch_lightning.utilities.enums import LightningEnum +from typing import Any, Callable, Iterable, Optional, Union + +import torch +from torch.optim import Optimizer + +from pytorch_lightning.plugins import TrainingTypePlugin, HorovodPlugin +from pytorch_lightning.plugins.precision import ( + ApexMixedPrecisionPlugin, + MixedPrecisionPlugin, + NativeMixedPrecisionPlugin, + PrecisionPlugin, +) +from pytorch_lightning.core import LightningModule +from pytorch_lightning.utilities import AMPType +from pytorch_lightning.utilities.apply_func import move_data_to_device + + +class Accelerator(object): + """ + The Accelerator Base Class. + An Accelerator is meant to deal with one type of Hardware. + + Currently there are accelerators for: + - CPU + - GPU + - TPU + + Each Accelerator gets two plugins upon initialization: + One to handle differences from the training routine and one to handle different precisions. + + """ + + def __init__( + self, + precision_plugin: PrecisionPlugin, + training_type_plugin: TrainingTypePlugin, + ) -> None: + """ + + Args: + precision_plugin: the plugin to handle precision-specific parts + training_type_plugin: the plugin to handle different training routines + """ + self.precision_plugin = precision_plugin + self.training_type_plugin = training_type_plugin + + self.optimizers = None + self.lr_schedulers = None + self.optimizer_frequencies = None + + def setup(self, trainer: "Trainer", model: LightningModule) -> None: + """ + Connects the plugins to the training process, creates optimizers + + Args: + trainer: the trainer instance to connect to + model: the model to train + """ + self.connect_training_type_plugin(self.training_type_plugin, model) + self.setup_optimizers(trainer, model) + self.connect_precision_plugin(self.precision_plugin) + self.optimizers = trainer.convert_to_lightning_optimizers(self.optimizers) + + @property + def model(self) -> torch.nn.Module: + """Returns the model. This can also be a wrapped LightningModule. + For retrieving the pure LightningModule use :attr:`Accelerator.lightning_module` + + """ + return self.training_type_plugin.model + + @model.setter + def model(self, new_model: torch.nn.Module) -> None: + self.training_type_plugin.model = new_model + + @property + def lightning_module(self) -> LightningModule: + """Returns the pure LightningModule. + To get the potentially wrapped model use :attr:`Accelerator.model` + + """ + return self.training_type_plugin.lightning_module + + @property + def root_device(self) -> torch.device: + return self.training_type_plugin.root_device + + def teardown(self): + """This method is called to teardown the training process. + It is the right place to release memory and free other ressources. + """ + pass + + def batch_to_device(self, batch: Any, device: torch.device) -> Any: + """Moves the batch to the correct device. 
+ The returned batch is of the same type as the input batch, just having all tensors on the correct device. + + Args: + batch: The batch of samples to move to the correct device + device: The target device + """ + model = self.lightning_module + if model is not None: + return model.transfer_batch_to_device(batch, device) + return move_data_to_device(batch, device) + + def on_train_start(self): + """Hook to do something upon the training start""" + pass + + def training_step(self, args): + """The actual training step. + + Args: + args: the arguments for the models training step. Can consist of the following: + batch (:class:`~torch.Tensor` | (:class:`~torch.Tensor`, ...) | [:class:`~torch.Tensor`, ...]): + The output of your :class:`~torch.utils.data.DataLoader`. A tensor, tuple or list. + batch_idx (int): Integer displaying index of this batch + optimizer_idx (int): When using multiple optimizers, this argument will also be present. + hiddens(:class:`~torch.Tensor`): Passed in if + :paramref:`~pytorch_lightning.trainer.trainer.Trainer.truncated_bptt_steps` > 0. + + """ + batch = self.to_device(args[0]) + + args[0] = batch + + with self.precision_plugin.train_step_context(): + with self.training_type_plugin.train_step_context(): + return self.lightning_module.training_step(*args) + + def validation_step(self, args): + """The actual validation step. + + Args: + args: the arguments for the models validation step. Can consist of the following: + batch (:class:`~torch.Tensor` | (:class:`~torch.Tensor`, ...) | [:class:`~torch.Tensor`, ...]): + The output of your :class:`~torch.utils.data.DataLoader`. A tensor, tuple or list. + batch_idx (int): The index of this batch + dataloader_idx (int): The index of the dataloader that produced this batch + (only if multiple val dataloaders used) + """ + batch = self.to_device(args[0]) + + args[0] = batch + + with self.precision_plugin.val_step_context(): + with self.training_type_plugin.val_step_context(): + return self.lightning_module.validation_step(*args) + + def test_step(self, args): + """The actual test step. + + Args: + args: the arguments for the models test step. Can consist of the following: + batch (:class:`~torch.Tensor` | (:class:`~torch.Tensor`, ...) | [:class:`~torch.Tensor`, ...]): + The output of your :class:`~torch.utils.data.DataLoader`. A tensor, tuple or list. + batch_idx (int): The index of this batch. + dataloader_idx (int): The index of the dataloader that produced this batch + (only if multiple test dataloaders used). + """ + batch = self.to_device(args[0]) + + args[0] = batch + + with self.precision_plugin.test_step_context(): + with self.training_type_plugin.test_step_context(): + return self.lightning_module.test_step(*args) + + def training_step_end(self, output): + """A hook to do something at the end of the training step + + Args: + output: the output of the training step + """ + return output + + def test_step_end(self, output): + """A hook to do something at the end of the test step + + Args: + output: the output of the test step + """ + return output + + def validation_step_end(self, output): + """A hook to do something at the end of the validation step + + Args: + output: the output of the validation step + """ + return output + + def process_dataloader( + self, dataloader: Union[Iterable, torch.utils.data.DataLoader] + ) -> Union[Iterable, torch.utils.data.DataLoader]: + """Wraps the dataloader if necessary + + Args: + dataloader: iterable. 
Ideally of type: :class:`torch.utils.data.DataLoader` + """ + return dataloader + + def backward( + self, + closure_loss: torch.Tensor, + optimizer: torch.optim.Optimizer, + opt_idx: int, + should_accumulate: bool, + *args, + **kwargs, + ) -> torch.Tensor: + """Forwards backward-calls to the precision plugin. + + Args: + closure_loss: a tensor holding the loss value to backpropagate + optimizer: the optimizer to do the step later on. + opt_idx: the index of the optimizer + should_accumulate: whether to accumulate gradients + """ + output = self.precision_plugin.backward( + self.lightning_module, closure_loss, optimizer, opt_idx, should_accumulate, *args, **kwargs + ) + + # TODO: this is a hack, find a better solution for this (hook?) + if isinstance(self.training_type_plugin, HorovodPlugin): + optimizer.synchronize() + + return output + + def optimizer_step(self, optimizer: torch.optim.Optimizer, current_epoch: int, batch_idx: int, opt_idx: int, lambda_closure: Callable): + """performs the actual optimizer step. + + Args: + optimizer: the optimizer performing the step + current_epoch: current training epoch + batch_idx: index of the current batch + opt_idx: index of the current optimizer + lambda_closure: closure calculating the loss value + + """ + model_ref = self.lightning_module + is_lbfgs = isinstance(optimizer, torch.optim.LBFGS) + native_amp = ( + isinstance(self.precision_plugin, MixedPrecisionPlugin) and self.precision_plugin.backend == AMPType.NATIVE + ) + + self.precision_plugin.pre_optimizer_step(optimizer, opt_idx) + self.training_type_plugin.pre_optimizer_step(optimizer, opt_idx) + + # model hook + res = model_ref.optimizer_step( + epoch=current_epoch, + batch_idx=batch_idx, + optimizer=optimizer, + optimizer_idx=opt_idx, + optimizer_closure=lambda_closure, + on_tpu=False, # TPUAccelerator class sets this as True + using_native_amp=native_amp, + using_lbfgs=is_lbfgs, + ) + + self.precision_plugin.post_optimizer_step(optimizer, opt_idx) + self.training_type_plugin.post_optimizer_step(optimizer, opt_idx) + return res + + def optimizer_zero_grad(self, current_epoch: int, batch_idx: int, optimizer: torch.optim.Optimizer, opt_idx: int) -> None: + """Zeros all model parameter's gradients + """ + model_ref = self.lightning_module + model_ref.optimizer_zero_grad(current_epoch, batch_idx, optimizer, opt_idx) + + def clip_gradients(self, optimizer: torch.optim.Optimizer, clip_val: Union[int, float]) -> None: + """clips all the optimizer parameters to the given value + + """ + + self.precision_plugin.clip_gradients(optimizer, clip_val) + + def on_train_epoch_end(self, outputs) -> None: + """Hook to do something on the end of an training epoch + + Args: + outputs: the outputs of the training steps + """ + pass + + def on_train_end(self) -> None: + """Hook to do something at the end of the training + """ + pass + + def setup_optimizers(self, trainer: 'Trainer', model: LightningModule): + """creates optimizers and schedulers + + Args: + trainer: the Trainer, these optimizers should be connected to + model: the model to be optimized by the created optimizers + """ + if trainer.testing is True: + return + optimizers, lr_schedulers, optimizer_frequencies = trainer.init_optimizers(model) + self.optimizers = optimizers + self.lr_schedulers = lr_schedulers + self.optimizer_frequencies = optimizer_frequencies + + def connect_training_type_plugin(self, plugin: TrainingTypePlugin, model: LightningModule) -> None: + """Attaches the training type plugin to the accelerator. 
+ Also transfers ownership of the model to this plugin + + """ + plugin.connect(model) + + def connect_precision_plugin(self, plugin: PrecisionPlugin): + """Attaches the precision plugin to the accelerator + + """ + model, optimizers, schedulers = plugin.connect(self.model, self.optimizers, self.lr_schedulers) + self.model = model + self.optimizers = optimizers + self.schedulers = schedulers + + def to_device(self, batch: Any) -> Any: + """Pushes the batch to the root device + + """ + return self.batch_to_device(batch, self.root_device) + + @property + def amp_backend(self) -> Optional[LightningEnum]: + if isinstance(self.precision_plugin, ApexMixedPrecisionPlugin): + return AMPType.APEX + elif isinstance(self.precision_plugin, NativeMixedPrecisionPlugin): + return AMPType.NATIVE + else: + return None + + @property + def precision(self) -> int: + return self.precision_plugin.precision + + @property + def scaler(self): + if hasattr(self.precision_plugin, "scaler"): + return self.precision_plugin.scaler + + return None + + @property + def rpc_enabled(self) -> bool: + return self.training_type_plugin.rpc_enabled + + def optimizer_state(self, optimizer: Optimizer) -> dict: + """ + Returns state of an optimizer. Allows for syncing/collating optimizer state from processes in custom + plugins. + """ + if self.training_type_plugin and hasattr(self.training_type_plugin, "optimizer_state"): + return self.training_type_plugin.optimizer_state(optimizer) + return optimizer.state_dict() + + def on_save(self, checkpoint): + return checkpoint \ No newline at end of file From b449f5b550dc3e94ab804181fd6ca7fd5bc555c1 Mon Sep 17 00:00:00 2001 From: justusschock Date: Sat, 30 Jan 2021 14:55:51 +0100 Subject: [PATCH 02/22] Add base plugin class. Co-authored with @awaelchi --- pytorch_lightning/plugins/base_plugin.py | 52 ++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 pytorch_lightning/plugins/base_plugin.py diff --git a/pytorch_lightning/plugins/base_plugin.py b/pytorch_lightning/plugins/base_plugin.py new file mode 100644 index 0000000000000..255780367a0cc --- /dev/null +++ b/pytorch_lightning/plugins/base_plugin.py @@ -0,0 +1,52 @@ +import contextlib + +import torch + + +class Plugin(object): + """Basic Plugin class to derive precision and training type plugins from. + """ + + def connect(self, model: torch.nn.Module, *args, **kwargs): + """Connects the plugin with the accelerator (and thereby with trainer and model). + Will be called by the accelerator. + """ + pass + + def pre_optimizer_step(self, optimizer: torch.optim.Optimizer, optimizer_idx: int): + """Hook to do something before each optimizer step. + """ + pass + + def post_optimizer_step(self, optimizer: torch.optim.Optimizer, optimizer_idx: int): + """Hook to do something after each optimizer step. + """ + pass + + def pre_training(self): + """Hook to do something before the training starts. + """ + pass + + def post_training(self): + """Hook to do something after the training finishes. 
+ """ + pass + + @contextlib.contextmanager + def train_step_context(self): + """A contextmanager for the trainstep + """ + yield + + @contextlib.contextmanager + def val_step_context(self): + """A contextmanager for the validation step + """ + yield + + @contextlib.contextmanager + def test_step_context(self): + """A contextmanager for the teststep + """ + yield \ No newline at end of file From a02e7d594bd115ea174eb67ae37598a55e5f2ba9 Mon Sep 17 00:00:00 2001 From: justusschock Date: Sat, 30 Jan 2021 15:03:29 +0100 Subject: [PATCH 03/22] add basic trainign type plugin. Co-Authored with @awaelchi --- .../training_type/training_type_plugin.py | 119 ++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 pytorch_lightning/plugins/training_type/training_type_plugin.py diff --git a/pytorch_lightning/plugins/training_type/training_type_plugin.py b/pytorch_lightning/plugins/training_type/training_type_plugin.py new file mode 100644 index 0000000000000..a716339c89cab --- /dev/null +++ b/pytorch_lightning/plugins/training_type/training_type_plugin.py @@ -0,0 +1,119 @@ +import os +from abc import ABC, abstractmethod +from pytorch_lightning.core.lightning import LightningModule +from typing import Optional + +import torch + +from pytorch_lightning import _logger as log +from pytorch_lightning.plugins.base_plugin import Plugin + + +class TrainingTypePlugin(Plugin, ABC): + """A Plugin to change the behaviour of the training, validation and test-loop. + + """ + def __init__(self): + self._model = None + self._results = None + self.global_rank = 0 + + @property + @abstractmethod + def on_gpu(self) -> bool: + """Returns whether the current process is done on GPU + """ + raise NotImplementedError + + @property + @abstractmethod + def root_device(self) -> torch.device: + """Returns the root device + """ + raise NotImplementedError + + @abstractmethod + def model_to_device(self): + """Moves the model to the correct device + """ + raise NotImplementedError + + @property + @abstractmethod + def is_global_zero(self) -> bool: + """Whether the current process is the rank zero process not only on the local node, but for all nodes. + """ + raise NotImplementedError + + @abstractmethod + def reduce(self, output, *args, **kwargs): + """Reduces the given output (e.g. across GPUs/Processes) + """ + raise NotImplementedError + + @abstractmethod + def barrier(self, name: Optional[str] = None): + """Forces all possibly joined processes to wait for each other + """ + raise NotImplementedError + + @abstractmethod + def broadcast(self, obj: object, src: int = 0) -> object: + """Broadcasts an object to all processes + """ + raise NotImplementedError + + # TODO method this is currently unused. 
Check after complete refactors are pushed + def set_nvidia_flags(self, is_slurm_managing_tasks, device_ids): + if device_ids is None: + return + + # set the correct cuda visible devices (using pci order) + os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" + all_gpu_ids = ",".join([str(x) for x in range(torch.cuda.device_count())]) + devices = os.environ.get("CUDA_VISIBLE_DEVICES", all_gpu_ids) + log.info(f'LOCAL_RANK: {self.trainer.local_rank} - CUDA_VISIBLE_DEVICES: [{devices}]') + + def reduce_early_stopping_decision(self, should_stop: bool) -> bool: + """Reduce the early stopping decision across all possibly spawned processes + """ + return should_stop + + @property + def model(self) -> torch.nn.Module: + """Returns the potentially wrapped LightningModule + + """ + return self._model + + @model.setter + def model(self, new_model: torch.nn.Module): + self._model = new_model + + @property + def lightning_module(self) -> LightningModule: + """Returns the pure LightningModule without potential wrappers + + """ + return self._model + + @property + def results(self): + """ + The results of the last training/testing run will be cached here. + In distributed training, we make sure to transfer the results to the appropriate master process. + """ + # TODO: improve these docs + return self._results + + @property + def rpc_enabled(self) -> bool: + return False + + def start_training(self, trainer: 'Trainer') -> None: + # double dispatch to initiate the training loop + self._results = trainer.train() + + def start_testing(self, trainer: 'Trainer') -> None: + # double dispatch to initiate the test loop + self._results = trainer.run_test() From f7adc7730bbbdf963dad3ad588773c3ae7668683 Mon Sep 17 00:00:00 2001 From: justusschock Date: Sat, 30 Jan 2021 15:10:31 +0100 Subject: [PATCH 04/22] add basic precision plugin. Co-Authored with @awaelchi --- .../plugins/precision/precision_plugin.py | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 pytorch_lightning/plugins/precision/precision_plugin.py diff --git a/pytorch_lightning/plugins/precision/precision_plugin.py b/pytorch_lightning/plugins/precision/precision_plugin.py new file mode 100644 index 0000000000000..ebf303c00b456 --- /dev/null +++ b/pytorch_lightning/plugins/precision/precision_plugin.py @@ -0,0 +1,99 @@ +import math +from typing import Generator, Union + +import torch +from torch.optim import Optimizer + +from pytorch_lightning.plugins .base_plugin import Plugin +from pytorch_lightning.core import LightningModule + + +class PrecisionPlugin(Plugin): + EPSILON = 1e-6 + precision = 32 + + def master_params(self, optimizer: torch.optim.Optimizer) -> Generator[torch.Tensor, None, None]: + """The master params of the model. Returns the plain model params here. + Maybe different in other precision plugins. 
+ + """ + for group in optimizer.param_groups: + for p in group["params"]: + yield p + + def connect(self, model: torch.nn.Module, optimizers, lr_schedulers): + """Connects this plugin to the accelerator and the training process + + """ + return model, optimizers, lr_schedulers + + def backward( + self, + model: LightningModule, + closure_loss: torch.Tensor, + optimizer: torch.optim.Optimizer, + opt_idx: int, + should_accumulate: bool, + *args, + **kwargs, + ): + """performs the actual backpropagation + + Args: + model: the model to be optimized + closure_loss: the loss value obtained from the closure + optimizer: the optimizer to perform the step lateron + opt_idx: the optimizer's index + should_accumulate: whether to accumulate gradients or not + + """ + automatic_optimization = model.automatic_optimization + + # do backward pass + if automatic_optimization: + model.backward(closure_loss, optimizer, opt_idx) + else: + closure_loss.backward(*args, **kwargs) + + # once backward has been applied, release graph + closure_loss = closure_loss.detach() + + return closure_loss + + def clip_gradients(self, optimizer: Optimizer, clip_val: Union[int, float], norm_type: float = float(2.0)): + """Clips the gradients to a specific value + + """ + # TODO: separate TPU case from here + if clip_val is None: + return + + grad_clip_val = float(clip_val) + + if grad_clip_val <= 0: + return + + parameters = self.master_params(optimizer) + + max_norm = grad_clip_val + + if isinstance(parameters, torch.Tensor): + parameters = [parameters] + parameters = list(filter(lambda p: p.grad is not None, parameters)) + + device = parameters[0].device + + if norm_type == math.inf: + total_norm = max(p.grad.data.abs().max() for p in parameters) + else: + out = torch.empty(len(parameters), device=device) + for i, p in enumerate(parameters): + torch.norm(p.grad.data.to(device), norm_type, out=out[i]) + total_norm = torch.norm(out, norm_type) + + eps = self.EPSILON + + clip_coef = torch.tensor(max_norm, device=device) / (total_norm + eps) + clip_coef = torch.min(clip_coef, torch.ones_like(clip_coef)) + for p in parameters: + p.grad.data.mul_(clip_coef.to(p.grad.data.device)) From a503631aa1e58ed8bda8f43c6dc4b3ec4838ef33 Mon Sep 17 00:00:00 2001 From: justusschock Date: Sat, 30 Jan 2021 15:11:05 +0100 Subject: [PATCH 05/22] Add missing inits. 
Co-authored with @awaelchi --- pytorch_lightning/plugins/precision/__init__.py | 0 pytorch_lightning/plugins/training_type/__init__.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 pytorch_lightning/plugins/precision/__init__.py create mode 100644 pytorch_lightning/plugins/training_type/__init__.py diff --git a/pytorch_lightning/plugins/precision/__init__.py b/pytorch_lightning/plugins/precision/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pytorch_lightning/plugins/training_type/__init__.py b/pytorch_lightning/plugins/training_type/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d From 0c728f346e23c2f04adead5e12d13c8d4ed33d1a Mon Sep 17 00:00:00 2001 From: justusschock Date: Sat, 30 Jan 2021 15:22:58 +0100 Subject: [PATCH 06/22] pep8 Co-authored-by: @awaelchi --- pytorch_lightning/accelerators/accelerator.py | 35 ++++++++-------- pytorch_lightning/plugins/base_plugin.py | 24 ++++------- .../plugins/precision/precision_plugin.py | 12 ++---- .../training_type/training_type_plugin.py | 41 +++++++------------ 4 files changed, 44 insertions(+), 68 deletions(-) diff --git a/pytorch_lightning/accelerators/accelerator.py b/pytorch_lightning/accelerators/accelerator.py index 7dced06f5e6b7..391b66673b5eb 100644 --- a/pytorch_lightning/accelerators/accelerator.py +++ b/pytorch_lightning/accelerators/accelerator.py @@ -229,7 +229,14 @@ def backward( return output - def optimizer_step(self, optimizer: torch.optim.Optimizer, current_epoch: int, batch_idx: int, opt_idx: int, lambda_closure: Callable): + def optimizer_step( + self, + optimizer: torch.optim.Optimizer, + current_epoch: int, + batch_idx: int, + opt_idx: int, + lambda_closure: Callable, + ): """performs the actual optimizer step. Args: @@ -265,16 +272,15 @@ def optimizer_step(self, optimizer: torch.optim.Optimizer, current_epoch: int, b self.training_type_plugin.post_optimizer_step(optimizer, opt_idx) return res - def optimizer_zero_grad(self, current_epoch: int, batch_idx: int, optimizer: torch.optim.Optimizer, opt_idx: int) -> None: - """Zeros all model parameter's gradients - """ + def optimizer_zero_grad( + self, current_epoch: int, batch_idx: int, optimizer: torch.optim.Optimizer, opt_idx: int + ) -> None: + """Zeros all model parameter's gradients""" model_ref = self.lightning_module model_ref.optimizer_zero_grad(current_epoch, batch_idx, optimizer, opt_idx) def clip_gradients(self, optimizer: torch.optim.Optimizer, clip_val: Union[int, float]) -> None: - """clips all the optimizer parameters to the given value - - """ + """clips all the optimizer parameters to the given value""" self.precision_plugin.clip_gradients(optimizer, clip_val) @@ -287,11 +293,10 @@ def on_train_epoch_end(self, outputs) -> None: pass def on_train_end(self) -> None: - """Hook to do something at the end of the training - """ + """Hook to do something at the end of the training""" pass - def setup_optimizers(self, trainer: 'Trainer', model: LightningModule): + def setup_optimizers(self, trainer: "Trainer", model: LightningModule): """creates optimizers and schedulers Args: @@ -306,25 +311,21 @@ def setup_optimizers(self, trainer: 'Trainer', model: LightningModule): self.optimizer_frequencies = optimizer_frequencies def connect_training_type_plugin(self, plugin: TrainingTypePlugin, model: LightningModule) -> None: - """Attaches the training type plugin to the accelerator. + """Attaches the training type plugin to the accelerator. 
Also transfers ownership of the model to this plugin """ plugin.connect(model) def connect_precision_plugin(self, plugin: PrecisionPlugin): - """Attaches the precision plugin to the accelerator - - """ + """Attaches the precision plugin to the accelerator""" model, optimizers, schedulers = plugin.connect(self.model, self.optimizers, self.lr_schedulers) self.model = model self.optimizers = optimizers self.schedulers = schedulers def to_device(self, batch: Any) -> Any: - """Pushes the batch to the root device - - """ + """Pushes the batch to the root device""" return self.batch_to_device(batch, self.root_device) @property diff --git a/pytorch_lightning/plugins/base_plugin.py b/pytorch_lightning/plugins/base_plugin.py index 255780367a0cc..d2710f72894b8 100644 --- a/pytorch_lightning/plugins/base_plugin.py +++ b/pytorch_lightning/plugins/base_plugin.py @@ -4,8 +4,7 @@ class Plugin(object): - """Basic Plugin class to derive precision and training type plugins from. - """ + """Basic Plugin class to derive precision and training type plugins from.""" def connect(self, model: torch.nn.Module, *args, **kwargs): """Connects the plugin with the accelerator (and thereby with trainer and model). @@ -14,39 +13,32 @@ def connect(self, model: torch.nn.Module, *args, **kwargs): pass def pre_optimizer_step(self, optimizer: torch.optim.Optimizer, optimizer_idx: int): - """Hook to do something before each optimizer step. - """ + """Hook to do something before each optimizer step.""" pass def post_optimizer_step(self, optimizer: torch.optim.Optimizer, optimizer_idx: int): - """Hook to do something after each optimizer step. - """ + """Hook to do something after each optimizer step.""" pass def pre_training(self): - """Hook to do something before the training starts. - """ + """Hook to do something before the training starts.""" pass def post_training(self): - """Hook to do something after the training finishes. - """ + """Hook to do something after the training finishes.""" pass @contextlib.contextmanager def train_step_context(self): - """A contextmanager for the trainstep - """ + """A contextmanager for the trainstep""" yield @contextlib.contextmanager def val_step_context(self): - """A contextmanager for the validation step - """ + """A contextmanager for the validation step""" yield @contextlib.contextmanager def test_step_context(self): - """A contextmanager for the teststep - """ + """A contextmanager for the teststep""" yield \ No newline at end of file diff --git a/pytorch_lightning/plugins/precision/precision_plugin.py b/pytorch_lightning/plugins/precision/precision_plugin.py index ebf303c00b456..53ecb4deaeaf7 100644 --- a/pytorch_lightning/plugins/precision/precision_plugin.py +++ b/pytorch_lightning/plugins/precision/precision_plugin.py @@ -4,7 +4,7 @@ import torch from torch.optim import Optimizer -from pytorch_lightning.plugins .base_plugin import Plugin +from pytorch_lightning.plugins.base_plugin import Plugin from pytorch_lightning.core import LightningModule @@ -13,7 +13,7 @@ class PrecisionPlugin(Plugin): precision = 32 def master_params(self, optimizer: torch.optim.Optimizer) -> Generator[torch.Tensor, None, None]: - """The master params of the model. Returns the plain model params here. + """The master params of the model. Returns the plain model params here. Maybe different in other precision plugins. 
""" @@ -22,9 +22,7 @@ def master_params(self, optimizer: torch.optim.Optimizer) -> Generator[torch.Ten yield p def connect(self, model: torch.nn.Module, optimizers, lr_schedulers): - """Connects this plugin to the accelerator and the training process - - """ + """Connects this plugin to the accelerator and the training process""" return model, optimizers, lr_schedulers def backward( @@ -61,9 +59,7 @@ def backward( return closure_loss def clip_gradients(self, optimizer: Optimizer, clip_val: Union[int, float], norm_type: float = float(2.0)): - """Clips the gradients to a specific value - - """ + """Clips the gradients to a specific value""" # TODO: separate TPU case from here if clip_val is None: return diff --git a/pytorch_lightning/plugins/training_type/training_type_plugin.py b/pytorch_lightning/plugins/training_type/training_type_plugin.py index a716339c89cab..f000dac01e75e 100644 --- a/pytorch_lightning/plugins/training_type/training_type_plugin.py +++ b/pytorch_lightning/plugins/training_type/training_type_plugin.py @@ -10,9 +10,8 @@ class TrainingTypePlugin(Plugin, ABC): - """A Plugin to change the behaviour of the training, validation and test-loop. + """A Plugin to change the behaviour of the training, validation and test-loop.""" - """ def __init__(self): self._model = None self._results = None @@ -21,46 +20,39 @@ def __init__(self): @property @abstractmethod def on_gpu(self) -> bool: - """Returns whether the current process is done on GPU - """ + """Returns whether the current process is done on GPU""" raise NotImplementedError @property @abstractmethod def root_device(self) -> torch.device: - """Returns the root device - """ + """Returns the root device""" raise NotImplementedError @abstractmethod def model_to_device(self): - """Moves the model to the correct device - """ + """Moves the model to the correct device""" raise NotImplementedError @property @abstractmethod def is_global_zero(self) -> bool: - """Whether the current process is the rank zero process not only on the local node, but for all nodes. - """ + """Whether the current process is the rank zero process not only on the local node, but for all nodes.""" raise NotImplementedError @abstractmethod def reduce(self, output, *args, **kwargs): - """Reduces the given output (e.g. across GPUs/Processes) - """ + """Reduces the given output (e.g. across GPUs/Processes)""" raise NotImplementedError @abstractmethod def barrier(self, name: Optional[str] = None): - """Forces all possibly joined processes to wait for each other - """ + """Forces all possibly joined processes to wait for each other""" raise NotImplementedError @abstractmethod def broadcast(self, obj: object, src: int = 0) -> object: - """Broadcasts an object to all processes - """ + """Broadcasts an object to all processes""" raise NotImplementedError # TODO method this is currently unused. 
Check after complete refactors are pushed @@ -72,18 +64,15 @@ def set_nvidia_flags(self, is_slurm_managing_tasks, device_ids): os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" all_gpu_ids = ",".join([str(x) for x in range(torch.cuda.device_count())]) devices = os.environ.get("CUDA_VISIBLE_DEVICES", all_gpu_ids) - log.info(f'LOCAL_RANK: {self.trainer.local_rank} - CUDA_VISIBLE_DEVICES: [{devices}]') + log.info(f"LOCAL_RANK: {self.trainer.local_rank} - CUDA_VISIBLE_DEVICES: [{devices}]") def reduce_early_stopping_decision(self, should_stop: bool) -> bool: - """Reduce the early stopping decision across all possibly spawned processes - """ + """Reduce the early stopping decision across all possibly spawned processes""" return should_stop @property def model(self) -> torch.nn.Module: - """Returns the potentially wrapped LightningModule - - """ + """Returns the potentially wrapped LightningModule""" return self._model @model.setter @@ -92,9 +81,7 @@ def model(self, new_model: torch.nn.Module): @property def lightning_module(self) -> LightningModule: - """Returns the pure LightningModule without potential wrappers - - """ + """Returns the pure LightningModule without potential wrappers""" return self._model @property @@ -110,10 +97,10 @@ def results(self): def rpc_enabled(self) -> bool: return False - def start_training(self, trainer: 'Trainer') -> None: + def start_training(self, trainer: "Trainer") -> None: # double dispatch to initiate the training loop self._results = trainer.train() - def start_testing(self, trainer: 'Trainer') -> None: + def start_testing(self, trainer: "Trainer") -> None: # double dispatch to initiate the test loop self._results = trainer.run_test() From e700890a7d71cf5f67848dfa85e9cbe0af9dc811 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sat, 30 Jan 2021 16:49:23 +0100 Subject: [PATCH 07/22] ignore flake8 --- setup.cfg | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 20a53751e1dc2..e2a5b0269bbd8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -58,7 +58,17 @@ omit = [flake8] # TODO: this should be 88 or 100 according PEP8 max-line-length = 120 -exclude = .tox,*.egg,build,temp +exclude = + .tox, + *.egg + build + temp + # TODO: temporary until accelerator refactor finished + pytorch_lightning/accelerators/accelerator.py + pytorch_lightning/plugins/training_type + pytorch_lightning/plugins/precision + pytorch_lightning/plugins/base_plugin.py + select = E,W,F doctests = True verbose = 2 From ce2ce0c5453f41ffe845a648e4ffc80ba69c3e00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sat, 30 Jan 2021 17:13:03 +0100 Subject: [PATCH 08/22] coverage omit --- setup.cfg | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/setup.cfg b/setup.cfg index e2a5b0269bbd8..dc351647a9986 100644 --- a/setup.cfg +++ b/setup.cfg @@ -53,6 +53,11 @@ omit = pytorch_lightning/utilities/xla_device_utils.py pytorch_lightning/utilities/distributed.py pytorch_lightning/tuner/auto_gpu_select.py + # TODO: temporary, until accelerator refactor is finished + pytorch_lightning/accelerators/accelerator.py + pytorch_lightning/plugins/training_type/*.py + pytorch_lightning/plugins/precision/*.py + pytorch_lightning/plugins/base_plugin.py [flake8] From a341186c13accad091b15c2abfcf8694a39d402f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sat, 30 Jan 2021 17:29:24 +0100 Subject: [PATCH 09/22] imports in init --- pytorch_lightning/plugins/__init__.py | 3 +++ 
pytorch_lightning/plugins/precision/__init__.py | 3 +++ pytorch_lightning/plugins/training_type/__init__.py | 1 + 3 files changed, 7 insertions(+) diff --git a/pytorch_lightning/plugins/__init__.py b/pytorch_lightning/plugins/__init__.py index e69de29bb2d1d..e023060d5b16a 100644 --- a/pytorch_lightning/plugins/__init__.py +++ b/pytorch_lightning/plugins/__init__.py @@ -0,0 +1,3 @@ +from pytorch_lightning.plugins.base_plugin import Plugin +from pytorch_lightning.plugins.precision import * +from pytorch_lightning.plugins.training_type import * diff --git a/pytorch_lightning/plugins/precision/__init__.py b/pytorch_lightning/plugins/precision/__init__.py index e69de29bb2d1d..e023060d5b16a 100644 --- a/pytorch_lightning/plugins/precision/__init__.py +++ b/pytorch_lightning/plugins/precision/__init__.py @@ -0,0 +1,3 @@ +from pytorch_lightning.plugins.base_plugin import Plugin +from pytorch_lightning.plugins.precision import * +from pytorch_lightning.plugins.training_type import * diff --git a/pytorch_lightning/plugins/training_type/__init__.py b/pytorch_lightning/plugins/training_type/__init__.py index e69de29bb2d1d..329f6347b17c3 100644 --- a/pytorch_lightning/plugins/training_type/__init__.py +++ b/pytorch_lightning/plugins/training_type/__init__.py @@ -0,0 +1 @@ +from pytorch_lightning.plugins.training_type.training_type_plugin import TrainingTypePlugin From 1bd6a14cd0e3d600db60f99c534fe96f3d0f3b61 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Sat, 30 Jan 2021 19:48:05 +0100 Subject: [PATCH 10/22] lost --- pytorch_lightning/accelerators/accelerator.py | 40 +++++++++---------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/pytorch_lightning/accelerators/accelerator.py b/pytorch_lightning/accelerators/accelerator.py index 391b66673b5eb..fdecdb49d3da7 100644 --- a/pytorch_lightning/accelerators/accelerator.py +++ b/pytorch_lightning/accelerators/accelerator.py @@ -4,13 +4,7 @@ import torch from torch.optim import Optimizer -from pytorch_lightning.plugins import TrainingTypePlugin, HorovodPlugin -from pytorch_lightning.plugins.precision import ( - ApexMixedPrecisionPlugin, - MixedPrecisionPlugin, - NativeMixedPrecisionPlugin, - PrecisionPlugin, -) +from pytorch_lightning.plugins import TrainingTypePlugin from pytorch_lightning.core import LightningModule from pytorch_lightning.utilities import AMPType from pytorch_lightning.utilities.apply_func import move_data_to_device @@ -33,7 +27,7 @@ class Accelerator(object): def __init__( self, - precision_plugin: PrecisionPlugin, + precision_plugin, #: PrecisionPlugin # fixme training_type_plugin: TrainingTypePlugin, ) -> None: """ @@ -224,8 +218,9 @@ def backward( ) # TODO: this is a hack, find a better solution for this (hook?) 
- if isinstance(self.training_type_plugin, HorovodPlugin): - optimizer.synchronize() + # fixme: uncomment when this class is added + # if isinstance(self.training_type_plugin, HorovodPlugin): + # optimizer.synchronize() return output @@ -249,9 +244,11 @@ def optimizer_step( """ model_ref = self.lightning_module is_lbfgs = isinstance(optimizer, torch.optim.LBFGS) - native_amp = ( - isinstance(self.precision_plugin, MixedPrecisionPlugin) and self.precision_plugin.backend == AMPType.NATIVE - ) + # fixme: uncomment when this class is added + # is_native_amp = ( + # isinstance(self.precision_plugin, MixedPrecisionPlugin) and self.precision_plugin.backend == AMPType.NATIVE + # ) + is_native_amp = False self.precision_plugin.pre_optimizer_step(optimizer, opt_idx) self.training_type_plugin.pre_optimizer_step(optimizer, opt_idx) @@ -264,7 +261,7 @@ def optimizer_step( optimizer_idx=opt_idx, optimizer_closure=lambda_closure, on_tpu=False, # TPUAccelerator class sets this as True - using_native_amp=native_amp, + using_native_amp=is_native_amp, using_lbfgs=is_lbfgs, ) @@ -317,7 +314,7 @@ def connect_training_type_plugin(self, plugin: TrainingTypePlugin, model: Lightn """ plugin.connect(model) - def connect_precision_plugin(self, plugin: PrecisionPlugin): + def connect_precision_plugin(self, plugin): #: PrecisionPlugin # fixme """Attaches the precision plugin to the accelerator""" model, optimizers, schedulers = plugin.connect(self.model, self.optimizers, self.lr_schedulers) self.model = model @@ -330,12 +327,13 @@ def to_device(self, batch: Any) -> Any: @property def amp_backend(self) -> Optional[LightningEnum]: - if isinstance(self.precision_plugin, ApexMixedPrecisionPlugin): - return AMPType.APEX - elif isinstance(self.precision_plugin, NativeMixedPrecisionPlugin): - return AMPType.NATIVE - else: - return None + # fixme: uncomment when this class is added + # if isinstance(self.precision_plugin, ApexMixedPrecisionPlugin): + # return AMPType.APEX + # elif isinstance(self.precision_plugin, NativeMixedPrecisionPlugin): + # return AMPType.NATIVE + # return None + pass @property def precision(self) -> int: From 6930a1ec0547aae4a6989d36c5c89225f180210f Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Sat, 30 Jan 2021 19:52:37 +0100 Subject: [PATCH 11/22] imports --- pytorch_lightning/accelerators/accelerator.py | 4 ++-- pytorch_lightning/plugins/__init__.py | 4 ++-- pytorch_lightning/plugins/precision/__init__.py | 4 +--- pytorch_lightning/plugins/precision/precision_plugin.py | 2 +- .../plugins/training_type/training_type_plugin.py | 2 +- 5 files changed, 7 insertions(+), 9 deletions(-) diff --git a/pytorch_lightning/accelerators/accelerator.py b/pytorch_lightning/accelerators/accelerator.py index fdecdb49d3da7..04ac9b46c2ccf 100644 --- a/pytorch_lightning/accelerators/accelerator.py +++ b/pytorch_lightning/accelerators/accelerator.py @@ -1,13 +1,13 @@ -from pytorch_lightning.utilities.enums import LightningEnum from typing import Any, Callable, Iterable, Optional, Union import torch from torch.optim import Optimizer -from pytorch_lightning.plugins import TrainingTypePlugin from pytorch_lightning.core import LightningModule +from pytorch_lightning.plugins import TrainingTypePlugin from pytorch_lightning.utilities import AMPType from pytorch_lightning.utilities.apply_func import move_data_to_device +from pytorch_lightning.utilities.enums import LightningEnum class Accelerator(object): diff --git a/pytorch_lightning/plugins/__init__.py b/pytorch_lightning/plugins/__init__.py index 
e023060d5b16a..d5d8b366156a6 100644 --- a/pytorch_lightning/plugins/__init__.py +++ b/pytorch_lightning/plugins/__init__.py @@ -1,3 +1,3 @@ from pytorch_lightning.plugins.base_plugin import Plugin -from pytorch_lightning.plugins.precision import * -from pytorch_lightning.plugins.training_type import * +from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin +from pytorch_lightning.plugins.training_type.training_type_plugin import TrainingTypePlugin diff --git a/pytorch_lightning/plugins/precision/__init__.py b/pytorch_lightning/plugins/precision/__init__.py index e023060d5b16a..8b137891791fe 100644 --- a/pytorch_lightning/plugins/precision/__init__.py +++ b/pytorch_lightning/plugins/precision/__init__.py @@ -1,3 +1 @@ -from pytorch_lightning.plugins.base_plugin import Plugin -from pytorch_lightning.plugins.precision import * -from pytorch_lightning.plugins.training_type import * + diff --git a/pytorch_lightning/plugins/precision/precision_plugin.py b/pytorch_lightning/plugins/precision/precision_plugin.py index 53ecb4deaeaf7..2c77ff045575e 100644 --- a/pytorch_lightning/plugins/precision/precision_plugin.py +++ b/pytorch_lightning/plugins/precision/precision_plugin.py @@ -4,8 +4,8 @@ import torch from torch.optim import Optimizer -from pytorch_lightning.plugins.base_plugin import Plugin from pytorch_lightning.core import LightningModule +from pytorch_lightning.plugins.base_plugin import Plugin class PrecisionPlugin(Plugin): diff --git a/pytorch_lightning/plugins/training_type/training_type_plugin.py b/pytorch_lightning/plugins/training_type/training_type_plugin.py index f000dac01e75e..352e0047bfaca 100644 --- a/pytorch_lightning/plugins/training_type/training_type_plugin.py +++ b/pytorch_lightning/plugins/training_type/training_type_plugin.py @@ -1,11 +1,11 @@ import os from abc import ABC, abstractmethod -from pytorch_lightning.core.lightning import LightningModule from typing import Optional import torch from pytorch_lightning import _logger as log +from pytorch_lightning.core.lightning import LightningModule from pytorch_lightning.plugins.base_plugin import Plugin From e58532ec3a3c52a317610bb0a8ee7a711982cfee Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Sat, 30 Jan 2021 19:53:31 +0100 Subject: [PATCH 12/22] flake8 --- pytorch_lightning/plugins/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/plugins/__init__.py b/pytorch_lightning/plugins/__init__.py index d5d8b366156a6..a17d5127edfc6 100644 --- a/pytorch_lightning/plugins/__init__.py +++ b/pytorch_lightning/plugins/__init__.py @@ -1,3 +1,3 @@ -from pytorch_lightning.plugins.base_plugin import Plugin -from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin -from pytorch_lightning.plugins.training_type.training_type_plugin import TrainingTypePlugin +from pytorch_lightning.plugins.base_plugin import Plugin # noqa: F401 +from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin # noqa: F401 +from pytorch_lightning.plugins.training_type.training_type_plugin import TrainingTypePlugin # noqa: F401 From 77b1b28cc2588fe9b7eeab856451e9940091af44 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Sat, 30 Jan 2021 19:55:09 +0100 Subject: [PATCH 13/22] . 
--- pytorch_lightning/accelerators/accelerator.py | 13 +++++++++++++ pytorch_lightning/plugins/base_plugin.py | 13 +++++++++++++ .../plugins/precision/precision_plugin.py | 13 +++++++++++++ .../plugins/training_type/training_type_plugin.py | 13 +++++++++++++ 4 files changed, 52 insertions(+) diff --git a/pytorch_lightning/accelerators/accelerator.py b/pytorch_lightning/accelerators/accelerator.py index 04ac9b46c2ccf..db2f8a005fd82 100644 --- a/pytorch_lightning/accelerators/accelerator.py +++ b/pytorch_lightning/accelerators/accelerator.py @@ -1,3 +1,16 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from typing import Any, Callable, Iterable, Optional, Union import torch diff --git a/pytorch_lightning/plugins/base_plugin.py b/pytorch_lightning/plugins/base_plugin.py index d2710f72894b8..c4eeff52751a6 100644 --- a/pytorch_lightning/plugins/base_plugin.py +++ b/pytorch_lightning/plugins/base_plugin.py @@ -1,3 +1,16 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import contextlib import torch diff --git a/pytorch_lightning/plugins/precision/precision_plugin.py b/pytorch_lightning/plugins/precision/precision_plugin.py index 2c77ff045575e..0ff54bf1e8515 100644 --- a/pytorch_lightning/plugins/precision/precision_plugin.py +++ b/pytorch_lightning/plugins/precision/precision_plugin.py @@ -1,3 +1,16 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import math from typing import Generator, Union diff --git a/pytorch_lightning/plugins/training_type/training_type_plugin.py b/pytorch_lightning/plugins/training_type/training_type_plugin.py index 352e0047bfaca..6d07261ec610b 100644 --- a/pytorch_lightning/plugins/training_type/training_type_plugin.py +++ b/pytorch_lightning/plugins/training_type/training_type_plugin.py @@ -1,3 +1,16 @@ +# Copyright The PyTorch Lightning team. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import os
 from abc import ABC, abstractmethod
 from typing import Optional

From ec8fc9ae8d5b3058846ffc07f8c81a9780b46b77 Mon Sep 17 00:00:00 2001
From: Jirka Borovec
Date: Sat, 30 Jan 2021 19:55:32 +0100
Subject: [PATCH 14/22] .

---
 pytorch_lightning/accelerators/accelerator.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pytorch_lightning/accelerators/accelerator.py b/pytorch_lightning/accelerators/accelerator.py
index db2f8a005fd82..c5c77d4711e6a 100644
--- a/pytorch_lightning/accelerators/accelerator.py
+++ b/pytorch_lightning/accelerators/accelerator.py
@@ -18,7 +18,6 @@
 
 from pytorch_lightning.core import LightningModule
 from pytorch_lightning.plugins import TrainingTypePlugin
-from pytorch_lightning.utilities import AMPType
 from pytorch_lightning.utilities.apply_func import move_data_to_device
 from pytorch_lightning.utilities.enums import LightningEnum
 

From 9b652206af7e8abffb82b2059f38809d58a4e68a Mon Sep 17 00:00:00 2001
From: Jirka Borovec
Date: Sat, 30 Jan 2021 20:05:31 +0100
Subject: [PATCH 15/22] chlog

---
 CHANGELOG.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0e465056457c3..33ec4a8463600 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -107,6 +107,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Changed the default value for the `progress_bar_refresh_rate` Trainer argument in Google COLAB notebooks to 20 ([#5516](https://github.com/PyTorchLightning/pytorch-lightning/pull/5516)) +- Refactored Accelerators and Plugins ( + [#5715](https://github.com/PyTorchLightning/pytorch-lightning/pull/5715), + ) + + ### Deprecated - `stat_scores_multiple_classes` is deprecated in favor of `stat_scores` ([#4839](https://github.com/PyTorchLightning/pytorch-lightning/pull/4839)) From 187afa9c8b202379648879311766139ba1082f73 Mon Sep 17 00:00:00 2001 From: Justus Schock <12886177+justusschock@users.noreply.github.com> Date: Sat, 30 Jan 2021 20:09:21 +0100 Subject: [PATCH 16/22] Update pytorch_lightning/plugins/training_type/training_type_plugin.py Co-authored-by: Jirka Borovec --- pytorch_lightning/plugins/training_type/training_type_plugin.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/plugins/training_type/training_type_plugin.py b/pytorch_lightning/plugins/training_type/training_type_plugin.py index 6d07261ec610b..49954673cf244 100644 --- a/pytorch_lightning/plugins/training_type/training_type_plugin.py +++ b/pytorch_lightning/plugins/training_type/training_type_plugin.py @@ -34,7 +34,6 @@ def __init__(self): @abstractmethod def on_gpu(self) -> bool: """Returns whether the current process is done on GPU""" - raise NotImplementedError @property @abstractmethod From 6d0e153592ccfbc6532c9fc30913615d4547682b Mon Sep 17 00:00:00 2001 From: Justus Schock <12886177+justusschock@users.noreply.github.com> Date: Sat, 30 Jan 2021 20:11:36 +0100 Subject: [PATCH 17/22] Update pytorch_lightning/plugins/training_type/training_type_plugin.py Co-authored-by: Jirka Borovec --- pytorch_lightning/plugins/training_type/training_type_plugin.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/plugins/training_type/training_type_plugin.py b/pytorch_lightning/plugins/training_type/training_type_plugin.py index 49954673cf244..6c0c896e7d9c5 100644 --- a/pytorch_lightning/plugins/training_type/training_type_plugin.py +++ b/pytorch_lightning/plugins/training_type/training_type_plugin.py @@ -39,7 +39,6 @@ def on_gpu(self) -> bool: @abstractmethod def root_device(self) -> torch.device: """Returns the root device""" - raise NotImplementedError @abstractmethod def model_to_device(self): From 0301c03e1d407b44a997cdcba2cdb665530ec95d Mon Sep 17 00:00:00 2001 From: Justus Schock <12886177+justusschock@users.noreply.github.com> Date: Sat, 30 Jan 2021 20:12:12 +0100 Subject: [PATCH 18/22] Update pytorch_lightning/plugins/training_type/training_type_plugin.py Co-authored-by: Jirka Borovec --- pytorch_lightning/plugins/training_type/training_type_plugin.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/plugins/training_type/training_type_plugin.py b/pytorch_lightning/plugins/training_type/training_type_plugin.py index 6c0c896e7d9c5..32a452fadf703 100644 --- a/pytorch_lightning/plugins/training_type/training_type_plugin.py +++ b/pytorch_lightning/plugins/training_type/training_type_plugin.py @@ -43,7 +43,6 @@ def root_device(self) -> torch.device: @abstractmethod def model_to_device(self): """Moves the model to the correct device""" - raise NotImplementedError @property @abstractmethod From 3e1b92dd5c8d89d5c88884cdb0929587e1f1c0b3 Mon Sep 17 00:00:00 2001 From: Justus Schock <12886177+justusschock@users.noreply.github.com> Date: Sat, 30 Jan 2021 20:14:36 +0100 Subject: [PATCH 19/22] Update pytorch_lightning/plugins/training_type/training_type_plugin.py Co-authored-by: Jirka 
Borovec --- pytorch_lightning/plugins/training_type/training_type_plugin.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/plugins/training_type/training_type_plugin.py b/pytorch_lightning/plugins/training_type/training_type_plugin.py index 32a452fadf703..528993063613e 100644 --- a/pytorch_lightning/plugins/training_type/training_type_plugin.py +++ b/pytorch_lightning/plugins/training_type/training_type_plugin.py @@ -48,7 +48,6 @@ def model_to_device(self): @abstractmethod def is_global_zero(self) -> bool: """Whether the current process is the rank zero process not only on the local node, but for all nodes.""" - raise NotImplementedError @abstractmethod def reduce(self, output, *args, **kwargs): From eaa4147e92f65414131c9d0ba5b2346aaa050030 Mon Sep 17 00:00:00 2001 From: Justus Schock <12886177+justusschock@users.noreply.github.com> Date: Sat, 30 Jan 2021 20:14:52 +0100 Subject: [PATCH 20/22] Update pytorch_lightning/plugins/training_type/training_type_plugin.py Co-authored-by: Jirka Borovec --- pytorch_lightning/plugins/training_type/training_type_plugin.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/plugins/training_type/training_type_plugin.py b/pytorch_lightning/plugins/training_type/training_type_plugin.py index 528993063613e..7c725df758161 100644 --- a/pytorch_lightning/plugins/training_type/training_type_plugin.py +++ b/pytorch_lightning/plugins/training_type/training_type_plugin.py @@ -52,7 +52,6 @@ def is_global_zero(self) -> bool: @abstractmethod def reduce(self, output, *args, **kwargs): """Reduces the given output (e.g. across GPUs/Processes)""" - raise NotImplementedError @abstractmethod def barrier(self, name: Optional[str] = None): From e35f64c09a6efe42bc7c47b762c13c0000676475 Mon Sep 17 00:00:00 2001 From: Justus Schock <12886177+justusschock@users.noreply.github.com> Date: Sat, 30 Jan 2021 20:15:28 +0100 Subject: [PATCH 21/22] Update pytorch_lightning/plugins/training_type/training_type_plugin.py Co-authored-by: Jirka Borovec --- pytorch_lightning/plugins/training_type/training_type_plugin.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/plugins/training_type/training_type_plugin.py b/pytorch_lightning/plugins/training_type/training_type_plugin.py index 7c725df758161..6db0a9c6d39f5 100644 --- a/pytorch_lightning/plugins/training_type/training_type_plugin.py +++ b/pytorch_lightning/plugins/training_type/training_type_plugin.py @@ -56,7 +56,6 @@ def reduce(self, output, *args, **kwargs): @abstractmethod def barrier(self, name: Optional[str] = None): """Forces all possibly joined processes to wait for each other""" - raise NotImplementedError @abstractmethod def broadcast(self, obj: object, src: int = 0) -> object: From 1c8771a79153a9f42f38cc51efc3595f7d5acea3 Mon Sep 17 00:00:00 2001 From: Justus Schock <12886177+justusschock@users.noreply.github.com> Date: Sat, 30 Jan 2021 20:15:47 +0100 Subject: [PATCH 22/22] Update pytorch_lightning/plugins/training_type/training_type_plugin.py Co-authored-by: Jirka Borovec --- pytorch_lightning/plugins/training_type/training_type_plugin.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/plugins/training_type/training_type_plugin.py b/pytorch_lightning/plugins/training_type/training_type_plugin.py index 6db0a9c6d39f5..d1e7907d5d97f 100644 --- a/pytorch_lightning/plugins/training_type/training_type_plugin.py +++ b/pytorch_lightning/plugins/training_type/training_type_plugin.py @@ -60,7 +60,6 @@ def barrier(self, name: Optional[str] = None): @abstractmethod 
     def broadcast(self, obj: object, src: int = 0) -> object:
         """Broadcasts an object to all processes"""
-        raise NotImplementedError
 
     # TODO method this is currently unused. Check after complete refactors are pushed
     def set_nvidia_flags(self, is_slurm_managing_tasks, device_ids):
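
The 22 patches above only introduce the abstract Accelerator, Plugin, PrecisionPlugin and TrainingTypePlugin interfaces; no concrete training-type plugin ships in this series. As a minimal sketch of how the pieces are meant to compose — assuming the import paths created by these diffs — the example below wires a hypothetical SingleCPUPlugin (not part of this series, defined here purely for illustration) together with the default 32-bit PrecisionPlugin into an Accelerator.

import torch

from pytorch_lightning.accelerators.accelerator import Accelerator
from pytorch_lightning.plugins import PrecisionPlugin, TrainingTypePlugin


class SingleCPUPlugin(TrainingTypePlugin):
    """Hypothetical minimal training-type plugin: a single process on a single CPU."""

    @property
    def on_gpu(self) -> bool:
        return False

    @property
    def root_device(self) -> torch.device:
        return torch.device("cpu")

    def model_to_device(self) -> None:
        # the plugin owns the model once Accelerator.connect_training_type_plugin has run
        self.model.to(self.root_device)

    @property
    def is_global_zero(self) -> bool:
        return True

    def reduce(self, output, *args, **kwargs):
        # nothing to reduce in a single process
        return output

    def barrier(self, name=None) -> None:
        # no other processes to synchronize with
        pass

    def broadcast(self, obj: object, src: int = 0) -> object:
        return obj


# Compose the two plugins into an accelerator; Accelerator.setup(trainer, model)
# would later connect them to a Trainer and create the optimizers.
accelerator = Accelerator(
    precision_plugin=PrecisionPlugin(),      # plain 32-bit precision
    training_type_plugin=SingleCPUPlugin(),  # hypothetical single-device routine
)

The same Accelerator instance then drives training_step/validation_step/test_step through the train/val/test context managers that base_plugin.py defines on both plugins.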