Commit

Merge branch 'master' into bugfix/manual_sharded
awaelchli committed Apr 10, 2021
2 parents d821602 + 20ff50c commit d054ed0
Showing 18 changed files with 204 additions and 136 deletions.
8 changes: 4 additions & 4 deletions .github/CODEOWNERS
@@ -48,7 +48,7 @@
# Testing
/tests/helpers/boring_model.py @williamfalcon @tchaton @borda

/.github/CODEOWNERS @williamfalcon
/README.md @williamfalcon @edenlightning @borda
/setup.py @williamfalcon @borda
/pytorch_lightning/info.py @williamfalcon @borda
/.github/CODEOWNERS @williamfalcon
/README.md @williamfalcon @edenlightning @borda
/setup.py @williamfalcon @borda
/pytorch_lightning/__info__.py @williamfalcon @borda
8 changes: 4 additions & 4 deletions .github/prepare-nightly_version.py
@@ -4,15 +4,15 @@

# set paths
_PATH_ROOT = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
_PATH_INIT = os.path.join(_PATH_ROOT, 'pytorch_lightning', '__init__.py')
_PATH_INFO = os.path.join(_PATH_ROOT, 'pytorch_lightning', '__info__.py')

# get today date
now = datetime.datetime.now()
now_date = now.strftime("%Y%m%d")

print(f"prepare init '{_PATH_INIT}' - replace version by {now_date}")
with open(_PATH_INIT, 'r') as fp:
print(f"prepare init '{_PATH_INFO}' - replace version by {now_date}")
with open(_PATH_INFO, 'r') as fp:
init = fp.read()
init = re.sub(r'__version__ = [\d\.\w\'"]+', f'__version__ = "{now_date}"', init)
with open(_PATH_INIT, 'w') as fp:
with open(_PATH_INFO, 'w') as fp:
fp.write(init)
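For context, a minimal sketch (not part of this diff) of what the script's `re.sub` call does: whatever `__version__` string currently sits in `__info__.py` is replaced by today's date, producing a nightly version such as `20210410`.

```python
import datetime
import re

# illustrative sample of the metadata file contents
init = "__version__ = '1.3.0rc1'\n__author__ = 'William Falcon et al.'\n"

now_date = datetime.datetime.now().strftime("%Y%m%d")
# same substitution as in prepare-nightly_version.py: the quoted version
# string is swapped for today's date in double quotes
init = re.sub(r'__version__ = [\d\.\w\'"]+', f'__version__ = "{now_date}"', init)
print(init)  # -> __version__ = "20210410" (date varies); the author line is unchanged
```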
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -231,6 +231,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Fixed `EarlyStopping` logic when `min_epochs` or `min_steps` requirement is not met ([#6705](https://github.com/PyTorchLightning/pytorch-lightning/pull/6705))


- Fixed TPU Spawn all gather ([#6896](https://github.com/PyTorchLightning/pytorch-lightning/pull/6896))


- Fixed `--gpus` default for parser returned by `Trainer.add_argparse_args` ([#6898](https://github.com/PyTorchLightning/pytorch-lightning/pull/6898))


15 changes: 15 additions & 0 deletions docs/source/api_references.rst
@@ -1,6 +1,21 @@
API References
==============

Accelerator API
---------------

.. currentmodule:: pytorch_lightning.accelerators

.. autosummary::
:toctree: api
:nosignatures:
:template: classtemplate.rst

Accelerator
CPUAccelerator
GPUAccelerator
TPUAccelerator

Core API
--------

4 changes: 2 additions & 2 deletions docs/source/conf.py
@@ -28,11 +28,11 @@
SPHINX_MOCK_REQUIREMENTS = int(os.environ.get('SPHINX_MOCK_REQUIREMENTS', True))

try:
from pytorch_lightning import info
from pytorch_lightning import __info__ as info
except ImportError:
# alternative https://stackoverflow.com/a/67692/4521646
sys.path.append(os.path.join(PATH_ROOT, "pytorch_lightning"))
import info
import __info__ as info

# -- Project documents -------------------------------------------------------

54 changes: 50 additions & 4 deletions docs/source/extensions/accelerators.rst
@@ -1,10 +1,56 @@
.. _accelerators:

############
Accelerators
############
Accelerators connect a Lightning Trainer to arbitrary accelerators (CPUs, GPUs, TPUs, etc). Accelerators
also manage distributed accelerators (like DP, DDP, HPC cluster).

Accelerators can also be configured to run on arbitrary clusters using Plugins or to link up to arbitrary
also manage distributed communication through :ref:`Plugins` (like DP, DDP, HPC cluster) and
can also be configured to run on arbitrary clusters or to link up to arbitrary
computational strategies like 16-bit precision via AMP and Apex.

**For help setting up custom plugin/accelerator please reach out to us at [email protected]**
An Accelerator is meant to deal with one type of hardware.
Currently there are accelerators for:

- CPU
- GPU
- TPU

Each Accelerator gets two plugins upon initialization:
One to handle differences from the training routine and one to handle different precisions.

.. testcode::

    from pytorch_lightning import Trainer
    from pytorch_lightning.accelerators import GPUAccelerator
    from pytorch_lightning.plugins import NativeMixedPrecisionPlugin, DDPPlugin

    accelerator = GPUAccelerator(
        precision_plugin=NativeMixedPrecisionPlugin(),
        training_type_plugin=DDPPlugin(),
    )
    trainer = Trainer(accelerator=accelerator)


We expose Accelerators and Plugins mainly for expert users who want to extend Lightning to work with new
hardware and distributed training or clusters.


.. warning:: The Accelerator API is in beta and subject to change.
For help setting up custom plugins/accelerators, please reach out to us at **[email protected]**

----------


Accelerator API
---------------

.. currentmodule:: pytorch_lightning.accelerators

.. autosummary::
:nosignatures:
:template: classtemplate.rst

Accelerator
CPUAccelerator
GPUAccelerator
TPUAccelerator
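As a complement to the GPU example in the docs above, here is a minimal CPU-only sketch (not part of this commit; the plugin classes named below are assumed to be importable from `pytorch_lightning.plugins` in this release) wiring up the two plugins every Accelerator expects:

```python
import torch

from pytorch_lightning import Trainer
from pytorch_lightning.accelerators import CPUAccelerator
from pytorch_lightning.plugins import PrecisionPlugin, SingleDevicePlugin

# an Accelerator is assembled from a precision plugin and a training-type plugin
accelerator = CPUAccelerator(
    precision_plugin=PrecisionPlugin(),  # plain 32-bit precision
    training_type_plugin=SingleDevicePlugin(torch.device("cpu")),
)
trainer = Trainer(accelerator=accelerator)
```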
2 changes: 2 additions & 0 deletions docs/source/extensions/plugins.rst
@@ -1,3 +1,5 @@
.. _plugins:

#######
Plugins
#######
pytorch_lightning/__info__.py
@@ -1,7 +1,7 @@
import time

_this_year = time.strftime("%Y")
__version__ = '1.3.0rc0'
__version__ = '1.3.0rc1'
__author__ = 'William Falcon et al.'
__author_email__ = '[email protected]'
__license__ = 'Apache-2.0'
2 changes: 1 addition & 1 deletion pytorch_lightning/__init__.py
@@ -3,7 +3,7 @@
import logging
import os

from pytorch_lightning.info import ( # noqa: F401
from pytorch_lightning.__info__ import ( # noqa: F401
__author__,
__author_email__,
__copyright__,
85 changes: 47 additions & 38 deletions pytorch_lightning/accelerators/accelerator.py
@@ -12,12 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib
from typing import Any, Callable, Dict, Generator, Iterable, List, Optional, Sequence, TYPE_CHECKING, Union
from typing import Any, Callable, Dict, Generator, Iterable, List, Optional, Sequence, Union

import torch
from torch.optim import Optimizer
from torch.utils.data import DataLoader

import pytorch_lightning as pl
from pytorch_lightning.core import LightningModule
from pytorch_lightning.plugins.precision import ApexMixedPrecisionPlugin, NativeMixedPrecisionPlugin, PrecisionPlugin
from pytorch_lightning.plugins.training_type import TrainingTypePlugin
@@ -26,11 +27,6 @@
from pytorch_lightning.utilities.apply_func import move_data_to_device
from pytorch_lightning.utilities.enums import AMPType, GradClipAlgorithmType, LightningEnum

if TYPE_CHECKING:
from torch.cuda.amp import GradScaler

from pytorch_lightning.trainer.trainer import Trainer

_STEP_OUTPUT_TYPE = Union[torch.Tensor, Dict[str, torch.Tensor], None]


@@ -40,6 +36,7 @@ class Accelerator(object):
An Accelerator is meant to deal with one type of Hardware.
Currently there are accelerators for:
- CPU
- GPU
- TPU
@@ -79,9 +76,10 @@ def setup_environment(self) -> None:
"""
self.training_type_plugin.setup_environment()

def setup(self, trainer: 'Trainer', model: LightningModule) -> None:
def setup(self, trainer: 'pl.Trainer', model: LightningModule) -> None:
"""
Setup plugins for the trainer fit and creates optimizers.
Args:
trainer: the trainer instance
model: the LightningModule
@@ -91,23 +89,23 @@ def setup(self, trainer: 'Trainer', model: LightningModule) -> None:
self.setup_optimizers(trainer)
self.setup_precision_plugin(self.precision_plugin)

def start_training(self, trainer: 'Trainer') -> None:
def start_training(self, trainer: 'pl.Trainer') -> None:
self.training_type_plugin.start_training(trainer)

def start_evaluating(self, trainer: 'Trainer') -> None:
def start_evaluating(self, trainer: 'pl.Trainer') -> None:
self.training_type_plugin.start_evaluating(trainer)

def start_predicting(self, trainer: 'Trainer') -> None:
def start_predicting(self, trainer: 'pl.Trainer') -> None:
self.training_type_plugin.start_predicting(trainer)

def pre_dispatch(self, trainer: 'Trainer') -> None:
def pre_dispatch(self, trainer: 'pl.Trainer') -> None:
"""Hook to do something before the training/evaluation/prediction starts."""
self.training_type_plugin.pre_dispatch()
if self.training_type_plugin.setup_optimizers_in_pre_dispatch:
self.setup_optimizers(trainer)
self.precision_plugin.pre_dispatch()

def post_dispatch(self, trainer: 'Trainer') -> None:
def post_dispatch(self, trainer: 'pl.Trainer') -> None:
"""Hook to do something before the training/evaluation/prediction starts."""
self.training_type_plugin.post_dispatch()
self.precision_plugin.post_dispatch()
@@ -169,12 +167,13 @@ def training_step(
Args:
args: the arguments for the models training step. Can consist of the following:
batch (:class:`~torch.Tensor` | (:class:`~torch.Tensor`, ...) | [:class:`~torch.Tensor`, ...]):
The output of your :class:`~torch.utils.data.DataLoader`. A tensor, tuple or list.
batch_idx (int): Integer displaying index of this batch
optimizer_idx (int): When using multiple optimizers, this argument will also be present.
hiddens(:class:`~torch.Tensor`): Passed in if
:paramref:`~pytorch_lightning.trainer.trainer.Trainer.truncated_bptt_steps` > 0.
- batch (:class:`~torch.Tensor` | (:class:`~torch.Tensor`, ...) | [:class:`~torch.Tensor`, ...]):
The output of your :class:`~torch.utils.data.DataLoader`. A tensor, tuple or list.
- batch_idx (int): Integer displaying index of this batch
- optimizer_idx (int): When using multiple optimizers, this argument will also be present.
- hiddens(:class:`~torch.Tensor`): Passed in if
:paramref:`~pytorch_lightning.trainer.trainer.Trainer.truncated_bptt_steps` > 0.
"""
args[0] = self.to_device(args[0])
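To make the bulleted docstring concrete, a small illustrative snippet (not part of this diff; all values are made up) of what the positional `args` list can contain when it reaches `training_step`:

```python
import torch

# hypothetical contents of the `args` list received by Accelerator.training_step
batch = (torch.randn(32, 10), torch.randint(0, 2, (32,)))  # whatever the DataLoader yields
batch_idx = 0
optimizer_idx = 1              # present only when multiple optimizers are configured
hiddens = torch.zeros(32, 20)  # present only when truncated_bptt_steps > 0
args = [batch, batch_idx, optimizer_idx, hiddens]
```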
@@ -190,11 +189,12 @@ def validation_step(self, args: List[Union[Any, int]]) -> _STEP_OUTPUT_TYPE:
Args:
args: the arguments for the models validation step. Can consist of the following:
batch (:class:`~torch.Tensor` | (:class:`~torch.Tensor`, ...) | [:class:`~torch.Tensor`, ...]):
The output of your :class:`~torch.utils.data.DataLoader`. A tensor, tuple or list.
batch_idx (int): The index of this batch
dataloader_idx (int): The index of the dataloader that produced this batch
(only if multiple val dataloaders used)
- batch (:class:`~torch.Tensor` | (:class:`~torch.Tensor`, ...) | [:class:`~torch.Tensor`, ...]):
The output of your :class:`~torch.utils.data.DataLoader`. A tensor, tuple or list.
- batch_idx (int): The index of this batch
- dataloader_idx (int): The index of the dataloader that produced this batch
(only if multiple val dataloaders used)
"""
batch = self.to_device(args[0])

@@ -208,11 +208,12 @@ def test_step(self, args: List[Union[Any, int]]) -> _STEP_OUTPUT_TYPE:
Args:
args: the arguments for the models test step. Can consist of the following:
batch (:class:`~torch.Tensor` | (:class:`~torch.Tensor`, ...) | [:class:`~torch.Tensor`, ...]):
The output of your :class:`~torch.utils.data.DataLoader`. A tensor, tuple or list.
batch_idx (int): The index of this batch.
dataloader_idx (int): The index of the dataloader that produced this batch
(only if multiple test dataloaders used).
- batch (:class:`~torch.Tensor` | (:class:`~torch.Tensor`, ...) | [:class:`~torch.Tensor`, ...]):
The output of your :class:`~torch.utils.data.DataLoader`. A tensor, tuple or list.
- batch_idx (int): The index of this batch.
- dataloader_idx (int): The index of the dataloader that produced this batch
(only if multiple test dataloaders used).
"""
batch = self.to_device(args[0])

@@ -226,11 +227,13 @@ def predict_step(self, args: List[Union[Any, int]]) -> _STEP_OUTPUT_TYPE:
Args:
args: the arguments for the models predict step. Can consist of the following:
batch (:class:`~torch.Tensor` | (:class:`~torch.Tensor`, ...) | [:class:`~torch.Tensor`, ...]):
The output of your :class:`~torch.utils.data.DataLoader`. A tensor, tuple or list.
batch_idx (int): The index of this batch.
dataloader_idx (int): The index of the dataloader that produced this batch
(only if multiple predict dataloaders used).
- batch (:class:`~torch.Tensor` | (:class:`~torch.Tensor`, ...) | [:class:`~torch.Tensor`, ...]):
The output of your :class:`~torch.utils.data.DataLoader`. A tensor, tuple or list.
- batch_idx (int): The index of this batch.
- dataloader_idx (int): The index of the dataloader that produced this batch
(only if multiple predict dataloaders used).
"""
batch = self.to_device(args[0])

@@ -336,7 +339,7 @@ def on_train_end(self) -> None:
"""Hook to do something at the end of the training"""
pass

def setup_optimizers(self, trainer: 'Trainer') -> None:
def setup_optimizers(self, trainer: 'pl.Trainer') -> None:
"""creates optimizers and schedulers
Args:
@@ -385,7 +388,7 @@ def precision(self) -> Union[str, int]:
return self.precision_plugin.precision

@property
def scaler(self) -> Optional['GradScaler']:
def scaler(self) -> Optional['torch.cuda.amp.GradScaler']:

return getattr(self.precision_plugin, 'scaler', None)

@@ -423,6 +426,7 @@ def all_gather(self, tensor: torch.Tensor, group: Optional[Any] = None, sync_gra
tensor: tensor of shape (batch, ...)
group: the process group to gather results from. Defaults to all processes (world)
sync_grads: flag that allows users to synchronize gradients for all_gather op
Return:
A tensor of shape (world_size, batch, ...)
"""
@@ -451,7 +455,8 @@ def model_sharded_context(self) -> Generator[None, None, None]:
shard the model instantly - useful for extremely large models. Can save memory and
initialization time.
Returns: Model parallel context.
Returns:
Model parallel context.
"""
with self.training_type_plugin.model_sharded_context():
yield
@@ -498,7 +503,9 @@ def call_configure_sharded_model_hook(self) -> bool:
"""
Allow model parallel hook to be called in suitable environments determined by the training type plugin.
This is useful for when we want to shard the model once within fit.
Returns: True if we want to call the model parallel setup hook.
Returns:
True if we want to call the model parallel setup hook.
"""
return self.training_type_plugin.call_configure_sharded_model_hook

@@ -512,7 +519,9 @@ def setup_optimizers_in_pre_dispatch(self) -> bool:
Override to delay setting optimizers and schedulers till after dispatch.
This is useful when the `TrainingTypePlugin` requires operating on the wrapped accelerator model.
However this may break certain precision plugins such as APEX which require optimizers to be set.
Returns: If True, delay setup optimizers till pre_dispatch, else call within setup.
Returns:
If True, delay setup optimizers until `pre_dispatch`, else call within `setup`.
"""
return self.training_type_plugin.setup_optimizers_in_pre_dispatch

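The recurring change in this file replaces the `TYPE_CHECKING` imports with a plain `import pytorch_lightning as pl` plus string annotations such as `'pl.Trainer'`. A minimal sketch of the pattern (the class name below is illustrative, not from the codebase):

```python
import pytorch_lightning as pl


class MyComponent:
    # The annotation stays a string, so pytorch_lightning.Trainer does not have
    # to be resolvable while this module is first imported; only type checkers
    # and runtime introspection ever evaluate it, which sidesteps circular imports.
    def setup(self, trainer: 'pl.Trainer') -> None:
        print(f"setting up with {type(trainer).__name__}")
```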
10 changes: 3 additions & 7 deletions pytorch_lightning/accelerators/cpu.py
@@ -11,20 +11,16 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING

import pytorch_lightning as pl
from pytorch_lightning.accelerators.accelerator import Accelerator
from pytorch_lightning.plugins.precision import MixedPrecisionPlugin
from pytorch_lightning.utilities.exceptions import MisconfigurationException

if TYPE_CHECKING:
from pytorch_lightning.core.lightning import LightningModule
from pytorch_lightning.trainer.trainer import Trainer


class CPUAccelerator(Accelerator):
""" Accelerator for CPU devices. """

def setup(self, trainer: 'Trainer', model: 'LightningModule') -> None:
def setup(self, trainer: 'pl.Trainer', model: 'pl.LightningModule') -> None:
"""
Raises:
MisconfigurationException:
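For context, a hedged sketch of the kind of guard the `Raises: MisconfigurationException` docstring refers to; the exact condition and message used by `CPUAccelerator.setup` may differ, and the helper name below is made up:

```python
from pytorch_lightning.plugins.precision import MixedPrecisionPlugin
from pytorch_lightning.utilities.exceptions import MisconfigurationException


def _check_cpu_precision(precision_plugin) -> None:
    # mixed (16-bit) precision depends on CUDA autocast/GradScaler, which a CPU
    # accelerator cannot provide, so setup rejects the combination up front
    if isinstance(precision_plugin, MixedPrecisionPlugin):
        raise MisconfigurationException("amp + cpu is not supported. Please use a GPU option")
```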