From f29265669271fd2357d80cf61175da0aafc09144 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Fri, 17 Dec 2021 14:42:57 -0800 Subject: [PATCH 1/8] Rename DDPFullyShardedPlugin to DDPFullyShardedStrategy --- pytorch_lightning/plugins/__init__.py | 4 ++-- pytorch_lightning/plugins/training_type/__init__.py | 2 +- pytorch_lightning/plugins/training_type/fully_sharded.py | 2 +- .../trainer/connectors/accelerator_connector.py | 6 +++--- .../plugins/test_ddp_fully_sharded_with_full_state_dict.py | 6 +++--- tests/plugins/test_plugins_registry.py | 6 +++--- tests/trainer/test_trainer.py | 4 ++-- 7 files changed, 15 insertions(+), 15 deletions(-) diff --git a/pytorch_lightning/plugins/__init__.py b/pytorch_lightning/plugins/__init__.py index 5ccbe8957694b..bb4ea4e6f6819 100644 --- a/pytorch_lightning/plugins/__init__.py +++ b/pytorch_lightning/plugins/__init__.py @@ -24,7 +24,7 @@ from pytorch_lightning.plugins.training_type.ddp_spawn import DDPSpawnPlugin from pytorch_lightning.plugins.training_type.deepspeed import DeepSpeedPlugin from pytorch_lightning.plugins.training_type.dp import DataParallelPlugin -from pytorch_lightning.plugins.training_type.fully_sharded import DDPFullyShardedPlugin +from pytorch_lightning.plugins.training_type.fully_sharded import DDPFullyShardedStrategy from pytorch_lightning.plugins.training_type.horovod import HorovodPlugin from pytorch_lightning.plugins.training_type.ipu import IPUPlugin from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin @@ -47,7 +47,7 @@ "DDP2Plugin", "DDPPlugin", "DDPSpawnPlugin", - "DDPFullyShardedPlugin", + "DDPFullyShardedStrategy", "DeepSpeedPlugin", "DeepSpeedPrecisionPlugin", "DoublePrecisionPlugin", diff --git a/pytorch_lightning/plugins/training_type/__init__.py b/pytorch_lightning/plugins/training_type/__init__.py index 6a56d68e17db9..52ef2932198d0 100644 --- a/pytorch_lightning/plugins/training_type/__init__.py +++ b/pytorch_lightning/plugins/training_type/__init__.py @@ -3,7 +3,7 @@ from pytorch_lightning.plugins.training_type.ddp_spawn import DDPSpawnPlugin # noqa: F401 from pytorch_lightning.plugins.training_type.deepspeed import DeepSpeedPlugin # noqa: F401 from pytorch_lightning.plugins.training_type.dp import DataParallelPlugin # noqa: F401 -from pytorch_lightning.plugins.training_type.fully_sharded import DDPFullyShardedPlugin # noqa: F401 +from pytorch_lightning.plugins.training_type.fully_sharded import DDPFullyShardedStrategy # noqa: F401 from pytorch_lightning.plugins.training_type.horovod import HorovodPlugin # noqa: F401 from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin # noqa: F401 from pytorch_lightning.plugins.training_type.sharded import DDPShardedPlugin # noqa: F401 diff --git a/pytorch_lightning/plugins/training_type/fully_sharded.py b/pytorch_lightning/plugins/training_type/fully_sharded.py index 475701e13f593..2022dcbd33b34 100644 --- a/pytorch_lightning/plugins/training_type/fully_sharded.py +++ b/pytorch_lightning/plugins/training_type/fully_sharded.py @@ -31,7 +31,7 @@ from fairscale.nn.data_parallel import FullyShardedDataParallel -class DDPFullyShardedPlugin(DDPPlugin): +class DDPFullyShardedStrategy(DDPPlugin): distributed_backend = _StrategyType.DDP_FULLY_SHARDED diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 0154bc94acac1..4c41b38290ab6 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ 
b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -29,7 +29,7 @@ CheckpointIO, DataParallelPlugin, DDP2Plugin, - DDPFullyShardedPlugin, + DDPFullyShardedStrategy, DDPPlugin, DDPShardedPlugin, DDPSpawnPlugin, @@ -540,7 +540,7 @@ def _is_sharded_training_type(self) -> bool: @property def _is_fully_sharded_training_type(self) -> bool: - return isinstance(self._training_type_plugin, DDPFullyShardedPlugin) + return isinstance(self._training_type_plugin, DDPFullyShardedStrategy) @property def is_distributed(self) -> bool: @@ -737,7 +737,7 @@ def select_training_type_plugin(self) -> TrainingTypePlugin: elif use_ddp_spawn or use_ddp_cpu_spawn: ddp_plugin_cls = DDPSpawnPlugin elif use_ddp_fully_sharded: - ddp_plugin_cls = DDPFullyShardedPlugin + ddp_plugin_cls = DDPFullyShardedStrategy else: ddp_plugin_cls = DDPPlugin diff --git a/tests/plugins/test_ddp_fully_sharded_with_full_state_dict.py b/tests/plugins/test_ddp_fully_sharded_with_full_state_dict.py index c4a2eeaf74c0b..b7a93b52cdb0c 100644 --- a/tests/plugins/test_ddp_fully_sharded_with_full_state_dict.py +++ b/tests/plugins/test_ddp_fully_sharded_with_full_state_dict.py @@ -7,7 +7,7 @@ from pytorch_lightning import Trainer from pytorch_lightning.callbacks import ModelCheckpoint -from pytorch_lightning.plugins import DDPFullyShardedPlugin, FullyShardedNativeMixedPrecisionPlugin +from pytorch_lightning.plugins import DDPFullyShardedStrategy, FullyShardedNativeMixedPrecisionPlugin from pytorch_lightning.utilities import _FAIRSCALE_FULLY_SHARDED_AVAILABLE from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.helpers.boring_model import BoringModel @@ -23,7 +23,7 @@ def test_invalid_on_cpu(tmpdir): MisconfigurationException, match="You selected accelerator to be `ddp_fully_sharded`, but GPU is not available." 
): trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, strategy="fsdp") - assert isinstance(trainer.training_type_plugin, DDPFullyShardedPlugin) + assert isinstance(trainer.training_type_plugin, DDPFullyShardedStrategy) trainer.training_type_plugin.setup_environment() @@ -34,7 +34,7 @@ def test_invalid_on_cpu(tmpdir): def test_fsdp_with_sharded_amp(device_count_mock, mock_cuda_available, tmpdir): """Test to ensure that plugin native amp plugin is correctly chosen when using sharded.""" trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, strategy="fsdp", gpus=1, precision=16) - assert isinstance(trainer.training_type_plugin, DDPFullyShardedPlugin) + assert isinstance(trainer.training_type_plugin, DDPFullyShardedStrategy) assert isinstance(trainer.training_type_plugin.precision_plugin, FullyShardedNativeMixedPrecisionPlugin) diff --git a/tests/plugins/test_plugins_registry.py b/tests/plugins/test_plugins_registry.py index b4ec7574fedd1..148e3fd85b7ee 100644 --- a/tests/plugins/test_plugins_registry.py +++ b/tests/plugins/test_plugins_registry.py @@ -16,7 +16,7 @@ from pytorch_lightning import Trainer from pytorch_lightning.plugins import ( CheckpointIO, - DDPFullyShardedPlugin, + DDPFullyShardedStrategy, DDPPlugin, DDPShardedPlugin, DDPSpawnPlugin, @@ -99,11 +99,11 @@ def test_fsdp_plugins_registry(tmpdir): plugin = "fsdp" assert plugin in TrainingTypePluginsRegistry - assert TrainingTypePluginsRegistry[plugin]["plugin"] == DDPFullyShardedPlugin + assert TrainingTypePluginsRegistry[plugin]["plugin"] == DDPFullyShardedStrategy trainer = Trainer(strategy=plugin) - assert isinstance(trainer.training_type_plugin, DDPFullyShardedPlugin) + assert isinstance(trainer.training_type_plugin, DDPFullyShardedStrategy) @pytest.mark.parametrize( diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index cad75fb23492c..61a75963b26a3 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -40,7 +40,7 @@ from pytorch_lightning.plugins import ( DataParallelPlugin, DDP2Plugin, - DDPFullyShardedPlugin, + DDPFullyShardedStrategy, DDPPlugin, DDPShardedPlugin, DDPSpawnPlugin, @@ -2210,7 +2210,7 @@ def training_step(self, batch, batch_idx): dict(_distrib_type=_StrategyType.DP, _device_type=_AcceleratorType.GPU, num_gpus=2, num_processes=1), ), ( - dict(strategy=DDPFullyShardedPlugin(), gpus=2), + dict(strategy=DDPFullyShardedStrategy(), gpus=2), dict( _distrib_type=_StrategyType.DDP_FULLY_SHARDED, _device_type=_AcceleratorType.GPU, From d1d9e3db3564ebc020f41ace55f0176844fcff12 Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Fri, 17 Dec 2021 16:26:07 -0800 Subject: [PATCH 2/8] Renamed the `DDPSpawnPlugin` to `DDPSpawnStrategy` --- CHANGELOG.md | 3 ++ docs/source/advanced/training_tricks.rst | 2 +- docs/source/api_references.rst | 2 +- docs/source/extensions/plugins.rst | 2 +- docs/source/guides/speed.rst | 4 +-- pytorch_lightning/core/lightning.py | 4 +-- pytorch_lightning/distributed/dist.py | 2 +- pytorch_lightning/lite/lite.py | 12 +++++-- .../loops/dataloader/prediction_loop.py | 10 +++--- pytorch_lightning/plugins/__init__.py | 4 +-- .../plugins/training_type/__init__.py | 2 +- .../plugins/training_type/ddp_spawn.py | 2 +- .../plugins/training_type/sharded_spawn.py | 4 +-- .../plugins/training_type/tpu_spawn.py | 4 +-- .../trainer/configuration_validator.py | 4 +-- .../connectors/accelerator_connector.py | 4 +-- pytorch_lightning/trainer/trainer.py | 8 ++--- .../test_accelerator_connector.py | 32 +++++++++---------- tests/callbacks/test_stochastic_weight_avg.py | 4 +--
tests/lite/test_parity.py | 4 +-- tests/models/test_sync_batchnorm.py | 4 +-- .../plugins/test_ddp_plugin_with_comm_hook.py | 4 +-- tests/plugins/test_ddp_spawn_plugin.py | 24 +++++++------- tests/plugins/test_plugins_registry.py | 4 +-- tests/trainer/test_trainer.py | 10 +++--- 25 files changed, 84 insertions(+), 75 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d769406c54a17..74e28ac14d9f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -132,6 +132,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - DeepSpeed does not require lightning module zero 3 partitioning ([#10655](https://github.com/PyTorchLightning/pytorch-lightning/pull/10655)) +- Renamed the `DDPSpawnPlugin` to `DDPSpawnStrategy` ([#11144](https://github.com/PyTorchLightning/pytorch-lightning/pull/11144)) + + ### Deprecated - Deprecated `ClusterEnvironment.master_{address,port}` in favor of `ClusterEnvironment.main_{address,port}` ([#10103](https://github.com/PyTorchLightning/pytorch-lightning/issues/10103)) diff --git a/docs/source/advanced/training_tricks.rst b/docs/source/advanced/training_tricks.rst index c6be9d12fa843..97e86c5efd1c7 100644 --- a/docs/source/advanced/training_tricks.rst +++ b/docs/source/advanced/training_tricks.rst @@ -178,7 +178,7 @@ For example, when training Graph Neural Networks, a common strategy is to load t A simple way to prevent redundant dataset replicas is to rely on :obj:`torch.multiprocessing` to share the `data automatically between spawned processes via shared memory `_. For this, all data pre-loading should be done on the main process inside :meth:`DataModule.__init__`. -As a result, all tensor-data will get automatically shared when using the :class:`~pytorch_lightning.plugins.DDPSpawnPlugin` training type plugin: +As a result, all tensor-data will get automatically shared when using the :class:`~pytorch_lightning.plugins.DDPSpawnStrategy` training type plugin: .. warning:: diff --git a/docs/source/api_references.rst b/docs/source/api_references.rst index ca3220b0f5592..86615c822a6c4 100644 --- a/docs/source/api_references.rst +++ b/docs/source/api_references.rst @@ -154,7 +154,7 @@ Training Type Plugins DDP2Plugin DDPShardedPlugin DDPSpawnShardedPlugin - DDPSpawnPlugin + DDPSpawnStrategy DeepSpeedPlugin HorovodPlugin SingleTPUPlugin diff --git a/docs/source/extensions/plugins.rst b/docs/source/extensions/plugins.rst index f791df894d0c8..59b95c5f64a4f 100644 --- a/docs/source/extensions/plugins.rst +++ b/docs/source/extensions/plugins.rst @@ -113,7 +113,7 @@ Training Type Plugins DDP2Plugin DDPShardedPlugin DDPSpawnShardedPlugin - DDPSpawnPlugin + DDPSpawnStrategy DeepSpeedPlugin HorovodPlugin SingleTPUPlugin diff --git a/docs/source/guides/speed.rst b/docs/source/guides/speed.rst index 572da420d4280..ef79d7be18774 100644 --- a/docs/source/guides/speed.rst +++ b/docs/source/guides/speed.rst @@ -95,11 +95,11 @@ This by default comes with a performance hit, and can be disabled in most cases. .. 
code-block:: python - from pytorch_lightning.plugins import DDPSpawnPlugin + from pytorch_lightning.plugins import DDPSpawnStrategy trainer = pl.Trainer( gpus=2, - strategy=DDPSpawnPlugin(find_unused_parameters=False), + strategy=DDPSpawnStrategy(find_unused_parameters=False), ) When using DDP on a multi-node cluster, set NCCL parameters diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index c7b6d1ced35e1..87557e5032bb8 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1924,7 +1924,7 @@ def add_to_queue(self, queue: pl.plugins.training_type.ddp_spawn._FakeQueue) -> queue: the instance of the queue to append the data. .. deprecated:: v1.5 - This method was deprecated in v1.5 in favor of `DDPSpawnPlugin.add_to_queue` + This method was deprecated in v1.5 in favor of `DDPSpawnStrategy.add_to_queue` and will be removed in v1.7. """ @@ -1936,7 +1936,7 @@ def get_from_queue(self, queue: pl.plugins.training_type.ddp_spawn._FakeQueue) - queue: the instance of the queue from where to get the data. .. deprecated:: v1.5 - This method was deprecated in v1.5 in favor of `DDPSpawnPlugin.get_from_queue` + This method was deprecated in v1.5 in favor of `DDPSpawnStrategy.get_from_queue` and will be removed in v1.7. """ diff --git a/pytorch_lightning/distributed/dist.py b/pytorch_lightning/distributed/dist.py index a0054d17936b0..83552cd4962e5 100644 --- a/pytorch_lightning/distributed/dist.py +++ b/pytorch_lightning/distributed/dist.py @@ -23,7 +23,7 @@ class LightningDistributed: """ .. deprecated:: v1.5 This class is deprecated in v1.5 and will be removed in v1.7. - The broadcast logic will be moved to the :class:`DDPPlugin` and :class`DDPSpawnPlugin` classes. + The broadcast logic will be moved to the :class:`DDPPlugin` and :class`DDPSpawnStrategy` classes. 
""" def __init__(self, rank=None, device=None): diff --git a/pytorch_lightning/lite/lite.py b/pytorch_lightning/lite/lite.py index a07ed1cc3dfab..c40485960f8df 100644 --- a/pytorch_lightning/lite/lite.py +++ b/pytorch_lightning/lite/lite.py @@ -26,7 +26,13 @@ from pytorch_lightning.accelerators.accelerator import Accelerator from pytorch_lightning.lite.wrappers import _LiteDataLoader, _LiteModule, _LiteOptimizer -from pytorch_lightning.plugins import DDPSpawnPlugin, DeepSpeedPlugin, PLUGIN_INPUT, TPUSpawnPlugin, TrainingTypePlugin +from pytorch_lightning.plugins import ( + DDPSpawnStrategy, + DeepSpeedPlugin, + PLUGIN_INPUT, + TPUSpawnPlugin, + TrainingTypePlugin, +) from pytorch_lightning.plugins.training_type.training_type_plugin import TBroadcast from pytorch_lightning.trainer.connectors.accelerator_connector import AcceleratorConnector from pytorch_lightning.utilities import _AcceleratorType, _StrategyType, move_data_to_device @@ -310,7 +316,7 @@ def to_device(self, obj: Union[nn.Module, Tensor, Any]) -> Union[nn.Module, Tens """ if isinstance(obj, nn.Module): if self.device.type == "cuda": - # need to call this manually here again in case we spawned with DDPSpawnPlugin + # need to call this manually here again in case we spawned with DDPSpawnStrategy # TODO: refactor to let plugin handle this cleanly torch.cuda.set_device(self.device) return obj.to(self.device) @@ -403,7 +409,7 @@ def _run_impl(self, run_method: Callable, *args: Any, **kwargs: Any) -> Any: # apply sharded context to prevent OOM run_method = partial(self._run_with_sharded_context, run_method) - if isinstance(self._strategy, DDPSpawnPlugin): + if isinstance(self._strategy, DDPSpawnStrategy): return self._strategy.spawn(run_method, *args, **kwargs) else: return run_method(*args, **kwargs) diff --git a/pytorch_lightning/loops/dataloader/prediction_loop.py b/pytorch_lightning/loops/dataloader/prediction_loop.py index 8a0b50a30a9fa..6c316b5da2874 100644 --- a/pytorch_lightning/loops/dataloader/prediction_loop.py +++ b/pytorch_lightning/loops/dataloader/prediction_loop.py @@ -5,7 +5,7 @@ from pytorch_lightning.loops.dataloader.dataloader_loop import DataLoaderLoop from pytorch_lightning.loops.epoch.prediction_epoch_loop import PredictionEpochLoop -from pytorch_lightning.plugins import DDPSpawnPlugin +from pytorch_lightning.plugins import DDPSpawnStrategy from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.types import _PREDICT_OUTPUT @@ -29,14 +29,14 @@ def return_predictions(self) -> bool: @return_predictions.setter def return_predictions(self, return_predictions: Optional[bool] = None) -> None: - # `DDPSpawnPlugin` plugins and derivatives don't support return predictions. - is_ddp_spawn = isinstance(self.trainer.training_type_plugin, DDPSpawnPlugin) + # `DDPSpawnStrategy` plugins and derivatives don't support return predictions. + is_ddp_spawn = isinstance(self.trainer.training_type_plugin, DDPSpawnStrategy) if return_predictions and is_ddp_spawn: raise MisconfigurationException( - "`return_predictions` should be set to `False` when using the `DDPSpawnPlugin` or children class. " + "`return_predictions` should be set to `False` when using the `DDPSpawnStrategy` or children class. " f"Found {return_predictions} with training_type_plugin {type(self.trainer.training_type_plugin)}." ) - # For non `DDPSpawnPlugin` plugin, the `return_predictions` is True by default unless user decide otherwise. 
+ # For non `DDPSpawnStrategy` plugin, the `return_predictions` is True by default unless user decide otherwise. self._return_predictions = not is_ddp_spawn if return_predictions is None else return_predictions @property diff --git a/pytorch_lightning/plugins/__init__.py b/pytorch_lightning/plugins/__init__.py index bb4ea4e6f6819..8497e532eedd1 100644 --- a/pytorch_lightning/plugins/__init__.py +++ b/pytorch_lightning/plugins/__init__.py @@ -21,7 +21,7 @@ from pytorch_lightning.plugins.precision.tpu_bf16 import TPUBf16PrecisionPlugin from pytorch_lightning.plugins.training_type.ddp import DDPPlugin from pytorch_lightning.plugins.training_type.ddp2 import DDP2Plugin -from pytorch_lightning.plugins.training_type.ddp_spawn import DDPSpawnPlugin +from pytorch_lightning.plugins.training_type.ddp_spawn import DDPSpawnStrategy from pytorch_lightning.plugins.training_type.deepspeed import DeepSpeedPlugin from pytorch_lightning.plugins.training_type.dp import DataParallelPlugin from pytorch_lightning.plugins.training_type.fully_sharded import DDPFullyShardedStrategy @@ -46,7 +46,7 @@ "DataParallelPlugin", "DDP2Plugin", "DDPPlugin", - "DDPSpawnPlugin", + "DDPSpawnStrategy", "DDPFullyShardedStrategy", "DeepSpeedPlugin", "DeepSpeedPrecisionPlugin", diff --git a/pytorch_lightning/plugins/training_type/__init__.py b/pytorch_lightning/plugins/training_type/__init__.py index 52ef2932198d0..eb9663de5ea36 100644 --- a/pytorch_lightning/plugins/training_type/__init__.py +++ b/pytorch_lightning/plugins/training_type/__init__.py @@ -1,6 +1,6 @@ from pytorch_lightning.plugins.training_type.ddp import DDPPlugin # noqa: F401 from pytorch_lightning.plugins.training_type.ddp2 import DDP2Plugin # noqa: F401 -from pytorch_lightning.plugins.training_type.ddp_spawn import DDPSpawnPlugin # noqa: F401 +from pytorch_lightning.plugins.training_type.ddp_spawn import DDPSpawnStrategy # noqa: F401 from pytorch_lightning.plugins.training_type.deepspeed import DeepSpeedPlugin # noqa: F401 from pytorch_lightning.plugins.training_type.dp import DataParallelPlugin # noqa: F401 from pytorch_lightning.plugins.training_type.fully_sharded import DDPFullyShardedStrategy # noqa: F401 diff --git a/pytorch_lightning/plugins/training_type/ddp_spawn.py b/pytorch_lightning/plugins/training_type/ddp_spawn.py index 0468177e4aa4a..c6f3f0d672dd9 100644 --- a/pytorch_lightning/plugins/training_type/ddp_spawn.py +++ b/pytorch_lightning/plugins/training_type/ddp_spawn.py @@ -54,7 +54,7 @@ log = logging.getLogger(__name__) -class DDPSpawnPlugin(ParallelPlugin): +class DDPSpawnStrategy(ParallelPlugin): """Spawns processes using the :func:`torch.multiprocessing.spawn` method and joins processes after training finishes.""" diff --git a/pytorch_lightning/plugins/training_type/sharded_spawn.py b/pytorch_lightning/plugins/training_type/sharded_spawn.py index ee91abbd840c6..4922291251089 100644 --- a/pytorch_lightning/plugins/training_type/sharded_spawn.py +++ b/pytorch_lightning/plugins/training_type/sharded_spawn.py @@ -19,7 +19,7 @@ from torch.optim import Optimizer import pytorch_lightning as pl -from pytorch_lightning.plugins.training_type.ddp_spawn import DDPSpawnPlugin +from pytorch_lightning.plugins.training_type.ddp_spawn import DDPSpawnStrategy from pytorch_lightning.trainer.states import TrainerFn from pytorch_lightning.utilities import _FAIRSCALE_AVAILABLE, rank_zero_only from pytorch_lightning.utilities.enums import _StrategyType @@ -32,7 +32,7 @@ from pytorch_lightning.overrides.fairscale import LightningShardedDataParallel, 
unwrap_lightning_module_sharded -class DDPSpawnShardedPlugin(DDPSpawnPlugin): +class DDPSpawnShardedPlugin(DDPSpawnStrategy): """Optimizer sharded training provided by FairScale.""" distributed_backend = _StrategyType.DDP_SHARDED_SPAWN diff --git a/pytorch_lightning/plugins/training_type/tpu_spawn.py b/pytorch_lightning/plugins/training_type/tpu_spawn.py index 8b4f0536d6b5b..debc483f2b116 100644 --- a/pytorch_lightning/plugins/training_type/tpu_spawn.py +++ b/pytorch_lightning/plugins/training_type/tpu_spawn.py @@ -27,7 +27,7 @@ from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO from pytorch_lightning.plugins.io.xla_plugin import XLACheckpointIO from pytorch_lightning.plugins.precision import PrecisionPlugin -from pytorch_lightning.plugins.training_type.ddp_spawn import _FakeQueue, _SpawnOutput, DDPSpawnPlugin +from pytorch_lightning.plugins.training_type.ddp_spawn import _FakeQueue, _SpawnOutput, DDPSpawnStrategy from pytorch_lightning.trainer.connectors.data_connector import DataConnector from pytorch_lightning.trainer.states import TrainerFn from pytorch_lightning.utilities import _TPU_AVAILABLE, find_shared_parameters, set_shared_parameters @@ -49,7 +49,7 @@ xm, xmp, MpDeviceLoader, rendezvous = [None] * 4 -class TPUSpawnPlugin(DDPSpawnPlugin): +class TPUSpawnPlugin(DDPSpawnStrategy): """Plugin for training multiple TPU devices using the :func:`torch.multiprocessing.spawn` method.""" def __init__( diff --git a/pytorch_lightning/trainer/configuration_validator.py b/pytorch_lightning/trainer/configuration_validator.py index 3a99cb3778e65..a6467f54520f8 100644 --- a/pytorch_lightning/trainer/configuration_validator.py +++ b/pytorch_lightning/trainer/configuration_validator.py @@ -263,12 +263,12 @@ def _check_add_get_queue(model: "pl.LightningModule") -> None: if is_overridden("add_to_queue", model): rank_zero_deprecation( "The `LightningModule.add_to_queue` method was deprecated in v1.5 and will be removed in v1.7 in " - "favor of `DDPSpawnPlugin.add_to_queue`" + "favor of `DDPSpawnStrategy.add_to_queue`" ) if is_overridden("get_from_queue", model): rank_zero_deprecation( "The `LightningModule.get_from_queue` method was deprecated in v1.5 and will be removed in v1.7 in " - "favor of `DDPSpawnPlugin.get_from_queue`" + "favor of `DDPSpawnStrategy.get_from_queue`" ) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 4c41b38290ab6..4374d37a05e56 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -32,8 +32,8 @@ DDPFullyShardedStrategy, DDPPlugin, DDPShardedPlugin, - DDPSpawnPlugin, DDPSpawnShardedPlugin, + DDPSpawnStrategy, DeepSpeedPlugin, DeepSpeedPrecisionPlugin, DoublePrecisionPlugin, @@ -735,7 +735,7 @@ def select_training_type_plugin(self) -> TrainingTypePlugin: ): ddp_plugin_cls = DDPPlugin elif use_ddp_spawn or use_ddp_cpu_spawn: - ddp_plugin_cls = DDPSpawnPlugin + ddp_plugin_cls = DDPSpawnStrategy elif use_ddp_fully_sharded: ddp_plugin_cls = DDPFullyShardedStrategy else: diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 47459f5ba664f..13e36c2be7d20 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -41,7 +41,7 @@ from pytorch_lightning.loops.utilities import _parse_loop_limits from pytorch_lightning.plugins import ( ApexMixedPrecisionPlugin, - DDPSpawnPlugin, + DDPSpawnStrategy, 
NativeMixedPrecisionPlugin, ParallelPlugin, PLUGIN_INPUT, @@ -669,7 +669,7 @@ def _call_and_handle_interrupt(self, trainer_fn: Callable, *args: Any, **kwargs: **kwargs: keyword arguments to be passed to `trainer_fn` """ try: - if isinstance(self.training_type_plugin, DDPSpawnPlugin): + if isinstance(self.training_type_plugin, DDPSpawnStrategy): spawn_output: _SpawnOutput = self.training_type_plugin.spawn(trainer_fn, *args, **kwargs) self.training_type_plugin._recover_results_in_main_process(spawn_output, self) return spawn_output.trainer_results @@ -1178,7 +1178,7 @@ def _run( self.state.status = TrainerStatus.FINISHED self.state.stage = None - if isinstance(self.training_type_plugin, DDPSpawnPlugin): + if isinstance(self.training_type_plugin, DDPSpawnStrategy): results = self.training_type_plugin._collect_rank_zero_results(self, results) return results @@ -1419,7 +1419,7 @@ def _handle_meta_model(self) -> None: if not is_on_meta_device(self.lightning_module): return - if isinstance(self.training_type_plugin, DDPSpawnPlugin): + if isinstance(self.training_type_plugin, DDPSpawnStrategy): raise MisconfigurationException("LightningModule on meta device isn't supported with spawn.") materialize_module(self.lightning_module) diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index 0ef10b4eb2a9f..493b54411893c 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -30,8 +30,8 @@ DDP2Plugin, DDPPlugin, DDPShardedPlugin, - DDPSpawnPlugin, DDPSpawnShardedPlugin, + DDPSpawnStrategy, DeepSpeedPlugin, ParallelPlugin, PrecisionPlugin, @@ -60,7 +60,7 @@ def test_accelerator_choice_ddp_cpu(tmpdir, num_processes: int, num_nodes: int): trainer = Trainer(fast_dev_run=True, accelerator="ddp_cpu", num_processes=num_processes, num_nodes=num_nodes) assert isinstance(trainer.accelerator, CPUAccelerator) no_spawn = num_processes == 1 and num_nodes > 1 - assert isinstance(trainer.training_type_plugin, DDPPlugin if no_spawn else DDPSpawnPlugin) + assert isinstance(trainer.training_type_plugin, DDPPlugin if no_spawn else DDPSpawnStrategy) assert isinstance(trainer.training_type_plugin.cluster_environment, LightningEnvironment) @@ -82,7 +82,7 @@ def test_accelerator_choice_ddp_spawn(cuda_available_mock, device_count_mock): with pytest.deprecated_call(match=r"accelerator='ddp_spawn'\)` has been deprecated"): trainer = Trainer(fast_dev_run=True, accelerator="ddp_spawn", gpus=1) assert isinstance(trainer.accelerator, GPUAccelerator) - assert isinstance(trainer.training_type_plugin, DDPSpawnPlugin) + assert isinstance(trainer.training_type_plugin, DDPSpawnStrategy) assert isinstance(trainer.training_type_plugin.cluster_environment, LightningEnvironment) @@ -346,7 +346,7 @@ def test_accelerator_choice_ddp_cpu_and_strategy(tmpdir): @RunIf(skip_windows=True, skip_49370=True) def test_accelerator_choice_ddp_cpu_and_strategy_spawn(tmpdir): """Test that accelerator="ddp_cpu" can work together with an instance of DDPPSpawnPlugin.""" - _test_accelerator_choice_ddp_cpu_and_strategy(tmpdir, ddp_strategy_class=DDPSpawnPlugin) + _test_accelerator_choice_ddp_cpu_and_strategy(tmpdir, ddp_strategy_class=DDPSpawnStrategy) def _test_accelerator_choice_ddp_cpu_and_strategy(tmpdir, ddp_strategy_class): @@ -499,7 +499,7 @@ def test_plugin_accelerator_choice(accelerator: Optional[str], plugin: str): ["accelerator", "plugin"], [ ("ddp", DDPPlugin), - ("ddp_spawn", DDPSpawnPlugin), + ("ddp_spawn", DDPSpawnStrategy), 
("ddp_sharded", DDPShardedPlugin), ("ddp_sharded_spawn", DDPSpawnShardedPlugin), pytest.param("deepspeed", DeepSpeedPlugin, marks=RunIf(deepspeed=True)), @@ -568,7 +568,7 @@ def test_accelerator_cpu_with_multiple_gpus(): assert isinstance(trainer.accelerator, CPUAccelerator) -@pytest.mark.parametrize(["devices", "plugin"], [(1, SingleDevicePlugin), (5, DDPSpawnPlugin)]) +@pytest.mark.parametrize(["devices", "plugin"], [(1, SingleDevicePlugin), (5, DDPSpawnStrategy)]) def test_accelerator_cpu_with_devices(devices, plugin): trainer = Trainer(accelerator="cpu", devices=devices) @@ -590,7 +590,7 @@ def test_accelerator_cpu_with_num_processes_priority(): @RunIf(min_gpus=2) @pytest.mark.parametrize( - ["devices", "plugin"], [(1, SingleDevicePlugin), ([1], SingleDevicePlugin), (2, DDPSpawnPlugin)] + ["devices", "plugin"], [(1, SingleDevicePlugin), ([1], SingleDevicePlugin), (2, DDPSpawnStrategy)] ) def test_accelerator_gpu_with_devices(devices, plugin): @@ -684,8 +684,8 @@ def test_exception_invalid_strategy(): @pytest.mark.parametrize( ["strategy", "plugin"], [ - ("ddp_spawn", DDPSpawnPlugin), - ("ddp_spawn_find_unused_parameters_false", DDPSpawnPlugin), + ("ddp_spawn", DDPSpawnStrategy), + ("ddp_spawn_find_unused_parameters_false", DDPSpawnStrategy), ("ddp", DDPPlugin), ("ddp_find_unused_parameters_false", DDPPlugin), ], @@ -695,7 +695,7 @@ def test_strategy_choice_cpu_str(tmpdir, strategy, plugin): assert isinstance(trainer.training_type_plugin, plugin) -@pytest.mark.parametrize("plugin", [DDPSpawnPlugin, DDPPlugin]) +@pytest.mark.parametrize("plugin", [DDPSpawnStrategy, DDPPlugin]) def test_strategy_choice_cpu_plugin(tmpdir, plugin): trainer = Trainer(strategy=plugin(), accelerator="cpu", devices=2) assert isinstance(trainer.training_type_plugin, plugin) @@ -705,8 +705,8 @@ def test_strategy_choice_cpu_plugin(tmpdir, plugin): @pytest.mark.parametrize( ["strategy", "plugin"], [ - ("ddp_spawn", DDPSpawnPlugin), - ("ddp_spawn_find_unused_parameters_false", DDPSpawnPlugin), + ("ddp_spawn", DDPSpawnStrategy), + ("ddp_spawn_find_unused_parameters_false", DDPSpawnStrategy), ("ddp", DDPPlugin), ("ddp_find_unused_parameters_false", DDPPlugin), ("ddp2", DDP2Plugin), @@ -722,14 +722,14 @@ def test_strategy_choice_gpu_str(tmpdir, strategy, plugin): @RunIf(min_gpus=2) -@pytest.mark.parametrize("plugin", [DDPSpawnPlugin, DDPPlugin]) +@pytest.mark.parametrize("plugin", [DDPSpawnStrategy, DDPPlugin]) def test_strategy_choice_gpu_plugin(tmpdir, plugin): trainer = Trainer(strategy=plugin(), accelerator="gpu", devices=2) assert isinstance(trainer.training_type_plugin, plugin) @RunIf(min_gpus=2) -@pytest.mark.parametrize("plugin", [DDPSpawnPlugin, DDPPlugin]) +@pytest.mark.parametrize("plugin", [DDPSpawnStrategy, DDPPlugin]) def test_device_type_when_training_plugin_gpu_passed(tmpdir, plugin): trainer = Trainer(strategy=plugin(), gpus=2) @@ -753,7 +753,7 @@ def test_amp_level_raises_error_with_native(): def test_strategy_choice_ddp_spawn_cpu(tmpdir): trainer = Trainer(fast_dev_run=True, strategy="ddp_spawn", num_processes=2) assert isinstance(trainer.accelerator, CPUAccelerator) - assert isinstance(trainer.training_type_plugin, DDPSpawnPlugin) + assert isinstance(trainer.training_type_plugin, DDPSpawnStrategy) assert isinstance(trainer.training_type_plugin.cluster_environment, LightningEnvironment) @@ -773,7 +773,7 @@ def test_strategy_choice_ddp(cuda_available_mock, device_count_mock): def test_strategy_choice_ddp_spawn(cuda_available_mock, device_count_mock): trainer = Trainer(fast_dev_run=True, 
strategy="ddp_spawn", gpus=1) assert isinstance(trainer.accelerator, GPUAccelerator) - assert isinstance(trainer.training_type_plugin, DDPSpawnPlugin) + assert isinstance(trainer.training_type_plugin, DDPSpawnStrategy) assert isinstance(trainer.training_type_plugin.cluster_environment, LightningEnvironment) diff --git a/tests/callbacks/test_stochastic_weight_avg.py b/tests/callbacks/test_stochastic_weight_avg.py index 02e9aabd0fce1..11675827ec996 100644 --- a/tests/callbacks/test_stochastic_weight_avg.py +++ b/tests/callbacks/test_stochastic_weight_avg.py @@ -22,7 +22,7 @@ from pytorch_lightning import LightningModule, Trainer from pytorch_lightning.callbacks import StochasticWeightAveraging -from pytorch_lightning.plugins import DDPSpawnPlugin +from pytorch_lightning.plugins import DDPSpawnStrategy from pytorch_lightning.plugins.training_type import TrainingTypePlugin from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.helpers.boring_model import BoringModel, RandomDataset, RandomIterableDataset @@ -99,7 +99,7 @@ def on_train_end(self, trainer, pl_module): assert trainer.accumulate_grad_batches == 2 assert trainer.num_training_batches == 5 - if not isinstance(trainer.training_type_plugin, DDPSpawnPlugin): + if not isinstance(trainer.training_type_plugin, DDPSpawnStrategy): # check backward call count. the batchnorm update epoch should not backward assert trainer.training_type_plugin.backward.call_count == trainer.max_epochs * trainer.limit_train_batches diff --git a/tests/lite/test_parity.py b/tests/lite/test_parity.py index d4d0ca6e5e9c7..207478caf4eab 100644 --- a/tests/lite/test_parity.py +++ b/tests/lite/test_parity.py @@ -29,7 +29,7 @@ from pytorch_lightning.lite import LightningLite from pytorch_lightning.plugins.environments.lightning_environment import find_free_network_port -from pytorch_lightning.plugins.training_type.ddp_spawn import DDPSpawnPlugin +from pytorch_lightning.plugins.training_type.ddp_spawn import DDPSpawnStrategy from pytorch_lightning.utilities.apply_func import apply_to_collection, move_data_to_device from pytorch_lightning.utilities.cloud_io import atomic_save from tests.helpers.boring_model import RandomDataset @@ -86,7 +86,7 @@ def run(self, model: nn.Module, train_dataloader: DataLoader, num_epochs: int = self.backward(loss) optimizer.step() - if isinstance(self._strategy, DDPSpawnPlugin) and tmpdir and self.global_rank == 0: + if isinstance(self._strategy, DDPSpawnStrategy) and tmpdir and self.global_rank == 0: checkpoint_path = os.path.join(tmpdir, "model.pt") atomic_save(model.state_dict(), checkpoint_path) return checkpoint_path diff --git a/tests/models/test_sync_batchnorm.py b/tests/models/test_sync_batchnorm.py index 5035e71f928fc..d490fbdae41e7 100644 --- a/tests/models/test_sync_batchnorm.py +++ b/tests/models/test_sync_batchnorm.py @@ -17,7 +17,7 @@ import torch.nn.functional as F from pytorch_lightning import LightningModule, seed_everything, Trainer -from pytorch_lightning.plugins import DDPSpawnPlugin +from pytorch_lightning.plugins import DDPSpawnStrategy from pytorch_lightning.plugins.environments import LightningEnvironment from pytorch_lightning.utilities import FLOAT16_EPSILON from tests.helpers.datamodules import MNISTDataModule @@ -105,7 +105,7 @@ def test_sync_batchnorm_ddp(tmpdir): dm.setup(stage=None) model = SyncBNModule(gpu_count=2, bn_targets=bn_outputs) - ddp = DDPSpawnPlugin( + ddp = DDPSpawnStrategy( parallel_devices=[torch.device("cuda", 0), torch.device("cuda", 1)], num_nodes=1, 
sync_batchnorm=True, diff --git a/tests/plugins/test_ddp_plugin_with_comm_hook.py b/tests/plugins/test_ddp_plugin_with_comm_hook.py index 69d320b52d426..2a80e46841aa0 100644 --- a/tests/plugins/test_ddp_plugin_with_comm_hook.py +++ b/tests/plugins/test_ddp_plugin_with_comm_hook.py @@ -14,7 +14,7 @@ import torch from pytorch_lightning import Trainer -from pytorch_lightning.plugins import DDPPlugin, DDPSpawnPlugin +from pytorch_lightning.plugins import DDPPlugin, DDPSpawnStrategy from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_8, _TORCH_GREATER_EQUAL_1_10 from tests.helpers import BoringModel from tests.helpers.runif import RunIf @@ -97,7 +97,7 @@ def test_ddp_fp16_compress_wrap_sgd_comm_hook(tmpdir): def test_ddp_spawn_fp16_compress_comm_hook(tmpdir): """Test for DDP Spawn FP16 compress hook.""" model = BoringModel() - training_type_plugin = DDPSpawnPlugin(ddp_comm_hook=default.fp16_compress_hook) + training_type_plugin = DDPSpawnStrategy(ddp_comm_hook=default.fp16_compress_hook) trainer = Trainer( max_epochs=1, gpus=2, diff --git a/tests/plugins/test_ddp_spawn_plugin.py b/tests/plugins/test_ddp_spawn_plugin.py index c8c861050d844..5f0074dcd7718 100644 --- a/tests/plugins/test_ddp_spawn_plugin.py +++ b/tests/plugins/test_ddp_spawn_plugin.py @@ -19,7 +19,7 @@ from torch.nn.parallel.distributed import DistributedDataParallel from pytorch_lightning import LightningModule, Trainer -from pytorch_lightning.plugins import DDPSpawnPlugin +from pytorch_lightning.plugins import DDPSpawnStrategy from pytorch_lightning.trainer.states import TrainerFn from tests.helpers.boring_model import BoringDataModule, BoringModel from tests.helpers.runif import RunIf @@ -52,11 +52,11 @@ def get_from_queue(self, queue) -> None: @RunIf(skip_windows=True, skip_49370=True) def test_ddp_cpu(): - """Tests if device is set correctly when training for DDPSpawnPlugin.""" + """Tests if device is set correctly when training for DDPSpawnStrategy.""" trainer = Trainer(num_processes=2, fast_dev_run=True) # assert training type plugin attributes for device setting - assert isinstance(trainer.training_type_plugin, DDPSpawnPlugin) + assert isinstance(trainer.training_type_plugin, DDPSpawnStrategy) assert not trainer.training_type_plugin.on_gpu assert not trainer.training_type_plugin.on_tpu assert trainer.training_type_plugin.root_device == torch.device("cpu") @@ -68,11 +68,11 @@ def test_ddp_cpu(): @RunIf(min_gpus=2) def test_ddp_spawn_extra_parameters(tmpdir): - """Tests if device is set correctly when training for DDPSpawnPlugin and tests add_to_queue/get_from_queue with - Lightning Module (deprecated way).""" + """Tests if device is set correctly when training for DDPSpawnStrategy and tests add_to_queue/get_from_queue + with Lightning Module (deprecated way).""" trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, gpus=2, strategy="ddp_spawn") - assert isinstance(trainer.training_type_plugin, DDPSpawnPlugin) + assert isinstance(trainer.training_type_plugin, DDPSpawnStrategy) assert trainer.training_type_plugin.on_gpu assert trainer.training_type_plugin.root_device == torch.device("cuda:0") @@ -85,7 +85,7 @@ def test_ddp_spawn_extra_parameters(tmpdir): assert model.test_val == "test_val" -class TestDDPSpawnPlugin(DDPSpawnPlugin): +class TestDDPSpawnStrategy(DDPSpawnStrategy): def add_to_queue(self, trainer, queue) -> None: queue.put("new_test_val") return super().add_to_queue(trainer, queue) @@ -97,10 +97,10 @@ def get_from_queue(self, trainer: Trainer, queue) -> None: @RunIf(skip_windows=True, 
skip_49370=True) def test_ddp_spawn_add_get_queue(tmpdir): - """Tests add_to_queue/get_from_queue with DDPSpawnPlugin.""" + """Tests add_to_queue/get_from_queue with DDPSpawnStrategy.""" - ddp_spawn_plugin = TestDDPSpawnPlugin() - trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, num_processes=2, strategy=ddp_spawn_plugin) + ddp_spawn_strategy = TestDDPSpawnStrategy() + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, num_processes=2, strategy=ddp_spawn_strategy) val: float = 1.0 val_name: str = "val_acc" @@ -108,7 +108,7 @@ def test_ddp_spawn_add_get_queue(tmpdir): dm = BoringDataModule() trainer.fit(model, datamodule=dm) assert trainer.callback_metrics[val_name] == torch.tensor(val) - assert ddp_spawn_plugin.new_test_val == "new_test_val" + assert ddp_spawn_strategy.new_test_val == "new_test_val" class BoringModelDDP(BoringModel): @@ -149,7 +149,7 @@ def test_ddp_spawn_configure_ddp(tmpdir): def test_ddp_spawn_transfer_weights(tmpdir, trainer_fn): """Tests that the spawn plugin transfers the new weights to the main process and deletes the temporary file.""" model = Mock(wraps=BoringModel(), spec=BoringModel) - plugin = DDPSpawnPlugin() + plugin = DDPSpawnStrategy() plugin.model = model trainer = Trainer(default_root_dir=tmpdir) trainer.state.fn = trainer_fn # pretend we are in a particular trainer state diff --git a/tests/plugins/test_plugins_registry.py b/tests/plugins/test_plugins_registry.py index 148e3fd85b7ee..12e668a1e1b14 100644 --- a/tests/plugins/test_plugins_registry.py +++ b/tests/plugins/test_plugins_registry.py @@ -19,8 +19,8 @@ DDPFullyShardedStrategy, DDPPlugin, DDPShardedPlugin, - DDPSpawnPlugin, DDPSpawnShardedPlugin, + DDPSpawnStrategy, DeepSpeedPlugin, TPUSpawnPlugin, TrainingTypePluginsRegistry, @@ -110,7 +110,7 @@ def test_fsdp_plugins_registry(tmpdir): "plugin_name, plugin", [ ("ddp_find_unused_parameters_false", DDPPlugin), - ("ddp_spawn_find_unused_parameters_false", DDPSpawnPlugin), + ("ddp_spawn_find_unused_parameters_false", DDPSpawnStrategy), ("ddp_sharded_spawn_find_unused_parameters_false", DDPSpawnShardedPlugin), ("ddp_sharded_find_unused_parameters_false", DDPShardedPlugin), ], diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 61a75963b26a3..adefc2a278112 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -43,8 +43,8 @@ DDPFullyShardedStrategy, DDPPlugin, DDPShardedPlugin, - DDPSpawnPlugin, DDPSpawnShardedPlugin, + DDPSpawnStrategy, ) from pytorch_lightning.trainer.states import TrainerFn from pytorch_lightning.utilities import _AcceleratorType, _StrategyType @@ -1432,7 +1432,7 @@ def predict( else: results = trainer.predict(model, dataloaders=dataloaders) - if not isinstance(trainer.training_type_plugin, DDPSpawnPlugin): + if not isinstance(trainer.training_type_plugin, DDPSpawnStrategy): if use_callbacks: assert cb.write_on_batch_end_called assert not cb.write_on_epoch_end_called @@ -1530,7 +1530,7 @@ def test_spawn_predict_return_predictions(_, __, accelerator): """Test that `return_predictions=True` raise a MisconfigurationException with spawn training type plugins.""" model = BoringModel() trainer = Trainer(accelerator=accelerator, strategy="ddp_spawn", devices=2, fast_dev_run=True) - assert isinstance(trainer.training_type_plugin, DDPSpawnPlugin) + assert isinstance(trainer.training_type_plugin, DDPSpawnStrategy) with pytest.raises(ProcessRaisedException, match="`return_predictions` should be set to `False`"): trainer.predict(model, 
dataloaders=model.train_dataloader(), return_predictions=True) @@ -2186,11 +2186,11 @@ def training_step(self, batch, batch_idx): ), ), ( - dict(strategy=DDPSpawnPlugin(), num_processes=2, gpus=None), + dict(strategy=DDPSpawnStrategy(), num_processes=2, gpus=None), dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.CPU, num_gpus=0, num_processes=2), ), ( - dict(strategy=DDPSpawnPlugin(), gpus=2), + dict(strategy=DDPSpawnStrategy(), gpus=2), dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.GPU, num_gpus=2, num_processes=1), ), ( From 28663c123c7e82e817a1c76cb57065925d5a26cc Mon Sep 17 00:00:00 2001 From: Siyu Wang Date: Fri, 17 Dec 2021 16:37:16 -0800 Subject: [PATCH 3/8] update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 74e28ac14d9f3..e6b5331f5668e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -132,7 +132,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - DeepSpeed does not require lightning module zero 3 partitioning ([#10655](https://github.com/PyTorchLightning/pytorch-lightning/pull/10655)) -- Renamed the `DDPSpawnPlugin` to `DDPSpawnStrategy` ([#11144](https://github.com/PyTorchLightning/pytorch-lightning/pull/11144)) +- Renamed the `DDPSpawnPlugin` to `DDPSpawnStrategy` ([#11145](https://github.com/PyTorchLightning/pytorch-lightning/pull/11145)) ### Deprecated From 31625f78234d6ed4622918d70bc212d65ddf12e0 Mon Sep 17 00:00:00 2001 From: four4fish <88516121+four4fish@users.noreply.github.com> Date: Mon, 20 Dec 2021 10:31:44 -0800 Subject: [PATCH 4/8] Update tpu_spawn.py --- pytorch_lightning/plugins/training_type/tpu_spawn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/plugins/training_type/tpu_spawn.py b/pytorch_lightning/plugins/training_type/tpu_spawn.py index 7c0b49dcc0b3b..abc5858080389 100644 --- a/pytorch_lightning/plugins/training_type/tpu_spawn.py +++ b/pytorch_lightning/plugins/training_type/tpu_spawn.py @@ -340,7 +340,7 @@ def should_rank_save_checkpoint(self) -> bool: def register_plugins(cls, plugin_registry: Dict) -> None: plugin_registry.register("tpu_spawn_debug", cls, description="TPUSpawn Plugin with `debug` as True", debug=True) - @DDPSpawnPlugin.checkpoint_io.setter + @TPUSpawnPlugin.checkpoint_io.setter def checkpoint_io(self, io: Optional[XLACheckpointIO]) -> None: if io is not None and not isinstance(io, XLACheckpointIO): raise MisconfigurationException(f"{self.__class__.__name__}.checkpoint_io` must be a `XLACheckpointIO`.") From 987a260f1d43a5d8c2973c2f574a1ff7801291d5 Mon Sep 17 00:00:00 2001 From: four4fish <88516121+four4fish@users.noreply.github.com> Date: Mon, 20 Dec 2021 10:51:49 -0800 Subject: [PATCH 5/8] Update tpu_spawn.py --- pytorch_lightning/plugins/training_type/tpu_spawn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/plugins/training_type/tpu_spawn.py b/pytorch_lightning/plugins/training_type/tpu_spawn.py index abc5858080389..ec1c08f6b0b96 100644 --- a/pytorch_lightning/plugins/training_type/tpu_spawn.py +++ b/pytorch_lightning/plugins/training_type/tpu_spawn.py @@ -340,7 +340,7 @@ def should_rank_save_checkpoint(self) -> bool: def register_plugins(cls, plugin_registry: Dict) -> None: plugin_registry.register("tpu_spawn_debug", cls, description="TPUSpawn Plugin with `debug` as True", debug=True) - @TPUSpawnPlugin.checkpoint_io.setter + @DDPSpawnStrategy.checkpoint_io.setter def checkpoint_io(self, io: 
Optional[XLACheckpointIO]) -> None: if io is not None and not isinstance(io, XLACheckpointIO): raise MisconfigurationException(f"{self.__class__.__name__}.checkpoint_io` must be a `XLACheckpointIO`.") From 67f2f4a1e5bed24c361e16f898a9a5b69482d5f2 Mon Sep 17 00:00:00 2001 From: four4fish <88516121+four4fish@users.noreply.github.com> Date: Mon, 20 Dec 2021 12:39:16 -0800 Subject: [PATCH 6/8] Apply suggestions from code review Co-authored-by: Rohit Gupta --- docs/source/advanced/training_tricks.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/advanced/training_tricks.rst b/docs/source/advanced/training_tricks.rst index 97e86c5efd1c7..5a716efe9fb80 100644 --- a/docs/source/advanced/training_tricks.rst +++ b/docs/source/advanced/training_tricks.rst @@ -178,7 +178,7 @@ For example, when training Graph Neural Networks, a common strategy is to load t A simple way to prevent redundant dataset replicas is to rely on :obj:`torch.multiprocessing` to share the `data automatically between spawned processes via shared memory `_. For this, all data pre-loading should be done on the main process inside :meth:`DataModule.__init__`. -As a result, all tensor-data will get automatically shared when using the :class:`~pytorch_lightning.plugins.DDPSpawnStrategy` training type plugin: +As a result, all tensor-data will get automatically shared when using the :class:`~pytorch_lightning.plugins.DDPSpawnStrategy` training type strategy: .. warning:: From 66b4ed30807e4afac27cb519a671e989e0e6f5ee Mon Sep 17 00:00:00 2001 From: four4fish <88516121+four4fish@users.noreply.github.com> Date: Tue, 21 Dec 2021 14:32:27 -0800 Subject: [PATCH 7/8] Update CHANGELOG.md --- CHANGELOG.md | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea7bbc25e1b55..bcaaa9572daa6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -145,6 +145,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). * Renamed the `DDPShardedPlugin` to `DDPShardedStrategy` ([#11186](https://github.com/PyTorchLightning/pytorch-lightning/pull/11186)) * Renamed the `DDP2Plugin` to `DDP2Strategy` ([#11184](https://github.com/PyTorchLightning/pytorch-lightning/pull/11184)) * Renamed the `SingleTPUPlugin` to `SingleTPUStrategy` ([#11182](https://github.com/PyTorchLightning/pytorch-lightning/pull/11182)) + * Renamed the `DDPSpawnPlugin` to `DDPSpawnStrategy` ([#11145](https://github.com/PyTorchLightning/pytorch-lightning/pull/11145)) + * Renamed the `DDPFullyShardedPlugin` to `DDPFullyShardedStrategy` ([#11143](https://github.com/PyTorchLightning/pytorch-lightning/pull/11143)) - Marked the `ResultCollection`, `ResultMetric`, and `ResultMetricCollection` classes as protected ([#11130](https://github.com/PyTorchLightning/pytorch-lightning/pull/11130)) @@ -153,12 +155,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- DeepSpeed does not require lightning module zero 3 partitioning ([#10655](https://github.com/PyTorchLightning/pytorch-lightning/pull/10655)) -- Renamed the `DDPSpawnPlugin` to `DDPSpawnStrategy` ([#11145](https://github.com/PyTorchLightning/pytorch-lightning/pull/11145)) - - -- Renamed the `DDPFullyShardedPlugin` to `DDPFullyShardedStrategy` ([#11143](https://github.com/PyTorchLightning/pytorch-lightning/pull/11143)) - - ### Deprecated - Deprecated `ClusterEnvironment.master_{address,port}` in favor of `ClusterEnvironment.main_{address,port}` ([#10103](https://github.com/PyTorchLightning/pytorch-lightning/issues/10103)) From 3b4a54e5ae6f4a9e95fcec533620cbea6e658a0d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 21 Dec 2021 22:32:34 +0000 Subject: [PATCH 8/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pytorch_lightning/trainer/connectors/accelerator_connector.py | 2 +- tests/accelerators/test_accelerator_connector.py | 2 +- tests/plugins/test_plugins_registry.py | 2 +- tests/trainer/test_trainer.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index a722922b69f85..f382c62da7d49 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -31,8 +31,8 @@ DDP2Strategy, DDPFullyShardedStrategy, DDPShardedStrategy, - DDPSpawnStrategy, DDPSpawnShardedPlugin, + DDPSpawnStrategy, DDPStrategy, DeepSpeedPrecisionPlugin, DeepSpeedStrategy, diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index 49cf726b0fc7e..fd5137d51ed8e 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -28,8 +28,8 @@ DataParallelStrategy, DDP2Strategy, DDPShardedStrategy, - DDPSpawnStrategy, DDPSpawnShardedPlugin, + DDPSpawnStrategy, DDPStrategy, DeepSpeedStrategy, ParallelPlugin, diff --git a/tests/plugins/test_plugins_registry.py b/tests/plugins/test_plugins_registry.py index 9038fa079ff93..a406ec201e218 100644 --- a/tests/plugins/test_plugins_registry.py +++ b/tests/plugins/test_plugins_registry.py @@ -18,8 +18,8 @@ CheckpointIO, DDPFullyShardedStrategy, DDPShardedStrategy, - DDPSpawnStrategy, DDPSpawnShardedPlugin, + DDPSpawnStrategy, DDPStrategy, DeepSpeedStrategy, TPUSpawnStrategy, diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 5c3872a9c3db9..c2e12dd27a79a 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -42,8 +42,8 @@ DDP2Strategy, DDPFullyShardedStrategy, DDPShardedStrategy, - DDPSpawnStrategy, DDPSpawnShardedPlugin, + DDPSpawnStrategy, DDPStrategy, ) from pytorch_lightning.trainer.states import TrainerFn
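Taken together, the series only renames the classes (plus the follow-up changelog and docs adjustments); constructor arguments and Trainer wiring are unchanged. A minimal sketch of the post-rename spelling, mirroring the tests touched above (illustrative only, not part of the patches; the fully sharded example assumes a multi-GPU machine with fairscale installed):

    from pytorch_lightning import Trainer
    from pytorch_lightning.plugins import DDPFullyShardedStrategy, DDPSpawnStrategy

    # CPU spawn: what used to be DDPSpawnPlugin(find_unused_parameters=False)
    trainer = Trainer(num_processes=2, fast_dev_run=True, strategy=DDPSpawnStrategy(find_unused_parameters=False))
    assert isinstance(trainer.training_type_plugin, DDPSpawnStrategy)

    # Fully sharded: the "fsdp" registry entry now resolves to DDPFullyShardedStrategy
    trainer = Trainer(gpus=2, precision=16, strategy="fsdp")
    assert isinstance(trainer.training_type_plugin, DDPFullyShardedStrategy)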