diff --git a/CHANGELOG.md b/CHANGELOG.md index d5bf513bd9910..bcaaa9572daa6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -145,6 +145,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). * Renamed the `DDPShardedPlugin` to `DDPShardedStrategy` ([#11186](https://github.com/PyTorchLightning/pytorch-lightning/pull/11186)) * Renamed the `DDP2Plugin` to `DDP2Strategy` ([#11184](https://github.com/PyTorchLightning/pytorch-lightning/pull/11184)) * Renamed the `SingleTPUPlugin` to `SingleTPUStrategy` ([#11182](https://github.com/PyTorchLightning/pytorch-lightning/pull/11182)) + * Renamed the `DDPSpawnPlugin` to `DDPSpawnStrategy` ([#11145](https://github.com/PyTorchLightning/pytorch-lightning/pull/11145)) + * Renamed the `DDPFullyShardedPlugin` to `DDPFullyShardedStrategy` ([#11143](https://github.com/PyTorchLightning/pytorch-lightning/pull/11143)) - Marked the `ResultCollection`, `ResultMetric`, and `ResultMetricCollection` classes as protected ([#11130](https://github.com/PyTorchLightning/pytorch-lightning/pull/11130)) @@ -153,9 +155,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - DeepSpeed does not require lightning module zero 3 partitioning ([#10655](https://github.com/PyTorchLightning/pytorch-lightning/pull/10655)) -- Renamed the `DDPFullyShardedPlugin` to `DDPFullyShardedStrategy` ([#11143](https://github.com/PyTorchLightning/pytorch-lightning/pull/11143)) - - ### Deprecated - Deprecated `ClusterEnvironment.master_{address,port}` in favor of `ClusterEnvironment.main_{address,port}` ([#10103](https://github.com/PyTorchLightning/pytorch-lightning/issues/10103)) diff --git a/docs/source/advanced/training_tricks.rst b/docs/source/advanced/training_tricks.rst index c6be9d12fa843..5a716efe9fb80 100644 --- a/docs/source/advanced/training_tricks.rst +++ b/docs/source/advanced/training_tricks.rst @@ -178,7 +178,7 @@ For example, when training Graph Neural Networks, a common strategy is to load t A simple way to prevent redundant dataset replicas is to rely on :obj:`torch.multiprocessing` to share the `data automatically between spawned processes via shared memory `_. For this, all data pre-loading should be done on the main process inside :meth:`DataModule.__init__`. -As a result, all tensor-data will get automatically shared when using the :class:`~pytorch_lightning.plugins.DDPSpawnPlugin` training type plugin: +As a result, all tensor data will be shared automatically when using the :class:`~pytorch_lightning.plugins.DDPSpawnStrategy` training type strategy: ..
warning:: diff --git a/docs/source/api_references.rst b/docs/source/api_references.rst index ceaca5bdcd87b..14e58e045c2a3 100644 --- a/docs/source/api_references.rst +++ b/docs/source/api_references.rst @@ -154,12 +154,13 @@ Training Type Plugins DDP2Strategy DDPShardedStrategy DDPSpawnShardedPlugin - DDPSpawnPlugin + DDPSpawnStrategy DeepSpeedStrategy HorovodStrategy SingleTPUStrategy TPUSpawnStrategy + Precision Plugins ^^^^^^^^^^^^^^^^^ diff --git a/docs/source/extensions/plugins.rst b/docs/source/extensions/plugins.rst index 21fee42372436..a7720e6785956 100644 --- a/docs/source/extensions/plugins.rst +++ b/docs/source/extensions/plugins.rst @@ -113,13 +113,14 @@ Training Type Plugins DDP2Strategy DDPShardedStrategy DDPSpawnShardedPlugin - DDPSpawnPlugin + DDPSpawnStrategy DeepSpeedStrategy HorovodStrategy SingleTPUStrategy TPUSpawnStrategy + Precision Plugins ----------------- diff --git a/docs/source/guides/speed.rst b/docs/source/guides/speed.rst index 035bd6594afca..c15a888bb5821 100644 --- a/docs/source/guides/speed.rst +++ b/docs/source/guides/speed.rst @@ -95,11 +95,11 @@ This by default comes with a performance hit, and can be disabled in most cases. .. code-block:: python - from pytorch_lightning.plugins import DDPSpawnPlugin + from pytorch_lightning.plugins import DDPSpawnStrategy trainer = pl.Trainer( gpus=2, - strategy=DDPSpawnPlugin(find_unused_parameters=False), + strategy=DDPSpawnStrategy(find_unused_parameters=False), ) When using DDP on a multi-node cluster, set NCCL parameters diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index c755dac5bba68..ca9b8dfe7f96c 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1926,7 +1926,7 @@ def add_to_queue(self, queue: pl.plugins.training_type.ddp_spawn._FakeQueue) -> queue: the instance of the queue to append the data. .. deprecated:: v1.5 - This method was deprecated in v1.5 in favor of `DDPSpawnPlugin.add_to_queue` + This method was deprecated in v1.5 in favor of `DDPSpawnStrategy.add_to_queue` and will be removed in v1.7. """ @@ -1938,7 +1938,7 @@ def get_from_queue(self, queue: pl.plugins.training_type.ddp_spawn._FakeQueue) - queue: the instance of the queue from where to get the data. .. deprecated:: v1.5 - This method was deprecated in v1.5 in favor of `DDPSpawnPlugin.get_from_queue` + This method was deprecated in v1.5 in favor of `DDPSpawnStrategy.get_from_queue` and will be removed in v1.7. """ diff --git a/pytorch_lightning/distributed/dist.py b/pytorch_lightning/distributed/dist.py index 33be0272c4158..1799450e3ce05 100644 --- a/pytorch_lightning/distributed/dist.py +++ b/pytorch_lightning/distributed/dist.py @@ -23,7 +23,8 @@ class LightningDistributed: """ .. deprecated:: v1.5 This class is deprecated in v1.5 and will be removed in v1.7. - The broadcast logic will be moved to the :class:`DDPStrategy` and :class`DDPSpawnPlugin` classes. + The broadcast logic will be moved to the :class:`DDPStrategy` and :class:`DDPSpawnStrategy` classes.
+ """ def __init__(self, rank=None, device=None): diff --git a/pytorch_lightning/lite/lite.py b/pytorch_lightning/lite/lite.py index deba133ef8189..41e23752a1e96 100644 --- a/pytorch_lightning/lite/lite.py +++ b/pytorch_lightning/lite/lite.py @@ -26,7 +26,7 @@ from pytorch_lightning.accelerators.accelerator import Accelerator from pytorch_lightning.lite.wrappers import _LiteDataLoader, _LiteModule, _LiteOptimizer -from pytorch_lightning.plugins import DDPSpawnPlugin, DeepSpeedStrategy, PLUGIN_INPUT, Strategy, TPUSpawnStrategy +from pytorch_lightning.plugins import DDPSpawnStrategy, DeepSpeedStrategy, PLUGIN_INPUT, Strategy, TPUSpawnStrategy from pytorch_lightning.plugins.training_type.training_type_plugin import TBroadcast from pytorch_lightning.trainer.connectors.accelerator_connector import AcceleratorConnector from pytorch_lightning.utilities import _AcceleratorType, _StrategyType, move_data_to_device @@ -310,7 +310,7 @@ def to_device(self, obj: Union[nn.Module, Tensor, Any]) -> Union[nn.Module, Tens """ if isinstance(obj, nn.Module): if self.device.type == "cuda": - # need to call this manually here again in case we spawned with DDPSpawnPlugin + # need to call this manually here again in case we spawned with DDPSpawnStrategy # TODO: refactor to let plugin handle this cleanly torch.cuda.set_device(self.device) return obj.to(self.device) @@ -403,7 +403,7 @@ def _run_impl(self, run_method: Callable, *args: Any, **kwargs: Any) -> Any: # apply sharded context to prevent OOM run_method = partial(self._run_with_sharded_context, run_method) - if isinstance(self._strategy, DDPSpawnPlugin): + if isinstance(self._strategy, DDPSpawnStrategy): return self._strategy.spawn(run_method, *args, **kwargs) else: return run_method(*args, **kwargs) diff --git a/pytorch_lightning/loops/dataloader/prediction_loop.py b/pytorch_lightning/loops/dataloader/prediction_loop.py index 3f227736d0cc2..1c96f077630e6 100644 --- a/pytorch_lightning/loops/dataloader/prediction_loop.py +++ b/pytorch_lightning/loops/dataloader/prediction_loop.py @@ -5,7 +5,7 @@ from pytorch_lightning.loops.dataloader.dataloader_loop import DataLoaderLoop from pytorch_lightning.loops.epoch.prediction_epoch_loop import PredictionEpochLoop -from pytorch_lightning.plugins import DDPSpawnPlugin +from pytorch_lightning.plugins import DDPSpawnStrategy from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.types import _PREDICT_OUTPUT @@ -29,14 +29,14 @@ def return_predictions(self) -> bool: @return_predictions.setter def return_predictions(self, return_predictions: Optional[bool] = None) -> None: - # `DDPSpawnPlugin` plugins and derivatives don't support return predictions. - is_ddp_spawn = isinstance(self.trainer.training_type_plugin, DDPSpawnPlugin) + # `DDPSpawnStrategy` plugins and derivatives don't support return predictions. + is_ddp_spawn = isinstance(self.trainer.training_type_plugin, DDPSpawnStrategy) if return_predictions and is_ddp_spawn: raise MisconfigurationException( - "`return_predictions` should be set to `False` when using the `DDPSpawnPlugin` or children class. " + "`return_predictions` should be set to `False` when using the `DDPSpawnStrategy` or children class. " f"Found {return_predictions} with training_type_plugin {type(self.trainer.training_type_plugin)}." ) - # For non `DDPSpawnPlugin` plugin, the `return_predictions` is True by default unless user decide otherwise. 
+ # For strategies other than `DDPSpawnStrategy`, `return_predictions` is True by default unless the user decides otherwise. self._return_predictions = not is_ddp_spawn if return_predictions is None else return_predictions @property diff --git a/pytorch_lightning/plugins/__init__.py b/pytorch_lightning/plugins/__init__.py index d0c4f8ad9c322..a2e984c6718d9 100644 --- a/pytorch_lightning/plugins/__init__.py +++ b/pytorch_lightning/plugins/__init__.py @@ -21,7 +21,7 @@ from pytorch_lightning.plugins.precision.tpu_bf16 import TPUBf16PrecisionPlugin from pytorch_lightning.plugins.training_type.ddp import DDPStrategy from pytorch_lightning.plugins.training_type.ddp2 import DDP2Strategy -from pytorch_lightning.plugins.training_type.ddp_spawn import DDPSpawnPlugin +from pytorch_lightning.plugins.training_type.ddp_spawn import DDPSpawnStrategy from pytorch_lightning.plugins.training_type.deepspeed import DeepSpeedStrategy from pytorch_lightning.plugins.training_type.dp import DataParallelStrategy from pytorch_lightning.plugins.training_type.fully_sharded import DDPFullyShardedStrategy @@ -46,7 +46,7 @@ "DataParallelStrategy", "DDP2Strategy", "DDPStrategy", - "DDPSpawnPlugin", + "DDPSpawnStrategy", "DDPFullyShardedStrategy", "DeepSpeedStrategy", "DeepSpeedPrecisionPlugin", diff --git a/pytorch_lightning/plugins/training_type/__init__.py b/pytorch_lightning/plugins/training_type/__init__.py index c021722f7cffe..a4c71da0a1aeb 100644 --- a/pytorch_lightning/plugins/training_type/__init__.py +++ b/pytorch_lightning/plugins/training_type/__init__.py @@ -1,6 +1,6 @@ from pytorch_lightning.plugins.training_type.ddp import DDPStrategy # noqa: F401 from pytorch_lightning.plugins.training_type.ddp2 import DDP2Strategy # noqa: F401 -from pytorch_lightning.plugins.training_type.ddp_spawn import DDPSpawnPlugin # noqa: F401 +from pytorch_lightning.plugins.training_type.ddp_spawn import DDPSpawnStrategy # noqa: F401 from pytorch_lightning.plugins.training_type.deepspeed import DeepSpeedStrategy # noqa: F401 from pytorch_lightning.plugins.training_type.dp import DataParallelStrategy # noqa: F401 from pytorch_lightning.plugins.training_type.fully_sharded import DDPFullyShardedStrategy # noqa: F401 diff --git a/pytorch_lightning/plugins/training_type/ddp_spawn.py b/pytorch_lightning/plugins/training_type/ddp_spawn.py index 5c25b5078c809..41be1931c2658 100644 --- a/pytorch_lightning/plugins/training_type/ddp_spawn.py +++ b/pytorch_lightning/plugins/training_type/ddp_spawn.py @@ -54,7 +54,7 @@ log = logging.getLogger(__name__) -class DDPSpawnPlugin(ParallelPlugin): +class DDPSpawnStrategy(ParallelPlugin): """Spawns processes using the :func:`torch.multiprocessing.spawn` method and joins processes after training finishes.""" diff --git a/pytorch_lightning/plugins/training_type/sharded_spawn.py b/pytorch_lightning/plugins/training_type/sharded_spawn.py index ee91abbd840c6..4922291251089 100644 --- a/pytorch_lightning/plugins/training_type/sharded_spawn.py +++ b/pytorch_lightning/plugins/training_type/sharded_spawn.py @@ -19,7 +19,7 @@ from torch.optim import Optimizer import pytorch_lightning as pl -from pytorch_lightning.plugins.training_type.ddp_spawn import DDPSpawnPlugin +from pytorch_lightning.plugins.training_type.ddp_spawn import DDPSpawnStrategy from pytorch_lightning.trainer.states import TrainerFn from pytorch_lightning.utilities import _FAIRSCALE_AVAILABLE, rank_zero_only from pytorch_lightning.utilities.enums import _StrategyType @@ -32,7 +32,7 @@ from pytorch_lightning.overrides.fairscale import
LightningShardedDataParallel, unwrap_lightning_module_sharded -class DDPSpawnShardedPlugin(DDPSpawnPlugin): +class DDPSpawnShardedPlugin(DDPSpawnStrategy): """Optimizer sharded training provided by FairScale.""" distributed_backend = _StrategyType.DDP_SHARDED_SPAWN diff --git a/pytorch_lightning/plugins/training_type/tpu_spawn.py b/pytorch_lightning/plugins/training_type/tpu_spawn.py index bb0168c22706e..2e6c579e84764 100644 --- a/pytorch_lightning/plugins/training_type/tpu_spawn.py +++ b/pytorch_lightning/plugins/training_type/tpu_spawn.py @@ -26,7 +26,7 @@ from pytorch_lightning.overrides import LightningDistributedModule from pytorch_lightning.plugins.io.xla_plugin import XLACheckpointIO from pytorch_lightning.plugins.precision import PrecisionPlugin -from pytorch_lightning.plugins.training_type.ddp_spawn import _FakeQueue, _SpawnOutput, DDPSpawnPlugin +from pytorch_lightning.plugins.training_type.ddp_spawn import _FakeQueue, _SpawnOutput, DDPSpawnStrategy from pytorch_lightning.trainer.connectors.data_connector import DataConnector from pytorch_lightning.trainer.states import TrainerFn from pytorch_lightning.utilities import _TPU_AVAILABLE, find_shared_parameters, set_shared_parameters @@ -48,7 +48,7 @@ xm, xmp, MpDeviceLoader, rendezvous = [None] * 4 -class TPUSpawnStrategy(DDPSpawnPlugin): +class TPUSpawnStrategy(DDPSpawnStrategy): """Strategy for training multiple TPU devices using the :func:`torch.multiprocessing.spawn` method.""" def __init__( @@ -348,7 +348,7 @@ def register_plugins(cls, plugin_registry: Dict) -> None: "tpu_spawn_debug", cls, description="TPUSpawn Strategy with `debug` as True", debug=True ) - @DDPSpawnPlugin.checkpoint_io.setter + @DDPSpawnStrategy.checkpoint_io.setter def checkpoint_io(self, io: Optional[XLACheckpointIO]) -> None: if io is not None and not isinstance(io, XLACheckpointIO): raise MisconfigurationException(f"{self.__class__.__name__}.checkpoint_io` must be a `XLACheckpointIO`.") diff --git a/pytorch_lightning/trainer/configuration_validator.py b/pytorch_lightning/trainer/configuration_validator.py index 3a99cb3778e65..a6467f54520f8 100644 --- a/pytorch_lightning/trainer/configuration_validator.py +++ b/pytorch_lightning/trainer/configuration_validator.py @@ -263,12 +263,12 @@ def _check_add_get_queue(model: "pl.LightningModule") -> None: if is_overridden("add_to_queue", model): rank_zero_deprecation( "The `LightningModule.add_to_queue` method was deprecated in v1.5 and will be removed in v1.7 in " - "favor of `DDPSpawnPlugin.add_to_queue`" + "favor of `DDPSpawnStrategy.add_to_queue`" ) if is_overridden("get_from_queue", model): rank_zero_deprecation( "The `LightningModule.get_from_queue` method was deprecated in v1.5 and will be removed in v1.7 in " - "favor of `DDPSpawnPlugin.get_from_queue`" + "favor of `DDPSpawnStrategy.get_from_queue`" ) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index e8f28e731731a..f382c62da7d49 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -31,8 +31,8 @@ DDP2Strategy, DDPFullyShardedStrategy, DDPShardedStrategy, - DDPSpawnPlugin, DDPSpawnShardedPlugin, + DDPSpawnStrategy, DDPStrategy, DeepSpeedPrecisionPlugin, DeepSpeedStrategy, @@ -735,7 +735,7 @@ def select_training_type_plugin(self) -> Strategy: ): ddp_strategy_cls = DDPStrategy elif use_ddp_spawn or use_ddp_cpu_spawn: - ddp_strategy_cls = DDPSpawnPlugin + ddp_strategy_cls = 
DDPSpawnStrategy elif use_ddp_fully_sharded: ddp_strategy_cls = DDPFullyShardedStrategy else: diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index cc2d60743ac41..fd0ebbf681b9b 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -43,7 +43,7 @@ from pytorch_lightning.loops.utilities import _parse_loop_limits from pytorch_lightning.plugins import ( ApexMixedPrecisionPlugin, - DDPSpawnPlugin, + DDPSpawnStrategy, NativeMixedPrecisionPlugin, ParallelPlugin, PLUGIN_INPUT, @@ -673,7 +673,7 @@ def _call_and_handle_interrupt(self, trainer_fn: Callable, *args: Any, **kwargs: **kwargs: keyword arguments to be passed to `trainer_fn` """ try: - if isinstance(self.training_type_plugin, DDPSpawnPlugin): + if isinstance(self.training_type_plugin, DDPSpawnStrategy): spawn_output: _SpawnOutput = self.training_type_plugin.spawn(trainer_fn, *args, **kwargs) self.training_type_plugin._recover_results_in_main_process(spawn_output, self) return spawn_output.trainer_results @@ -1183,7 +1183,7 @@ def _run( self.state.status = TrainerStatus.FINISHED self.state.stage = None - if isinstance(self.training_type_plugin, DDPSpawnPlugin): + if isinstance(self.training_type_plugin, DDPSpawnStrategy): results = self.training_type_plugin._collect_rank_zero_results(self, results) return results @@ -1420,7 +1420,7 @@ def _handle_meta_model(self) -> None: if not is_on_meta_device(self.lightning_module): return - if isinstance(self.training_type_plugin, DDPSpawnPlugin): + if isinstance(self.training_type_plugin, DDPSpawnStrategy): raise MisconfigurationException("LightningModule on meta device isn't supported with spawn.") materialize_module(self.lightning_module) diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index 061b7c5cf41cf..fd5137d51ed8e 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -28,8 +28,8 @@ DataParallelStrategy, DDP2Strategy, DDPShardedStrategy, - DDPSpawnPlugin, DDPSpawnShardedPlugin, + DDPSpawnStrategy, DDPStrategy, DeepSpeedStrategy, ParallelPlugin, @@ -58,7 +58,7 @@ def test_accelerator_choice_ddp_cpu(tmpdir, num_processes: int, num_nodes: int): trainer = Trainer(fast_dev_run=True, accelerator="ddp_cpu", num_processes=num_processes, num_nodes=num_nodes) assert isinstance(trainer.accelerator, CPUAccelerator) no_spawn = num_processes == 1 and num_nodes > 1 - assert isinstance(trainer.training_type_plugin, DDPStrategy if no_spawn else DDPSpawnPlugin) + assert isinstance(trainer.training_type_plugin, DDPStrategy if no_spawn else DDPSpawnStrategy) assert isinstance(trainer.training_type_plugin.cluster_environment, LightningEnvironment) @@ -80,7 +80,7 @@ def test_accelerator_choice_ddp_spawn(cuda_available_mock, device_count_mock): with pytest.deprecated_call(match=r"accelerator='ddp_spawn'\)` has been deprecated"): trainer = Trainer(fast_dev_run=True, accelerator="ddp_spawn", gpus=1) assert isinstance(trainer.accelerator, GPUAccelerator) - assert isinstance(trainer.training_type_plugin, DDPSpawnPlugin) + assert isinstance(trainer.training_type_plugin, DDPSpawnStrategy) assert isinstance(trainer.training_type_plugin.cluster_environment, LightningEnvironment) @@ -272,7 +272,7 @@ def test_accelerator_choice_ddp_cpu_and_strategy(tmpdir): @RunIf(skip_windows=True, skip_49370=True) def test_accelerator_choice_ddp_cpu_and_strategy_spawn(tmpdir): """Test that accelerator="ddp_cpu" can work together with 
an instance of DDPSpawnStrategy.""" - _test_accelerator_choice_ddp_cpu_and_strategy(tmpdir, ddp_strategy_class=DDPSpawnPlugin) + _test_accelerator_choice_ddp_cpu_and_strategy(tmpdir, ddp_strategy_class=DDPSpawnStrategy) def _test_accelerator_choice_ddp_cpu_and_strategy(tmpdir, ddp_strategy_class): @@ -417,7 +417,7 @@ def test_plugin_accelerator_choice(accelerator: Optional[str], plugin: str): ["accelerator", "plugin"], [ ("ddp", DDPStrategy), - ("ddp_spawn", DDPSpawnPlugin), + ("ddp_spawn", DDPSpawnStrategy), ("ddp_sharded", DDPShardedStrategy), ("ddp_sharded_spawn", DDPSpawnShardedPlugin), pytest.param("deepspeed", DeepSpeedStrategy, marks=RunIf(deepspeed=True)), @@ -486,7 +486,7 @@ def test_accelerator_cpu_with_multiple_gpus(): assert isinstance(trainer.accelerator, CPUAccelerator) -@pytest.mark.parametrize(["devices", "plugin"], [(1, SingleDevicePlugin), (5, DDPSpawnPlugin)]) +@pytest.mark.parametrize(["devices", "plugin"], [(1, SingleDevicePlugin), (5, DDPSpawnStrategy)]) def test_accelerator_cpu_with_devices(devices, plugin): trainer = Trainer(accelerator="cpu", devices=devices) @@ -508,7 +508,7 @@ def test_accelerator_cpu_with_num_processes_priority(): @RunIf(min_gpus=2) @pytest.mark.parametrize( - ["devices", "plugin"], [(1, SingleDevicePlugin), ([1], SingleDevicePlugin), (2, DDPSpawnPlugin)] + ["devices", "plugin"], [(1, SingleDevicePlugin), ([1], SingleDevicePlugin), (2, DDPSpawnStrategy)] ) def test_accelerator_gpu_with_devices(devices, plugin): @@ -602,8 +602,8 @@ def test_exception_invalid_strategy(): @pytest.mark.parametrize( ["strategy", "plugin"], [ - ("ddp_spawn", DDPSpawnPlugin), - ("ddp_spawn_find_unused_parameters_false", DDPSpawnPlugin), + ("ddp_spawn", DDPSpawnStrategy), + ("ddp_spawn_find_unused_parameters_false", DDPSpawnStrategy), ("ddp", DDPStrategy), ("ddp_find_unused_parameters_false", DDPStrategy), ], @@ -613,7 +613,7 @@ def test_strategy_choice_cpu_str(tmpdir, strategy, plugin): assert isinstance(trainer.training_type_plugin, plugin) -@pytest.mark.parametrize("plugin", [DDPSpawnPlugin, DDPStrategy]) +@pytest.mark.parametrize("plugin", [DDPSpawnStrategy, DDPStrategy]) def test_strategy_choice_cpu_plugin(tmpdir, plugin): trainer = Trainer(strategy=plugin(), accelerator="cpu", devices=2) assert isinstance(trainer.training_type_plugin, plugin) @@ -623,8 +623,8 @@ @pytest.mark.parametrize( ["strategy", "plugin"], [ - ("ddp_spawn", DDPSpawnPlugin), - ("ddp_spawn_find_unused_parameters_false", DDPSpawnPlugin), + ("ddp_spawn", DDPSpawnStrategy), + ("ddp_spawn_find_unused_parameters_false", DDPSpawnStrategy), ("ddp", DDPStrategy), ("ddp_find_unused_parameters_false", DDPStrategy), ("ddp2", DDP2Strategy), @@ -640,14 +640,14 @@ def test_strategy_choice_gpu_str(tmpdir, strategy, plugin): assert isinstance(trainer.training_type_plugin, plugin) @RunIf(min_gpus=2) -@pytest.mark.parametrize("plugin", [DDPSpawnPlugin, DDPStrategy]) +@pytest.mark.parametrize("plugin", [DDPSpawnStrategy, DDPStrategy]) def test_strategy_choice_gpu_plugin(tmpdir, plugin): trainer = Trainer(strategy=plugin(), accelerator="gpu", devices=2) assert isinstance(trainer.training_type_plugin, plugin) @RunIf(min_gpus=2) -@pytest.mark.parametrize("plugin", [DDPSpawnPlugin, DDPStrategy]) +@pytest.mark.parametrize("plugin", [DDPSpawnStrategy, DDPStrategy]) def test_device_type_when_training_plugin_gpu_passed(tmpdir, plugin): trainer = Trainer(strategy=plugin(), gpus=2) @@ -671,7 +671,7 @@ def test_amp_level_raises_error_with_native(): def test_strategy_choice_ddp_spawn_cpu(tmpdir): trainer =
Trainer(fast_dev_run=True, strategy="ddp_spawn", num_processes=2) assert isinstance(trainer.accelerator, CPUAccelerator) - assert isinstance(trainer.training_type_plugin, DDPSpawnPlugin) + assert isinstance(trainer.training_type_plugin, DDPSpawnStrategy) assert isinstance(trainer.training_type_plugin.cluster_environment, LightningEnvironment) @@ -691,7 +691,7 @@ def test_strategy_choice_ddp(cuda_available_mock, device_count_mock): def test_strategy_choice_ddp_spawn(cuda_available_mock, device_count_mock): trainer = Trainer(fast_dev_run=True, strategy="ddp_spawn", gpus=1) assert isinstance(trainer.accelerator, GPUAccelerator) - assert isinstance(trainer.training_type_plugin, DDPSpawnPlugin) + assert isinstance(trainer.training_type_plugin, DDPSpawnStrategy) assert isinstance(trainer.training_type_plugin.cluster_environment, LightningEnvironment) diff --git a/tests/callbacks/test_stochastic_weight_avg.py b/tests/callbacks/test_stochastic_weight_avg.py index effbffec53261..1ffaadcd00e58 100644 --- a/tests/callbacks/test_stochastic_weight_avg.py +++ b/tests/callbacks/test_stochastic_weight_avg.py @@ -22,7 +22,7 @@ from pytorch_lightning import LightningModule, Trainer from pytorch_lightning.callbacks import StochasticWeightAveraging -from pytorch_lightning.plugins import DDPSpawnPlugin +from pytorch_lightning.plugins import DDPSpawnStrategy from pytorch_lightning.plugins.training_type import Strategy from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.helpers.boring_model import BoringModel, RandomDataset, RandomIterableDataset @@ -99,7 +99,7 @@ def on_train_end(self, trainer, pl_module): assert trainer.accumulate_grad_batches == 2 assert trainer.num_training_batches == 5 - if not isinstance(trainer.training_type_plugin, DDPSpawnPlugin): + if not isinstance(trainer.training_type_plugin, DDPSpawnStrategy): # check backward call count. 
the batchnorm update epoch should not backward assert trainer.training_type_plugin.backward.call_count == trainer.max_epochs * trainer.limit_train_batches diff --git a/tests/lite/test_parity.py b/tests/lite/test_parity.py index d4d0ca6e5e9c7..207478caf4eab 100644 --- a/tests/lite/test_parity.py +++ b/tests/lite/test_parity.py @@ -29,7 +29,7 @@ from pytorch_lightning.lite import LightningLite from pytorch_lightning.plugins.environments.lightning_environment import find_free_network_port -from pytorch_lightning.plugins.training_type.ddp_spawn import DDPSpawnPlugin +from pytorch_lightning.plugins.training_type.ddp_spawn import DDPSpawnStrategy from pytorch_lightning.utilities.apply_func import apply_to_collection, move_data_to_device from pytorch_lightning.utilities.cloud_io import atomic_save from tests.helpers.boring_model import RandomDataset @@ -86,7 +86,7 @@ def run(self, model: nn.Module, train_dataloader: DataLoader, num_epochs: int = self.backward(loss) optimizer.step() - if isinstance(self._strategy, DDPSpawnPlugin) and tmpdir and self.global_rank == 0: + if isinstance(self._strategy, DDPSpawnStrategy) and tmpdir and self.global_rank == 0: checkpoint_path = os.path.join(tmpdir, "model.pt") atomic_save(model.state_dict(), checkpoint_path) return checkpoint_path diff --git a/tests/models/test_sync_batchnorm.py b/tests/models/test_sync_batchnorm.py index 5035e71f928fc..d490fbdae41e7 100644 --- a/tests/models/test_sync_batchnorm.py +++ b/tests/models/test_sync_batchnorm.py @@ -17,7 +17,7 @@ import torch.nn.functional as F from pytorch_lightning import LightningModule, seed_everything, Trainer -from pytorch_lightning.plugins import DDPSpawnPlugin +from pytorch_lightning.plugins import DDPSpawnStrategy from pytorch_lightning.plugins.environments import LightningEnvironment from pytorch_lightning.utilities import FLOAT16_EPSILON from tests.helpers.datamodules import MNISTDataModule @@ -105,7 +105,7 @@ def test_sync_batchnorm_ddp(tmpdir): dm.setup(stage=None) model = SyncBNModule(gpu_count=2, bn_targets=bn_outputs) - ddp = DDPSpawnPlugin( + ddp = DDPSpawnStrategy( parallel_devices=[torch.device("cuda", 0), torch.device("cuda", 1)], num_nodes=1, sync_batchnorm=True, diff --git a/tests/plugins/test_ddp_plugin_with_comm_hook.py b/tests/plugins/test_ddp_plugin_with_comm_hook.py index b433eea780ead..2a5872fc6221c 100644 --- a/tests/plugins/test_ddp_plugin_with_comm_hook.py +++ b/tests/plugins/test_ddp_plugin_with_comm_hook.py @@ -14,7 +14,7 @@ import torch from pytorch_lightning import Trainer -from pytorch_lightning.plugins import DDPSpawnPlugin, DDPStrategy +from pytorch_lightning.plugins import DDPSpawnStrategy, DDPStrategy from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_8, _TORCH_GREATER_EQUAL_1_10 from tests.helpers import BoringModel from tests.helpers.runif import RunIf @@ -97,7 +97,7 @@ def test_ddp_fp16_compress_wrap_sgd_comm_hook(tmpdir): def test_ddp_spawn_fp16_compress_comm_hook(tmpdir): """Test for DDP Spawn FP16 compress hook.""" model = BoringModel() - training_type_plugin = DDPSpawnPlugin(ddp_comm_hook=default.fp16_compress_hook) + training_type_plugin = DDPSpawnStrategy(ddp_comm_hook=default.fp16_compress_hook) trainer = Trainer( max_epochs=1, gpus=2, diff --git a/tests/plugins/test_ddp_spawn_plugin.py b/tests/plugins/test_ddp_spawn_plugin.py index c8c861050d844..5f0074dcd7718 100644 --- a/tests/plugins/test_ddp_spawn_plugin.py +++ b/tests/plugins/test_ddp_spawn_plugin.py @@ -19,7 +19,7 @@ from torch.nn.parallel.distributed import DistributedDataParallel from 
pytorch_lightning import LightningModule, Trainer -from pytorch_lightning.plugins import DDPSpawnPlugin +from pytorch_lightning.plugins import DDPSpawnStrategy from pytorch_lightning.trainer.states import TrainerFn from tests.helpers.boring_model import BoringDataModule, BoringModel from tests.helpers.runif import RunIf @@ -52,11 +52,11 @@ def get_from_queue(self, queue) -> None: @RunIf(skip_windows=True, skip_49370=True) def test_ddp_cpu(): - """Tests if device is set correctly when training for DDPSpawnPlugin.""" + """Tests if device is set correctly when training with DDPSpawnStrategy.""" trainer = Trainer(num_processes=2, fast_dev_run=True) # assert training type plugin attributes for device setting - assert isinstance(trainer.training_type_plugin, DDPSpawnPlugin) + assert isinstance(trainer.training_type_plugin, DDPSpawnStrategy) assert not trainer.training_type_plugin.on_gpu assert not trainer.training_type_plugin.on_tpu assert trainer.training_type_plugin.root_device == torch.device("cpu") @@ -68,11 +68,11 @@ def test_ddp_spawn_extra_parameters(tmpdir): - """Tests if device is set correctly when training for DDPSpawnPlugin and tests add_to_queue/get_from_queue with - Lightning Module (deprecated way).""" + """Tests if device is set correctly when training with DDPSpawnStrategy and tests add_to_queue/get_from_queue + with Lightning Module (deprecated way).""" trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, gpus=2, strategy="ddp_spawn") - assert isinstance(trainer.training_type_plugin, DDPSpawnPlugin) + assert isinstance(trainer.training_type_plugin, DDPSpawnStrategy) assert trainer.training_type_plugin.on_gpu assert trainer.training_type_plugin.root_device == torch.device("cuda:0") @@ -85,7 +85,7 @@ def test_ddp_spawn_extra_parameters(tmpdir): assert model.test_val == "test_val" -class TestDDPSpawnPlugin(DDPSpawnPlugin): +class TestDDPSpawnStrategy(DDPSpawnStrategy): def add_to_queue(self, trainer, queue) -> None: queue.put("new_test_val") return super().add_to_queue(trainer, queue) @@ -97,10 +97,10 @@ def get_from_queue(self, trainer: Trainer, queue) -> None: @RunIf(skip_windows=True, skip_49370=True) def test_ddp_spawn_add_get_queue(tmpdir): - """Tests add_to_queue/get_from_queue with DDPSpawnPlugin.""" + """Tests add_to_queue/get_from_queue with DDPSpawnStrategy.""" - ddp_spawn_plugin = TestDDPSpawnPlugin() - trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, num_processes=2, strategy=ddp_spawn_plugin) + ddp_spawn_strategy = TestDDPSpawnStrategy() + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, num_processes=2, strategy=ddp_spawn_strategy) val: float = 1.0 val_name: str = "val_acc" @@ -108,7 +108,7 @@ def test_ddp_spawn_add_get_queue(tmpdir): dm = BoringDataModule() trainer.fit(model, datamodule=dm) assert trainer.callback_metrics[val_name] == torch.tensor(val) - assert ddp_spawn_plugin.new_test_val == "new_test_val" + assert ddp_spawn_strategy.new_test_val == "new_test_val" class BoringModelDDP(BoringModel): @@ -149,7 +149,7 @@ def test_ddp_spawn_configure_ddp(tmpdir): def test_ddp_spawn_transfer_weights(tmpdir, trainer_fn): """Tests that the spawn plugin transfers the new weights to the main process and deletes the temporary file.""" model = Mock(wraps=BoringModel(), spec=BoringModel) - plugin = DDPSpawnPlugin() + plugin = DDPSpawnStrategy() plugin.model = model trainer = Trainer(default_root_dir=tmpdir) trainer.state.fn = trainer_fn # pretend we are in a particular trainer state diff --git
a/tests/plugins/test_plugins_registry.py b/tests/plugins/test_plugins_registry.py index 4f6342d4ae18b..a406ec201e218 100644 --- a/tests/plugins/test_plugins_registry.py +++ b/tests/plugins/test_plugins_registry.py @@ -18,8 +18,8 @@ CheckpointIO, DDPFullyShardedStrategy, DDPShardedStrategy, - DDPSpawnPlugin, DDPSpawnShardedPlugin, + DDPSpawnStrategy, DDPStrategy, DeepSpeedStrategy, TPUSpawnStrategy, @@ -110,7 +110,7 @@ def test_fsdp_strategys_registry(tmpdir): "plugin_name, plugin", [ ("ddp_find_unused_parameters_false", DDPStrategy), - ("ddp_spawn_find_unused_parameters_false", DDPSpawnPlugin), + ("ddp_spawn_find_unused_parameters_false", DDPSpawnStrategy), ("ddp_sharded_spawn_find_unused_parameters_false", DDPSpawnShardedPlugin), ("ddp_sharded_find_unused_parameters_false", DDPShardedStrategy), ], diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 9556901f930fe..c2e12dd27a79a 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -42,8 +42,8 @@ DDP2Strategy, DDPFullyShardedStrategy, DDPShardedStrategy, - DDPSpawnPlugin, DDPSpawnShardedPlugin, + DDPSpawnStrategy, DDPStrategy, ) from pytorch_lightning.trainer.states import TrainerFn @@ -1432,7 +1432,7 @@ def predict( else: results = trainer.predict(model, dataloaders=dataloaders) - if not isinstance(trainer.training_type_plugin, DDPSpawnPlugin): + if not isinstance(trainer.training_type_plugin, DDPSpawnStrategy): if use_callbacks: assert cb.write_on_batch_end_called assert not cb.write_on_epoch_end_called @@ -1530,7 +1530,7 @@ def test_spawn_predict_return_predictions(_, __, accelerator): """Test that `return_predictions=True` raise a MisconfigurationException with spawn training type plugins.""" model = BoringModel() trainer = Trainer(accelerator=accelerator, strategy="ddp_spawn", devices=2, fast_dev_run=True) - assert isinstance(trainer.training_type_plugin, DDPSpawnPlugin) + assert isinstance(trainer.training_type_plugin, DDPSpawnStrategy) with pytest.raises(ProcessRaisedException, match="`return_predictions` should be set to `False`"): trainer.predict(model, dataloaders=model.train_dataloader(), return_predictions=True) @@ -2186,11 +2186,11 @@ def training_step(self, batch, batch_idx): ), ), ( - dict(strategy=DDPSpawnPlugin(), num_processes=2, gpus=None), + dict(strategy=DDPSpawnStrategy(), num_processes=2, gpus=None), dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.CPU, num_gpus=0, num_processes=2), ), ( - dict(strategy=DDPSpawnPlugin(), gpus=2), + dict(strategy=DDPSpawnStrategy(), gpus=2), dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=_AcceleratorType.GPU, num_gpus=2, num_processes=1), ), (
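For readers applying this rename downstream, here is a minimal sketch (not part of the diff above) of the pattern the updated training_tricks.rst passage describes: pre-load tensor data once in `DataModule.__init__` on the main process so that torch.multiprocessing shares it with the spawned workers via shared memory, passing the strategy under its new `DDPSpawnStrategy` name. `SharedMemoryDataModule` and the dataset shapes are hypothetical stand-ins; the `Trainer` arguments follow the 1.5-era API used elsewhere in this diff.

import torch
from torch.utils.data import DataLoader, TensorDataset

import pytorch_lightning as pl
from pytorch_lightning.plugins import DDPSpawnStrategy


class SharedMemoryDataModule(pl.LightningDataModule):
    """Hypothetical example: pre-loads all tensor data on the main process."""

    def __init__(self):
        super().__init__()
        # Loading here, before processes are spawned, lets torch.multiprocessing
        # hand the tensors to each worker through shared memory instead of
        # keeping one full dataset replica per process.
        self.dataset = TensorDataset(torch.randn(10_000, 32), torch.randint(0, 2, (10_000,)))

    def train_dataloader(self):
        return DataLoader(self.dataset, batch_size=64)


# Formerly `DDPSpawnPlugin(...)`: only the class name changes in this PR;
# constructor arguments such as `find_unused_parameters` are untouched.
trainer = pl.Trainer(gpus=2, strategy=DDPSpawnStrategy(find_unused_parameters=False))

As every hunk above shows, call sites keep their argument lists unchanged, so migrating is a pure symbol rename.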