diff --git a/CHANGELOG.md b/CHANGELOG.md
index 759f1d74ed61d..6fa1418106226 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,6 +21,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Removed support for passing a bool value to `profiler` argument of Trainer ([#6164](https://github.com/PyTorchLightning/pytorch-lightning/pull/6164))
 
+- Removed deprecated Trainer arguments `enable_pl_optimizer` and `automatic_optimization` ([#6163](https://github.com/PyTorchLightning/pytorch-lightning/pull/6163))
+
+
 ### Fixed
 
 - Made the `Plugin.reduce` method more consistent across all Plugins to reflect a mean-reduction by default ([#6011](https://github.com/PyTorchLightning/pytorch-lightning/pull/6011))
diff --git a/docs/source/common/optimizers.rst b/docs/source/common/optimizers.rst
index 3f7cd7f224a97..f1b9d3623278a 100644
--- a/docs/source/common/optimizers.rst
+++ b/docs/source/common/optimizers.rst
@@ -300,8 +300,6 @@ override the :meth:`optimizer_step` function.
 
 For example, here step optimizer A every 2 batches and optimizer B every 4 batches
 
-.. note:: When using Trainer(enable_pl_optimizer=True), there is no need to call `.zero_grad()`.
-
 .. testcode::
 
     def optimizer_zero_grad(self, current_epoch, batch_idx, optimizer, opt_idx):
diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py
index 57aa264244a68..c4d63cff4637b 100644
--- a/pytorch_lightning/core/lightning.py
+++ b/pytorch_lightning/core/lightning.py
@@ -1324,9 +1324,6 @@ def optimizer_step(
         By default, Lightning calls ``step()`` and ``zero_grad()`` as shown in the example
         once per optimizer.
 
-        .. tip:: With ``Trainer(enable_pl_optimizer=True)``, you can use ``optimizer.step()`` directly
-            and it will handle zero_grad, accumulated gradients, AMP, TPU and more automatically for you.
-
         Warning:
             If you are overriding this method, make sure that you pass the ``optimizer_closure``
             parameter to ``optimizer.step()`` function as shown in the examples. This ensures that
diff --git a/pytorch_lightning/trainer/connectors/optimizer_connector.py b/pytorch_lightning/trainer/connectors/optimizer_connector.py
index 5fb7b698b1669..019dd302b45ee 100644
--- a/pytorch_lightning/trainer/connectors/optimizer_connector.py
+++ b/pytorch_lightning/trainer/connectors/optimizer_connector.py
@@ -20,12 +20,7 @@ class OptimizerConnector:
     def __init__(self, trainer):
         self.trainer = trainer
 
-    def on_trainer_init(self, enable_pl_optimizer):
-        if enable_pl_optimizer is not None:
-            rank_zero_warn(
-                "Trainer argument `enable_pl_optimizer` is deprecated in v1.1.3. It will be removed in v1.3.0",
-                DeprecationWarning
-            )
+    def on_trainer_init(self):
         self.trainer.lr_schedulers = []
         self.trainer.optimizers = []
         self.trainer.optimizer_frequencies = []
diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index 19cf5a02a6a93..3c564542f6891 100644
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -135,9 +135,7 @@ def __init__(
         amp_backend: str = 'native',
         amp_level: str = 'O2',
         distributed_backend: Optional[str] = None,
-        automatic_optimization: Optional[bool] = None,
         move_metrics_to_cpu: bool = False,
-        enable_pl_optimizer: bool = None,  # todo: remove in v1.3
         multiple_trainloader_mode: str = 'max_size_cycle',
         stochastic_weight_avg: bool = False
     ):
@@ -213,10 +211,6 @@ def __init__(
 
             log_every_n_steps: How often to log within steps (defaults to every 50 steps).
 
-            automatic_optimization: If False you are responsible for calling .backward, .step, zero_grad
-                in LightningModule. This argument has been moved to LightningModule. It is deprecated
-                here in v1.1 and will be removed in v1.3.
-
             prepare_data_per_node: If True, each LOCAL_RANK=0 will call prepare data.
                 Otherwise only NODE_RANK=0, LOCAL_RANK=0 will prepare data
 
@@ -288,11 +282,6 @@ def __init__(
             move_metrics_to_cpu: Whether to force internal logged metrics to be moved to cpu.
                 This can save some gpu memory, but can make training slower. Use with attention.
 
-            enable_pl_optimizer: If True, each optimizer will be wrapped by
-                `pytorch_lightning.core.optimizer.LightningOptimizer`. It allows Lightning to
-                handle AMP, TPU, accumulated_gradients, etc.
-                .. warning:: Currently deprecated and it will be removed in v1.3
-
             multiple_trainloader_mode: How to loop over the datasets when there are multiple train loaders.
                 In 'max_size_cycle' mode, the trainer ends one epoch when the largest dataset is traversed,
                 and smaller datasets reload when running out of their data. In 'min_size' mode, all the datasets
@@ -345,7 +334,7 @@ def __init__(
         self.on_init_start()
 
         # init optimizer + lr scheduler related flags
-        self.optimizer_connector.on_trainer_init(enable_pl_optimizer)
+        self.optimizer_connector.on_trainer_init()
 
         # init data flags
         self.data_connector.on_trainer_init(
@@ -356,23 +345,12 @@ def __init__(
         self.training_tricks_connector.on_trainer_init(
             gradient_clip_val, track_grad_norm, accumulate_grad_batches, truncated_bptt_steps, terminate_on_nan
         )
-
-        # init train loop related flags
-        # TODO: remove in 1.3.0
-        if automatic_optimization is None:
-            automatic_optimization = True
-        else:
-            rank_zero_warn(
-                "Disable automatic optimization with the trainer flag is deprecated and will be removed in v1.3.0!"
-                "Please use the property on the LightningModule for disabling automatic optimization"
-            )
         self.train_loop.on_trainer_init(
             max_epochs,
             min_epochs,
             max_steps,
             min_steps,
             num_sanity_val_steps,
-            automatic_optimization,
             weights_summary,
         )
         self.evaluation_loop.on_trainer_init()
diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py
index d2298c8c4e860..14408ec7695be 100644
--- a/pytorch_lightning/trainer/training_loop.py
+++ b/pytorch_lightning/trainer/training_loop.py
@@ -58,7 +58,6 @@ def on_trainer_init(
         max_steps,
         min_steps,
         num_sanity_val_steps,
-        automatic_optimization,
         weights_summary,
     ):
         self.trainer.global_step = 0
@@ -71,7 +70,6 @@ def on_trainer_init(
         self.trainer.batch_idx = 0
         self.trainer.num_training_batches = 0
         self.trainer.train_dataloader = None
-        self.automatic_optimization = automatic_optimization
 
         # If neither max_epochs or max_steps is set, then use existing default of max_epochs = 1000
         self.trainer.max_epochs = 1000 if (max_epochs is None and max_steps is None) else max_epochs
diff --git a/tests/deprecated_api/test_remove_1-3.py b/tests/deprecated_api/test_remove_1-3.py
index ad2aa18aecc95..54a96045b08a7 100644
--- a/tests/deprecated_api/test_remove_1-3.py
+++ b/tests/deprecated_api/test_remove_1-3.py
@@ -106,8 +106,3 @@ def test_v1_3_0_deprecated_metrics():
         torch.randint(10, 20, (50, )).float(),
         torch.randint(1, 100, (50, )).float()
     )
-
-
-def test_trainer_enable_pl_optimizer(tmpdir):
-    with pytest.deprecated_call(match='will be removed in v1.3'):
-        Trainer(enable_pl_optimizer=True)
diff --git a/tests/plugins/test_rpc_sequential_plugin.py b/tests/plugins/test_rpc_sequential_plugin.py
index 8be0190566df6..ae612c6631625 100644
--- a/tests/plugins/test_rpc_sequential_plugin.py
+++ b/tests/plugins/test_rpc_sequential_plugin.py
@@ -42,7 +42,6 @@ def test_rpc_sequential_plugin_manual(tmpdir, args=None):
         gpus=2,
         distributed_backend="ddp",
         plugins=[RPCSequentialPlugin(balance=[2, 1], rpc_timeout_sec=5 * 60)],
-        enable_pl_optimizer=True,
     )
 
     trainer.fit(model)
diff --git a/tests/utilities/test_all_gather_grad.py b/tests/utilities/test_all_gather_grad.py
index f82cfc94bcce2..1be6c6fe0486c 100644
--- a/tests/utilities/test_all_gather_grad.py
+++ b/tests/utilities/test_all_gather_grad.py
@@ -89,7 +89,6 @@ def training_epoch_end(self, outputs) -> None:
         max_epochs=1,
         log_every_n_steps=1,
         accumulate_grad_batches=2,
-        enable_pl_optimizer=True,
         gpus=2,
         accelerator="ddp",
     )
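
Migration sketch (illustrative, not part of the diff above): with `enable_pl_optimizer` and `automatic_optimization` removed from `Trainer`, manual optimization is declared on the `LightningModule` itself, as the removed deprecation message suggests. The module name, layer size, and learning rate below are made up for illustration; the snippet assumes the LightningModule manual-optimization API (`automatic_optimization`, `self.optimizers()`, `self.manual_backward()`).

    import torch
    import pytorch_lightning as pl


    class ManualOptimModel(pl.LightningModule):
        # Hypothetical example module: replaces the removed Trainer(automatic_optimization=False)
        # and Trainer(enable_pl_optimizer=True) flags with module-level manual optimization.

        def __init__(self):
            super().__init__()
            self.layer = torch.nn.Linear(32, 2)

        @property
        def automatic_optimization(self) -> bool:
            # Disable automatic optimization on the module, not via a Trainer flag.
            return False

        def training_step(self, batch, batch_idx):
            opt = self.optimizers()  # returns the optimizer, wrapped as a LightningOptimizer by default
            loss = self.layer(batch).sum()  # dummy loss for illustration
            opt.zero_grad()
            self.manual_backward(loss)
            opt.step()

        def configure_optimizers(self):
            return torch.optim.SGD(self.parameters(), lr=0.1)


    # Usage sketch (train_dataloader is assumed to yield float tensors of shape [batch, 32]):
    # trainer = pl.Trainer(max_epochs=1)
    # trainer.fit(ManualOptimModel(), train_dataloader)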