From 930a269759283158ce0d7949687a82b289208087 Mon Sep 17 00:00:00 2001
From: Jirka Borovec
Date: Wed, 24 Feb 2021 11:01:24 +0100
Subject: [PATCH] prune deprecated Trainer arg `enable_pl_optimizer` (#6163)

* prune enable_pl_optimizer

* prune automatic_optimization
---
 CHANGELOG.md                                  |  3 +++
 docs/source/common/optimizers.rst             |  2 --
 pytorch_lightning/core/lightning.py           |  3 ---
 .../trainer/connectors/optimizer_connector.py |  7 +-----
 pytorch_lightning/trainer/trainer.py          | 24 +------------------
 pytorch_lightning/trainer/training_loop.py    |  2 --
 tests/deprecated_api/test_remove_1-3.py       |  5 ----
 tests/plugins/test_rpc_sequential_plugin.py   |  1 -
 tests/utilities/test_all_gather_grad.py       |  1 -
 9 files changed, 5 insertions(+), 43 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f4eec0a47a4f57..b3ac4faf9ccfe1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,6 +22,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Removed support for passing a bool value to `profiler` argument of Trainer ([#6164](https://github.com/PyTorchLightning/pytorch-lightning/pull/6164))
 
+- Removed deprecated Trainer argument `enable_pl_optimizer` and `automatic_optimization` ([#6163](https://github.com/PyTorchLightning/pytorch-lightning/pull/6163))
+
+
 ### Fixed
 
 - Made the `Plugin.reduce` method more consistent across all Plugins to reflect a mean-reduction by default ([#6011](https://github.com/PyTorchLightning/pytorch-lightning/pull/6011))
diff --git a/docs/source/common/optimizers.rst b/docs/source/common/optimizers.rst
index 3f7cd7f224a978..f1b9d3623278a5 100644
--- a/docs/source/common/optimizers.rst
+++ b/docs/source/common/optimizers.rst
@@ -300,8 +300,6 @@ override the :meth:`optimizer_step` function.
 
 For example, here step optimizer A every 2 batches and optimizer B every 4 batches
 
-.. note:: When using Trainer(enable_pl_optimizer=True), there is no need to call `.zero_grad()`.
-
 .. testcode::
 
     def optimizer_zero_grad(self, current_epoch, batch_idx, optimizer, opt_idx):
diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py
index 57aa264244a68a..c4d63cff4637b3 100644
--- a/pytorch_lightning/core/lightning.py
+++ b/pytorch_lightning/core/lightning.py
@@ -1324,9 +1324,6 @@ def optimizer_step(
         By default, Lightning calls ``step()`` and ``zero_grad()`` as shown in the example
         once per optimizer.
 
-        .. tip:: With ``Trainer(enable_pl_optimizer=True)``, you can use ``optimizer.step()`` directly
-            and it will handle zero_grad, accumulated gradients, AMP, TPU and more automatically for you.
-
         Warning:
             If you are overriding this method, make sure that you pass the ``optimizer_closure``
             parameter to ``optimizer.step()`` function as shown in the examples. This ensures that
diff --git a/pytorch_lightning/trainer/connectors/optimizer_connector.py b/pytorch_lightning/trainer/connectors/optimizer_connector.py
index 5fb7b698b1669b..019dd302b45eeb 100644
--- a/pytorch_lightning/trainer/connectors/optimizer_connector.py
+++ b/pytorch_lightning/trainer/connectors/optimizer_connector.py
@@ -20,12 +20,7 @@ class OptimizerConnector:
     def __init__(self, trainer):
         self.trainer = trainer
 
-    def on_trainer_init(self, enable_pl_optimizer):
-        if enable_pl_optimizer is not None:
-            rank_zero_warn(
-                "Trainer argument `enable_pl_optimizer` is deprecated in v1.1.3. It will be removed in v1.3.0",
-                DeprecationWarning
-            )
+    def on_trainer_init(self):
         self.trainer.lr_schedulers = []
         self.trainer.optimizers = []
         self.trainer.optimizer_frequencies = []
diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index 19cf5a02a6a936..3c564542f6891c 100644
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -135,9 +135,7 @@ def __init__(
         amp_backend: str = 'native',
         amp_level: str = 'O2',
         distributed_backend: Optional[str] = None,
-        automatic_optimization: Optional[bool] = None,
         move_metrics_to_cpu: bool = False,
-        enable_pl_optimizer: bool = None,  # todo: remove in v1.3
         multiple_trainloader_mode: str = 'max_size_cycle',
         stochastic_weight_avg: bool = False
     ):
@@ -213,10 +211,6 @@ def __init__(
 
             log_every_n_steps: How often to log within steps (defaults to every 50 steps).
 
-            automatic_optimization: If False you are responsible for calling .backward, .step, zero_grad
-                in LightningModule. This argument has been moved to LightningModule. It is deprecated
-                here in v1.1 and will be removed in v1.3.
-
             prepare_data_per_node: If True, each LOCAL_RANK=0 will call prepare data.
                 Otherwise only NODE_RANK=0, LOCAL_RANK=0 will prepare data
 
@@ -288,11 +282,6 @@ def __init__(
             move_metrics_to_cpu: Whether to force internal logged metrics to be moved to cpu.
                 This can save some gpu memory, but can make training slower. Use with attention.
 
-            enable_pl_optimizer: If True, each optimizer will be wrapped by
-                `pytorch_lightning.core.optimizer.LightningOptimizer`. It allows Lightning to
-                handle AMP, TPU, accumulated_gradients, etc.
-                .. warning:: Currently deprecated and it will be removed in v1.3
-
             multiple_trainloader_mode: How to loop over the datasets when there are multiple train loaders.
                 In 'max_size_cycle' mode, the trainer ends one epoch when the largest dataset is traversed,
                 and smaller datasets reload when running out of their data. In 'min_size' mode, all the datasets
@@ -345,7 +334,7 @@ def __init__(
         self.on_init_start()
 
         # init optimizer + lr scheduler related flags
-        self.optimizer_connector.on_trainer_init(enable_pl_optimizer)
+        self.optimizer_connector.on_trainer_init()
 
         # init data flags
         self.data_connector.on_trainer_init(
@@ -356,23 +345,12 @@ def __init__(
         self.training_tricks_connector.on_trainer_init(
             gradient_clip_val, track_grad_norm, accumulate_grad_batches, truncated_bptt_steps, terminate_on_nan
         )
-
-        # init train loop related flags
-        # TODO: remove in 1.3.0
-        if automatic_optimization is None:
-            automatic_optimization = True
-        else:
-            rank_zero_warn(
-                "Disable automatic optimization with the trainer flag is deprecated and will be removed in v1.3.0!"
-                "Please use the property on the LightningModule for disabling automatic optimization"
-            )
         self.train_loop.on_trainer_init(
             max_epochs,
             min_epochs,
             max_steps,
             min_steps,
             num_sanity_val_steps,
-            automatic_optimization,
             weights_summary,
         )
         self.evaluation_loop.on_trainer_init()
diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py
index d2298c8c4e860d..14408ec7695bed 100644
--- a/pytorch_lightning/trainer/training_loop.py
+++ b/pytorch_lightning/trainer/training_loop.py
@@ -58,7 +58,6 @@ def on_trainer_init(
         max_steps,
         min_steps,
         num_sanity_val_steps,
-        automatic_optimization,
         weights_summary,
     ):
         self.trainer.global_step = 0
@@ -71,7 +70,6 @@ def on_trainer_init(
         self.trainer.batch_idx = 0
         self.trainer.num_training_batches = 0
         self.trainer.train_dataloader = None
-        self.automatic_optimization = automatic_optimization
 
         # If neither max_epochs or max_steps is set, then use existing default of max_epochs = 1000
         self.trainer.max_epochs = 1000 if (max_epochs is None and max_steps is None) else max_epochs
diff --git a/tests/deprecated_api/test_remove_1-3.py b/tests/deprecated_api/test_remove_1-3.py
index ad2aa18aecc954..54a96045b08a72 100644
--- a/tests/deprecated_api/test_remove_1-3.py
+++ b/tests/deprecated_api/test_remove_1-3.py
@@ -106,8 +106,3 @@ def test_v1_3_0_deprecated_metrics():
             torch.randint(10, 20, (50, )).float(),
             torch.randint(1, 100, (50, )).float()
         )
-
-
-def test_trainer_enable_pl_optimizer(tmpdir):
-    with pytest.deprecated_call(match='will be removed in v1.3'):
-        Trainer(enable_pl_optimizer=True)
diff --git a/tests/plugins/test_rpc_sequential_plugin.py b/tests/plugins/test_rpc_sequential_plugin.py
index 8be0190566df63..ae612c66316258 100644
--- a/tests/plugins/test_rpc_sequential_plugin.py
+++ b/tests/plugins/test_rpc_sequential_plugin.py
@@ -42,7 +42,6 @@ def test_rpc_sequential_plugin_manual(tmpdir, args=None):
         gpus=2,
         distributed_backend="ddp",
         plugins=[RPCSequentialPlugin(balance=[2, 1], rpc_timeout_sec=5 * 60)],
-        enable_pl_optimizer=True,
     )
 
     trainer.fit(model)
diff --git a/tests/utilities/test_all_gather_grad.py b/tests/utilities/test_all_gather_grad.py
index f82cfc94bcce21..1be6c6fe0486c1 100644
--- a/tests/utilities/test_all_gather_grad.py
+++ b/tests/utilities/test_all_gather_grad.py
@@ -89,7 +89,6 @@ def training_epoch_end(self, outputs) -> None:
         max_epochs=1,
         log_every_n_steps=1,
         accumulate_grad_batches=2,
-        enable_pl_optimizer=True,
         gpus=2,
         accelerator="ddp",
     )