diff --git a/docs/source/common/optimizers.rst b/docs/source/common/optimizers.rst
index 3f7cd7f224a97..f1b9d3623278a 100644
--- a/docs/source/common/optimizers.rst
+++ b/docs/source/common/optimizers.rst
@@ -300,8 +300,6 @@ override the :meth:`optimizer_step` function.
 
 For example, here step optimizer A every 2 batches and optimizer B every 4 batches
 
-.. note:: When using Trainer(enable_pl_optimizer=True), there is no need to call `.zero_grad()`.
-
 .. testcode::
 
     def optimizer_zero_grad(self, current_epoch, batch_idx, optimizer, opt_idx):
diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py
index 57aa264244a68..c4d63cff4637b 100644
--- a/pytorch_lightning/core/lightning.py
+++ b/pytorch_lightning/core/lightning.py
@@ -1324,9 +1324,6 @@ def optimizer_step(
             By default, Lightning calls ``step()`` and ``zero_grad()`` as shown in the example
             once per optimizer.
 
-            .. tip:: With ``Trainer(enable_pl_optimizer=True)``, you can use ``optimizer.step()`` directly
-                and it will handle zero_grad, accumulated gradients, AMP, TPU and more automatically for you.
-
         Warning:
             If you are overriding this method, make sure that you pass the ``optimizer_closure``
             parameter to ``optimizer.step()`` function as shown in the examples. This ensures that
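
For reference, the docs hunk above sits next to an example that steps optimizer A every 2 batches and optimizer B every 4, and the edited docstring warns that any override must forward ``optimizer_closure`` to ``optimizer.step()``. A minimal sketch of such an override, assuming the hook signature from this era of the ``LightningModule`` API (it may differ across Lightning versions):

import pytorch_lightning as pl


class AlternatingOptimizersModel(pl.LightningModule):
    # Hypothetical override: step optimizer A (idx 0) every 2 batches,
    # optimizer B (idx 1) every 4 batches.
    def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx,
                       optimizer_closure, on_tpu=False, using_native_amp=False,
                       using_lbfgs=False):
        if optimizer_idx == 0 and batch_idx % 2 == 0:
            # forwarding the closure lets Lightning run training_step + backward
            optimizer.step(closure=optimizer_closure)
            optimizer.zero_grad()
        if optimizer_idx == 1 and batch_idx % 4 == 0:
            optimizer.step(closure=optimizer_closure)
            optimizer.zero_grad()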