From ae2d0111469d5aaf7f386875d1ea26a1e4345203 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Tue, 5 Oct 2021 19:37:23 +0530 Subject: [PATCH 1/4] support_dict --- pytorch_lightning/plugins/training_type/deepspeed.py | 8 ++++---- tests/accelerators/test_ipu.py | 2 +- tests/plugins/test_deepspeed_plugin.py | 10 ++++++++++ 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py index 1785b3644e2c7..5435d07fbcf80 100644 --- a/pytorch_lightning/plugins/training_type/deepspeed.py +++ b/pytorch_lightning/plugins/training_type/deepspeed.py @@ -375,11 +375,11 @@ def pre_dispatch(self): self.barrier() def init_deepspeed(self): - accumulate_grad_batches = self.lightning_module.trainer.accumulate_grad_batches - if not isinstance(accumulate_grad_batches, int): + accumulation_scheduler = self.lightning_module.trainer.accumulation_scheduler + + if accumulation_scheduler.epochs != [0]: raise MisconfigurationException( - "DeepSpeed currently only supports `Trainer.accumulate_grad_batches` being an integer." - f" Received {accumulate_grad_batches}" + "DeepSpeed currently does not support different `accumulate_grad_batches` at different epoch." ) precision = self.lightning_module.trainer.accelerator.precision diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index ad8f4243418d1..acb3fd65959eb 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -308,7 +308,7 @@ def on_predict_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, da @RunIf(ipu=True) -def test_accumulate_grad_batches_dict_fails(tmpdir): +def test_different_accumulate_grad_batches_fails(tmpdir): model = IPUModel() trainer = Trainer(default_root_dir=tmpdir, ipus=1, accumulate_grad_batches={1: 2}) with pytest.raises( diff --git a/tests/plugins/test_deepspeed_plugin.py b/tests/plugins/test_deepspeed_plugin.py index 96e132d12c1c8..0691eb43d68b0 100644 --- a/tests/plugins/test_deepspeed_plugin.py +++ b/tests/plugins/test_deepspeed_plugin.py @@ -961,3 +961,13 @@ def configure_optimizers(self): else: # assert called once at init and once during training assert mock_step.call_count == 1 + (max_epoch * limit_train_batches) + + +@RunIf(deepspeed=True) +def test_different_accumulate_grad_batches_fails(tmpdir): + model = BoringModel() + trainer = Trainer(default_root_dir=tmpdir, ipus=1, accumulate_grad_batches={1: 2}) + with pytest.raises( + MisconfigurationException, match="DeepSpeed currently does not support different `accumulate_grad_batches`" + ): + trainer.fit(model) From fd3b61323e9778957ca7fbd35b4a909a5c56d3c3 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Tue, 5 Oct 2021 19:42:56 +0530 Subject: [PATCH 2/4] chlog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 84c55b4ec8a4a..db110a6e5feef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -249,6 +249,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Raise an exception if using `amp_level` with native `amp_backend` ([#9755](https://github.com/PyTorchLightning/pytorch-lightning/pull/9755)) +- Update the logic to check for accumulation steps with deepspeed ([#9826](https://github.com/PyTorchLightning/pytorch-lightning/pull/9826)) + + ### Deprecated - Deprecated `LightningModule.summarize()` in favor of `pytorch_lightning.utilities.model_summary.summarize()` From dd7fed865f8b974f8fcd5a30e3eb1b979938b996 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Tue, 5 Oct 2021 21:25:53 +0530 Subject: [PATCH 3/4] fix test --- tests/plugins/test_deepspeed_plugin.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/plugins/test_deepspeed_plugin.py b/tests/plugins/test_deepspeed_plugin.py index 0691eb43d68b0..9afcbcd5175da 100644 --- a/tests/plugins/test_deepspeed_plugin.py +++ b/tests/plugins/test_deepspeed_plugin.py @@ -963,10 +963,10 @@ def configure_optimizers(self): assert mock_step.call_count == 1 + (max_epoch * limit_train_batches) -@RunIf(deepspeed=True) +@RunIf(min_gpus=1, deepspeed=True, special=True) def test_different_accumulate_grad_batches_fails(tmpdir): model = BoringModel() - trainer = Trainer(default_root_dir=tmpdir, ipus=1, accumulate_grad_batches={1: 2}) + trainer = Trainer(default_root_dir=tmpdir, accumulate_grad_batches={1: 2}, gpus=1, plugins="deepspeed") with pytest.raises( MisconfigurationException, match="DeepSpeed currently does not support different `accumulate_grad_batches`" ): From 86a0bcf78f6e4429e97c84271ae67aa9652f8989 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Wed, 6 Oct 2021 14:19:53 +0530 Subject: [PATCH 4/4] epochs --- pytorch_lightning/plugins/training_type/deepspeed.py | 2 +- pytorch_lightning/plugins/training_type/ipu.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py index 5435d07fbcf80..f706e5f33346d 100644 --- a/pytorch_lightning/plugins/training_type/deepspeed.py +++ b/pytorch_lightning/plugins/training_type/deepspeed.py @@ -379,7 +379,7 @@ def init_deepspeed(self): if accumulation_scheduler.epochs != [0]: raise MisconfigurationException( - "DeepSpeed currently does not support different `accumulate_grad_batches` at different epoch." + "DeepSpeed currently does not support different `accumulate_grad_batches` at different epochs." ) precision = self.lightning_module.trainer.accelerator.precision diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index 6aa919f8c4c42..8849c22777589 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -193,7 +193,7 @@ def _handle_gradient_accumulation_steps(self) -> None: if accumulation_scheduler.epochs != [0]: raise MisconfigurationException( - "IPUs currently does not support different `accumulate_grad_batches` at different epoch." + "IPUs currently does not support different `accumulate_grad_batches` at different epochs." ) # TODO(@tchaton): Add support for accumulate_grad_batches being a dictionary
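
---

The guard introduced by this series rejects a per-epoch `accumulate_grad_batches` schedule by inspecting the trainer's accumulation scheduler: if the factor changes anywhere other than epoch 0 (`epochs != [0]`), a `MisconfigurationException` is raised. Below is a minimal, self-contained sketch of that behavior. It does not import Lightning; `FakeAccumulationScheduler`, `check_deepspeed_accumulation`, and the stand-in exception class are illustrative names, and the assumption that an integer `accumulate_grad_batches` maps to a `{0: n}` schedule (hence `epochs == [0]`) reflects how the Lightning gradient-accumulation scheduler behaved around the time of this patch.

```python
# Minimal sketch (not the actual Lightning classes) of the check added in this patch.


class MisconfigurationException(Exception):
    """Stand-in for pytorch_lightning.utilities.exceptions.MisconfigurationException."""


class FakeAccumulationScheduler:
    """Illustrative stand-in for the trainer's accumulation scheduler.

    `epochs` lists the epochs at which the accumulation factor changes, e.g.:
        accumulate_grad_batches=4      -> scheduling {0: 4} -> epochs == [0]
        accumulate_grad_batches={1: 2} -> scheduling {1: 2} -> epochs == [1]
    """

    def __init__(self, scheduling: dict) -> None:
        self.scheduling = scheduling
        self.epochs = sorted(scheduling)


def check_deepspeed_accumulation(scheduler: FakeAccumulationScheduler) -> None:
    # Mirrors the new guard: DeepSpeed only supports a single, constant
    # accumulation factor that applies from epoch 0 onwards.
    if scheduler.epochs != [0]:
        raise MisconfigurationException(
            "DeepSpeed currently does not support different `accumulate_grad_batches` at different epochs."
        )


if __name__ == "__main__":
    check_deepspeed_accumulation(FakeAccumulationScheduler({0: 4}))  # OK: constant factor
    try:
        check_deepspeed_accumulation(FakeAccumulationScheduler({1: 2}))  # changes at epoch 1
    except MisconfigurationException as err:
        print(f"rejected as expected: {err}")
```

The same check is applied in the IPU plugin (PATCH 4/4 only adjusts its error message), so a dict-valued `accumulate_grad_batches` such as `{1: 2}` fails fast on both backends instead of silently using a fixed factor.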