Lightning-AI · rohitgr7 · Oct 6, 2021 · Oct 5, 2021 · Oct 5, 2021 · Oct 5, 2021
@@ -249,6 +249,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Raise an exception if using `amp_level` with native `amp_backend` ([#9755](https://github.com/PyTorchLightning/pytorch-lightning/pull/9755))
 
 
+- Update the logic to check for accumulation steps with DeepSpeed ([#9826](https://github.com/PyTorchLightning/pytorch-lightning/pull/9826))
+
+
 ### Deprecated
 
 - Deprecated `LightningModule.summarize()` in favor of `pytorch_lightning.utilities.model_summary.summarize()`

@@ -375,11 +375,11 @@ def pre_dispatch(self):
         self.barrier()
 
     def init_deepspeed(self):
-        accumulate_grad_batches = self.lightning_module.trainer.accumulate_grad_batches
-        if not isinstance(accumulate_grad_batches, int):
+        accumulation_scheduler = self.lightning_module.trainer.accumulation_scheduler
+
+        if accumulation_scheduler.epochs != [0]:
             raise MisconfigurationException(
-                "DeepSpeed currently only supports `Trainer.accumulate_grad_batches` being an integer."
-                f" Received {accumulate_grad_batches}"
+                "DeepSpeed currently does not support different `accumulate_grad_batches` at different epoch."
             )
 
         precision = self.lightning_module.trainer.accelerator.precision

@@ -308,7 +308,7 @@ def on_predict_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, da
 
 
 @RunIf(ipu=True)
-def test_accumulate_grad_batches_dict_fails(tmpdir):
+def test_different_accumulate_grad_batches_fails(tmpdir):
     model = IPUModel()
     trainer = Trainer(default_root_dir=tmpdir, ipus=1, accumulate_grad_batches={1: 2})
     with pytest.raises(

@@ -961,3 +961,13 @@ def configure_optimizers(self):
     else:
         # assert called once at init and once during training
         assert mock_step.call_count == 1 + (max_epoch * limit_train_batches)
+
+
+@RunIf(min_gpus=1, deepspeed=True, special=True)
+def test_different_accumulate_grad_batches_fails(tmpdir):
+    model = BoringModel()
+    trainer = Trainer(default_root_dir=tmpdir, accumulate_grad_batches={1: 2}, gpus=1, plugins="deepspeed")
+    with pytest.raises(
+        MisconfigurationException, match="DeepSpeed currently does not support different `accumulate_grad_batches`"
+    ):
+        trainer.fit(model)