diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py
index 3f5dd8110774..7033d57a0da6 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py
@@ -1206,7 +1206,7 @@ def _reset_sequence_parallelism_args(self):
         for module in self.get_gpt_module_list():
             for mod in module.modules():
                 if hasattr(mod, "sequence_parallel"):
-                    mod.sequence_parallel = self.last_sequence_parallel
+                    mod.sequence_parallel = False
 
     def _restore_sequence_parallelism_args(self):
         """ Restores the sequence parallelism parameters using the values saved by