NVIDIA · aklife97 · May 10, 2023 · May 9, 2023
diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py
@@ -665,8 +665,17 @@ def validation_step(self, dataloader_iter, batch_idx):
             from the dataloader to produce a list of microbatches.
             The list of microbatches is then piped through the pipeline using megatron-core fwd/bwd functions.
         """
+        if isinstance(self.model, list):
+            for model_module in self.model:
+                model_module.eval()
+
+        loss = self.fwd_bwd_step(dataloader_iter, batch_idx, True)
 
-        return self.fwd_bwd_step(dataloader_iter, batch_idx, True)
+        if isinstance(self.model, list):
+            for model_module in self.model:
+                model_module.train()
+
+        return loss
 
     def validation_epoch_end(self, outputs):
         if parallel_state.is_pipeline_last_stage():