Debug Transformer Engine FP8 support with Megatron-core infrastructure #6740

Merged
merged 4 commits on Jun 1, 2023
Changes from all commits
3 changes: 2 additions & 1 deletion nemo/collections/nlp/parts/nlp_overrides.py
@@ -168,6 +168,7 @@ def init_model_parallel(self, global_rank: int, world_size: int) -> None:
pipeline_model_parallel_size=app_state.pipeline_model_parallel_size,
virtual_pipeline_model_parallel_size=app_state.virtual_pipeline_model_parallel_size,
pipeline_model_parallel_split_rank=app_state.pipeline_model_parallel_split_rank,
+ use_fp8=app_state.use_fp8,
)

# assert that fake tp and pp rank match after model parallel init
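For context, a minimal sketch of where the new flag ends up, assuming the megatron-core release this PR targets, whose initialize_model_parallel() accepts a use_fp8 argument and uses it to build the amax-reduction process group that Transformer Engine's FP8 recipe reduces its scaling statistics over. The parallel sizes below are placeholders, not values from the PR:

# Sketch only: how use_fp8 flows from app_state into megatron-core.
# Requires a distributed launch (e.g. torchrun) so rank/world-size
# env vars are set before process-group initialization.
import torch
from megatron.core import parallel_state

torch.distributed.init_process_group(backend="nccl")

parallel_state.initialize_model_parallel(
    tensor_model_parallel_size=2,   # placeholder value
    pipeline_model_parallel_size=1, # placeholder value
    use_fp8=True,  # mirrors app_state.use_fp8 in the diff above
)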
@@ -405,7 +406,7 @@ class PEFTSaveRestoreConnector(NLPSaveRestoreConnector):
Args:
peft_model_nemo_path: Used to provide the .nemo file corresponding to a PEFT model (which will only contain a small set of params)
peft_model_ckpt_path: Used to provide the path to .ckpt files of a PEFT model. This is required when no .nemo is available (yet), such as during resumed training.
If both are provided, the peft_model_ckpt_path takes precedence.
If neither is provided, PEFT params are initialized at random (not loaded from any external source).
"""
