Commit f4b5515: cleanup
Signed-off-by: Sangkug Lym <[email protected]>
erhoo82 committed Jan 2, 2024
1 parent ff13ffd commit f4b5515
Showing 2 changed files with 3 additions and 3 deletions.
nemo/collections/nlp/parts/megatron_trainer_builder.py (1 addition, 1 deletion)
@@ -76,7 +76,7 @@ def _training_strategy(self) -> Union[NLPDDPStrategy, NLPFSDPStrategy]:
gradient_as_bucket_view=self.cfg.model.gradient_as_bucket_view,
find_unused_parameters=False,
nccl_communicator_config_path=self.cfg.model.get('nccl_communicator_config_path', None),
- sharp=cfg.model.get('sharp', False),
+ sharp=self.cfg.model.get('sharp', False),
)

def _grad_scaler(self) -> GradScaler:
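The change above swaps a bare cfg for self.cfg. Inside the builder method there is no local variable named cfg; the experiment config is stored on the instance, so reading the SHARP flag through cfg.model would presumably fail with a NameError at runtime. Below is a minimal sketch of the corrected pattern, using a simplified stand-in class and an OmegaConf-style config; the class and method names are illustrative, not the real NeMo builder.

from omegaconf import OmegaConf

class TrainerBuilderSketch:
    """Illustrative stand-in for the trainer builder, not the real NeMo class."""

    def __init__(self, cfg):
        # The full experiment config is kept on the instance.
        self.cfg = cfg

    def training_strategy_kwargs(self):
        # Correct: read the optional flag from the stored config via self.cfg.
        # A bare cfg.model.get(...) here would fail with a NameError, since no
        # local variable named cfg exists in this scope.
        return {"sharp": self.cfg.model.get("sharp", False)}

cfg = OmegaConf.create({"model": {"sharp": True}})
print(TrainerBuilderSketch(cfg).training_strategy_kwargs())  # {'sharp': True}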
nemo/collections/nlp/parts/nlp_overrides.py (2 additions, 2 deletions)
@@ -93,7 +93,7 @@
NEMO_MEGATRON_MODEL_PARALLEL_APPSTATE_OVERRIDE = "NEMO_MEGATRON_MODEL_PARALLEL_APPSTATE_OVERRIDE"


- def init_model_parallel(sharp, nccl_communicator_config_path: str = None) -> None:
+ def init_model_parallel(sharp: bool, nccl_communicator_config_path: str = None) -> None:
""" Initializes Megatron-LM model parallel if using model parallelism.
Args:
@@ -140,7 +140,7 @@ class NLPDDPStrategy(DDPStrategy):
no_ddp_communication_hook: Disable DDP communication hook when using AMP-O2
with FP32 gradient accumulation.
nccl_communicator_config_path: Path to the yaml file with NCCL communicator options
- sharp: Apply SHARP to data-parallel proc groups.
+ sharp: Apply SHARP to NCCL data-parallel communication.
"""

def __init__(
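For context, the sharp flag toggles NVIDIA SHARP (Scalable Hierarchical Aggregation and Reduction Protocol) offload for the NCCL data-parallel collectives, and the added type hint makes the expected boolean explicit. A rough usage sketch follows, assuming the strategy is constructed by hand rather than through the trainer builder; the argument values shown are illustrative, not defaults.

from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy

# Enable SHARP for the data-parallel process groups. When the trainer builder
# is used instead, the same flag is read from the model config via
# self.cfg.model.get('sharp', False), as in the first file of this commit.
strategy = NLPDDPStrategy(
    no_ddp_communication_hook=True,
    nccl_communicator_config_path=None,
    sharp=True,
)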
