Commit f4b5515: cleanup
Signed-off-by: Sangkug Lym <[email protected]>
erhoo82 committed Jan 2, 2024
1 parent ff13ffd commit f4b5515
Showing 2 changed files with 3 additions and 3 deletions.
nemo/collections/nlp/parts/megatron_trainer_builder.py (1 addition, 1 deletion)
@@ -76,7 +76,7 @@ def _training_strategy(self) -> Union[NLPDDPStrategy, NLPFSDPStrategy]:
gradient_as_bucket_view=self.cfg.model.gradient_as_bucket_view,
find_unused_parameters=False,
nccl_communicator_config_path=self.cfg.model.get('nccl_communicator_config_path', None),
- sharp=cfg.model.get('sharp', False),
+ sharp=self.cfg.model.get('sharp', False),
)

def _grad_scaler(self) -> GradScaler:
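The change above swaps a bare cfg for self.cfg. Inside the builder method there is no local variable named cfg; the experiment config is stored on the instance, so reading the SHARP flag through cfg.model would presumably fail with a NameError at runtime. Below is a minimal sketch of the corrected pattern, using a simplified stand-in class and an OmegaConf-style config; the class and method names are illustrative, not the real NeMo builder.

from omegaconf import OmegaConf

class TrainerBuilderSketch:
    """Illustrative stand-in for the trainer builder, not the real NeMo class."""

    def __init__(self, cfg):
        # The full experiment config is kept on the instance.
        self.cfg = cfg

    def training_strategy_kwargs(self):
        # Correct: read the optional flag from the stored config via self.cfg.
        # A bare cfg.model.get(...) here would fail with a NameError, since no
        # local variable named cfg exists in this scope.
        return {"sharp": self.cfg.model.get("sharp", False)}

cfg = OmegaConf.create({"model": {"sharp": True}})
print(TrainerBuilderSketch(cfg).training_strategy_kwargs())  # {'sharp': True}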
nemo/collections/nlp/parts/nlp_overrides.py (2 additions, 2 deletions)
@@ -93,7 +93,7 @@
NEMO_MEGATRON_MODEL_PARALLEL_APPSTATE_OVERRIDE = "NEMO_MEGATRON_MODEL_PARALLEL_APPSTATE_OVERRIDE"


- def init_model_parallel(sharp, nccl_communicator_config_path: str = None) -> None:
+ def init_model_parallel(sharp: bool, nccl_communicator_config_path: str = None) -> None:
""" Initializes Megatron-LM model parallel if using model parallelism.
Args:
@@ -140,7 +140,7 @@ class NLPDDPStrategy(DDPStrategy):
no_ddp_communication_hook: Disable DDP communication hook when using AMP-O2
with FP32 gradient accumulation.
nccl_communicator_config_path: Path to the yaml file with NCCL communicator options
- sharp: Apply SHARP to data-parallel proc groups.
+ sharp: Apply SHARP to NCCL data-parallel communication.
"""

def __init__(
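For context, the sharp flag toggles NVIDIA SHARP (Scalable Hierarchical Aggregation and Reduction Protocol) offload for the NCCL data-parallel collectives, and the added type hint makes the expected boolean explicit. A rough usage sketch follows, assuming the strategy is constructed by hand rather than through the trainer builder; the argument values shown are illustrative, not defaults.

from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy

# Enable SHARP for the data-parallel process groups. When the trainer builder
# is used instead, the same flag is read from the model config via
# self.cfg.model.get('sharp', False), as in the first file of this commit.
strategy = NLPDDPStrategy(
    no_ddp_communication_hook=True,
    nccl_communicator_config_path=None,
    sharp=True,
)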
