
Commit

cleanup
Signed-off-by: Sangkug Lym <[email protected]>
erhoo82 committed Nov 22, 2023
1 parent 5c0f385 commit a312886
Showing 2 changed files with 2 additions and 6 deletions.
@@ -131,6 +131,7 @@ model:
   apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this
   gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory)
   sync_batch_comm: False # Enable stream synchronization after each p2p communication between pipeline stages
+  nccl_communicator_config_path: null # Path to the yaml file with NCCL communicator options (min_ctas, max_ctas, and cga_cluster_size)
 
   ## Activation Checkpointing
   # NeMo Megatron supports 'selective' activation checkpointing where only the memory intensive part of attention is checkpointed.
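The nccl_communicator_config_path option added above points to a YAML file carrying the NCCL communicator options named in its comment (min_ctas, max_ctas, and cga_cluster_size). A minimal sketch of what such a file could look like, assuming one section per model-parallel communicator group; the group names and numeric values below are illustrative and not part of this commit:

# nccl_comm_config.yaml (hypothetical example; group keys and values are illustrative)
tp:                    # tensor-parallel communicator group (assumed key)
  cga_cluster_size: 2  # NCCL CGA cluster size for this group
  max_ctas: 32         # upper bound on CTAs NCCL may use per collective
  min_ctas: 1          # lower bound on CTAs NCCL may use per collective
pp:                    # pipeline-parallel communicator group (assumed key)
  cga_cluster_size: 2
  max_ctas: 32
  min_ctas: 1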
7 changes: 1 addition & 6 deletions nemo/collections/nlp/parts/nlp_overrides.py
@@ -177,17 +177,12 @@ def configure_ddp(self):
         else:
             super().configure_ddp()
 
-    def init_model_parallel(
-        self,
-        global_rank: int,
-        world_size: int,
-    ) -> None:
+    def init_model_parallel(self, global_rank: int, world_size: int) -> None:
         """ Initializes Megatron-LM model parallel if using model parallelism.
         Args:
             global_rank (int): the global process index.
             world_size (int): the total number of GPUs, num_nodes * num_devices
-            is_slurm_managing_tasks (bool, optional): is the cluster managed by SLURM.
         """
         app_state = AppState()

