Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

resurface lost ddp info message #8111

Merged
merged 10 commits into from
Jun 27, 2021
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Pass the `stage` argument of `Callback.{setup,teardown}` as a keyword ([#7973](https://github.com/PyTorchLightning/pytorch-lightning/pull/7973))


- Fixed a DDP info message that was never shown ([#8111](https://github.com/PyTorchLightning/pytorch-lightning/pull/8111))


## [1.3.7] - 2021-06-22

Expand Down
15 changes: 7 additions & 8 deletions pytorch_lightning/plugins/training_type/ddp.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
rank_zero_deprecation,
rank_zero_warn,
)
from pytorch_lightning.utilities.distributed import rank_zero_only, ReduceOp, sync_ddp_if_available
from pytorch_lightning.utilities.distributed import rank_zero_info, rank_zero_only, ReduceOp, sync_ddp_if_available
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.seed import reset_seed

Expand Down Expand Up @@ -233,13 +233,6 @@ def setup_distributed(self):
# where to store ip_table
self.init_ddp_connection()

# on world_size=0 let everyone know training is starting
if self.is_global_zero and not torch.distributed.is_initialized():
log.info("-" * 100)
log.info(f"distributed_backend={self.distributed_backend}")
log.info(f"All DDP processes registered. Starting ddp with {self.world_size} processes")
log.info("-" * 100)

# set the ranks and devices
self.dist.rank = self.global_rank
self.dist.device = self.root_device
Expand Down Expand Up @@ -308,6 +301,12 @@ def init_ddp_connection(self, global_rank: Optional[int] = None, world_size: Opt
log.info(f"initializing ddp: GLOBAL_RANK: {global_rank}, MEMBER: {global_rank + 1}/{world_size}")
torch_distrib.init_process_group(self.torch_distributed_backend, rank=global_rank, world_size=world_size)

# on rank=0 let everyone know training is starting
rank_zero_info("-" * 100)
rank_zero_info(f"distributed_backend={self.distributed_backend}")
rank_zero_info(f"All DDP processes registered. Starting ddp with {self.world_size} processes")
rank_zero_info("-" * 100)

def pre_dispatch(self):
# move the model to the correct device
self.model_to_device()
Expand Down
15 changes: 7 additions & 8 deletions pytorch_lightning/plugins/training_type/ddp_spawn.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
)
from pytorch_lightning.utilities.cloud_io import atomic_save
from pytorch_lightning.utilities.cloud_io import load as pl_load
from pytorch_lightning.utilities.distributed import rank_zero_only, ReduceOp, sync_ddp_if_available
from pytorch_lightning.utilities.distributed import rank_zero_info, rank_zero_only, ReduceOp, sync_ddp_if_available
from pytorch_lightning.utilities.seed import reset_seed

if _TORCH_GREATER_EQUAL_1_8:
Expand Down Expand Up @@ -182,13 +182,6 @@ def new_process(self, process_idx, trainer, mp_queue):
# ... need to double check that it is the correct place
# self.trainer.call_setup_hook(self.model)

# on world_size=0 let everyone know training is starting
if self.is_global_zero and not torch.distributed.is_initialized():
log.info("-" * 100)
log.info(f"distributed_backend={self.distributed_backend}")
log.info(f"All DDP processes registered. Starting ddp with {self.world_size} processes")
log.info("-" * 100)

# set the ranks and devices
self.dist.rank = self.global_rank
self.dist.device = self.root_device
Expand Down Expand Up @@ -267,6 +260,12 @@ def init_ddp_connection(self, global_rank: Optional[int], world_size: Optional[i
log.info(f"initializing ddp: GLOBAL_RANK: {global_rank}, MEMBER: {global_rank + 1}/{world_size}")
torch_distrib.init_process_group(self.torch_distributed_backend, rank=global_rank, world_size=world_size)

# on rank=0 let everyone know training is starting
rank_zero_info("-" * 100)
rank_zero_info(f"distributed_backend={self.distributed_backend}")
rank_zero_info(f"All DDP processes registered. Starting ddp with {self.world_size} processes")
rank_zero_info("-" * 100)

def determine_ddp_device_ids(self):
if self.root_device.type == "cpu":
return None
Expand Down