From 2be323c9987abe603f88056aecae02ef0a01cd2e Mon Sep 17 00:00:00 2001 From: Kirill Suvorov Date: Mon, 13 Nov 2023 17:02:41 +0000 Subject: [PATCH] FEAT-#392: Send a warning if shared object storage is enabled but the MPI library does not support it. Signed-off-by: Kirill Suvorov --- docs/troubleshooting.rst | 9 +++++++++ unidist/core/backends/mpi/core/common.py | 15 ++++++++++++++- .../core/backends/mpi/core/shared_object_store.py | 4 +++- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/docs/troubleshooting.rst b/docs/troubleshooting.rst index 74804fc4..64893341 100644 --- a/docs/troubleshooting.rst +++ b/docs/troubleshooting.rst @@ -202,3 +202,12 @@ To get more information about the flags refer to `Open MPI's mpiexec`_ command d .. _`Open MPI's mpiexec`: https://www.open-mpi.org/doc/v3.1/man1/mpiexec.1.php .. _`issue`: https://github.com/modin-project/unidist/issues + + +Shared object store is not supported in C/W model if the using MPICH version is less than the 4.2.0 version. +------------------------------------------------------------------------------------------------------------ +Unfortunately, MPICH versions earlier than 4.2.0 have a problem with shared memory in the Controller/Worker model. + +**Solution** +You can run your script using the SPMD model, or use another MPI implementation +such as Open MPI, Intel MPI, or MPICH version 4.2.0 or later.
\ No newline at end of file diff --git a/unidist/core/backends/mpi/core/common.py b/unidist/core/backends/mpi/core/common.py index 2bb8af80..0ed7861a 100755 --- a/unidist/core/backends/mpi/core/common.py +++ b/unidist/core/backends/mpi/core/common.py @@ -6,6 +6,7 @@ import logging import inspect +import warnings import weakref from unidist.config.backends.mpi.envvars import MpiSpawn @@ -463,10 +464,15 @@ def versiontuple(v): return versiontuple(mpich_version) >= versiontuple(target_version) -def is_shared_memory_supported(): +def is_shared_memory_supported(send_warning=False): """ Check if the unidist on MPI supports shared memory. + Parameters + ---------- + send_warning: bool, default: False + The need for warning as a flag. + Returns ------- bool @@ -480,6 +486,9 @@ def is_shared_memory_supported(): return False if MPI.VERSION < 3: + warnings.warn( + "The too old version of MPI is used. Shared object store can not be used." + ) return False # Mpich shared memory does not work with spawned processes prior to version 4.2.0. @@ -488,6 +497,10 @@ def is_shared_memory_supported(): and MpiSpawn.get() and not check_mpich_version("4.2.0") ): + warnings.warn( + "Shared object store is not supported in C/W model if the using MPICH version is less than the 4.2.0 version." + + "Please read more about this problem in the `Troubleshooting` chapter of the Unidist documentation." 
+ ) return False return True diff --git a/unidist/core/backends/mpi/core/shared_object_store.py b/unidist/core/backends/mpi/core/shared_object_store.py index 2a5aa236..356d2db7 100644 --- a/unidist/core/backends/mpi/core/shared_object_store.py +++ b/unidist/core/backends/mpi/core/shared_object_store.py @@ -106,8 +106,10 @@ def __init__(self): # Length of service shared memory buffer in items self.service_info_max_count = None + mpi_state = communication.MPIState.get_instance() + # Initialize all properties above - if common.is_shared_memory_supported(): + if common.is_shared_memory_supported(send_warning=mpi_state.is_root_process()): self._allocate_shared_memory() # Logger will be initialized after `communicator.MPIState`