Skip to content

Commit

Permalink
FEAT-#392: Send a warning if shared object storage is enabled but the…
Browse files Browse the repository at this point in the history
… MPI library does not support it.

Signed-off-by: Kirill Suvorov <[email protected]>
  • Loading branch information
Retribution98 committed Nov 13, 2023
1 parent 7cf3903 commit 2be323c
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 2 deletions.
9 changes: 9 additions & 0 deletions docs/troubleshooting.rst
Original file line number Diff line number Diff line change
Expand Up @@ -202,3 +202,12 @@ To get more information about the flags refer to `Open MPI's mpiexec`_ command d
.. _`Open MPI's mpiexec`: https://www.open-mpi.org/doc/v3.1/man1/mpiexec.1.php
.. _`issue`: https://github.com/modin-project/unidist/issues
Shared object store is not supported in C/W model if the MPICH version in use is older than 4.2.0.
------------------------------------------------------------------------------------------------------------
Unfortunately, MPICH versions prior to 4.2.0 have a problem with shared memory in the Controller/Worker model.
**Solution**
You can run your script using the SPMD model, or use another MPI implementation
such as Open MPI or Intel MPI, or MPICH version 4.2.0 or later.
15 changes: 14 additions & 1 deletion unidist/core/backends/mpi/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import logging
import inspect
import warnings
import weakref

from unidist.config.backends.mpi.envvars import MpiSpawn
Expand Down Expand Up @@ -463,10 +464,15 @@ def versiontuple(v):
return versiontuple(mpich_version) >= versiontuple(target_version)


def is_shared_memory_supported():
def is_shared_memory_supported(send_warning=False):
"""
Check if the unidist on MPI supports shared memory.
Parameters
----------
send_warning: bool, default: False
Whether to emit a warning if shared memory is not supported.
Returns
-------
bool
Expand All @@ -480,6 +486,9 @@ def is_shared_memory_supported():
return False

if MPI.VERSION < 3:
warnings.warn(
"The too old version of MPI is used. Shared object store can not be used."
)
return False

# Mpich shared memory does not work with spawned processes prior to version 4.2.0.
Expand All @@ -488,6 +497,10 @@ def is_shared_memory_supported():
and MpiSpawn.get()
and not check_mpich_version("4.2.0")
):
warnings.warn(
"Shared object store is not supported in C/W model if the using MPICH version is less than the 4.2.0 version."
+ "Please read more about this problem in the `Troubleshooting` chapter of the Unidist documentation."
)
return False

return True
4 changes: 3 additions & 1 deletion unidist/core/backends/mpi/core/shared_object_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,10 @@ def __init__(self):
# Length of service shared memory buffer in items
self.service_info_max_count = None

mpi_state = communication.MPIState.get_instance()

# Initialize all properties above
if common.is_shared_memory_supported():
if common.is_shared_memory_supported(send_warning=mpi_state.is_root_process()):
self._allocate_shared_memory()

# Logger will be initialized after `communicator.MPIState`
Expand Down

0 comments on commit 2be323c

Please sign in to comment.