Skip to content

Commit

Permalink
mpi: Simplifiy Custom domain decomposition
Browse files Browse the repository at this point in the history
  • Loading branch information
georgebisbas committed Jun 8, 2023
1 parent 958579c commit 5fd03f7
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 65 deletions.
86 changes: 37 additions & 49 deletions devito/mpi/distributed.py
Original file line number Diff line number Diff line change
Expand Up @@ -567,31 +567,29 @@ def _arg_values(self, *args, **kwargs):
class CustomTopology(tuple):

"""
A CustomTopology is a mechanism to describe parametric domain decompositions.
The CustomTopology class provides a mechanism to describe parametric domain
decompositions. It allows users to specify how the dimensions of a domain are
decomposed into chunks based on certain parameters.
Examples
--------
Assuming a domain consisting of three distributed Dimensions x, y, and z, and
an MPI communicator comprising N processes, a CustomTopology might be:
For example, let's consider a domain with three distributed dimensions: x, y, and z,
and an MPI communicator with N processes. Here are a few examples of CustomTopology:
With N known, say N=4:
* `(1, 1, 4)`: the z Dimension is decomposed into 4 chunks
* `(2, 1, 2)`: the x Dimension is decomposed into 2 chunks; the z Dimension
* `(2, 1, 2)`: the x Dimension is decomposed into 2 chunks and the z Dimension
is decomposed into 2 chunks
With N unknown:
* `(1, '*', 1)`: the wildcard `'*'` tells the runtime to decompose the y
* `(1, '*', 1)`: the wildcard `'*'` indicates that the runtime should decompose the y
Dimension into N chunks
* `('*', '*', 1)`: the wildcard `'*'` tells the runtime to decompose both
* `('*', '*', 1)`: the wildcard `'*'` indicates that the runtime should decompose both
the x and y Dimensions in `nstars` factors of N, prioritizing
the outermost dimension
Assuming N=6 and requested topology is `('*', '*', 1)`,
since there is no integer k, so that k*k=6, we resort to the closest factors to
the nstars-th root (usually square or cubic) that satisfies that the decomposed
domains are equal to the number of MPI processes.
Assuming that the number of ranks `N` cannot evenly be decomposed to the requested
stars=6 we decompose as evenly as possible by prioritising the outermost dimension
For N=3
* `('*', '*', 1)` gives: (3, 1, 1)
Expand All @@ -611,58 +609,48 @@ class CustomTopology(tuple):
Notes
-----
Users shouldn't use this class directly. It's up to the Devito runtime to
instantiate it based on the user input.
Users should not directly use the CustomTopology class. It is instantiated
by the Devito runtime based on user input.
"""

def __new__(cls, items, input_comm):
# Keep track of nstars and already defined decompositions
nstars = len([i for i in items if i == '*'])
nstars = items.count('*')

# If no stars exist we are ready
if nstars == 0:
processed = items
else:
# Init decomposition list
# Init decomposition list and track star positions
processed = [1] * len(items)

# Get star and integer indices
int_pos = [i for i, item in enumerate(items) if isinstance(item, int)]
int_vals = [item for item in items if isinstance(item, int)]
star_pos = [i for i, item in enumerate(items) if not isinstance(item, int)]

# Decompose the processes remaining for allocation to prime factors
star_pos = []
for i, item in enumerate(items):
if isinstance(item, int):
processed[i] = item
else:
star_pos.append(i)

# Compute the remaining procs to be allocated
alloc_procs = np.prod([i for i in items if i != '*'])
remprocs = int(input_comm.size // alloc_procs)
prime_factors = primefactors(remprocs)

star_i = -1
dd_list = [1] * nstars
rem_procs = int(input_comm.size // alloc_procs)

# Start by using the max prime factor at the first starred position,
# then cyclically-iteratively decompose as evenly as possible until
# decomposing to the number of `remprocs`
while remprocs != 1:
star_i = star_i + 1
star_i = star_i % nstars
prime_factors = primefactors(remprocs)
dd_list[star_i] = dd_list[star_i]*max(prime_factors)
remprocs = remprocs // max(prime_factors)

if int_pos:
for index, value in zip(int_pos, int_vals):
processed[index] = value

if dd_list:
for index, value in zip(star_pos, dd_list):
processed[index] = value
# then iteratively decompose as evenly as possible until decomposing
# to the number of `rem_procs`
star_vals = [1] * len(items)
star_i = 0
while rem_procs > 1:
prime_factors = primefactors(rem_procs)
rem_procs //= max(prime_factors)
star_vals[star_i] *= max(prime_factors)
star_i = (star_i + 1) % nstars

# Apply computed star values to the processed
for index, value in zip(star_pos, star_vals):
processed[index] = value

# Final check that topology matches the communicator size
try:
assert np.prod(processed) == input_comm.size
except:
raise ValueError("Invalid `topology`", processed, " for given nprocs:",
input_comm.size)
assert np.prod(processed) == input_comm.size

obj = super().__new__(cls, processed)
obj.logical = items
Expand Down
18 changes: 2 additions & 16 deletions tests/test_mpi.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,12 @@
from devito.mpi import MPI
from devito.mpi.routines import HaloUpdateCall, MPICall
from devito.mpi.distributed import CustomTopology
from devito.tools import Bunch
from examples.seismic.acoustic import acoustic_setup

pytestmark = skipif(['nompi'], whole_module=True)


class DummyInputComm():
"Helper class for modelling a communicator with a specific size"
def __init__(self, size):
self.size = size


class TestDistributor(object):

@pytest.mark.parallel(mode=[2, 4])
Expand Down Expand Up @@ -194,12 +189,9 @@ def test_custom_topology(self):
(2, (1, '*', '*'), (1, 2, 1)),
(2, (2, '*', '*'), (2, 1, 1)),
(3, (1, '*', '*'), (1, 3, 1)),
(3, ('*', 1, '*'), (3, 1, 1)),
(3, ('*', '*', 1), (3, 1, 1)),
(4, (2, '*', '*'), (2, 2, 1)),
(4, ('*', 2, '*'), (2, 2, 1)),
(4, ('*', '*', 2), (2, 1, 2)),
(6, ('*', 1, '*'), (3, 1, 2)),
(6, ('*', '*', 1), (3, 2, 1)),
(6, (1, '*', '*'), (1, 3, 2)),
(6, ('*', '*', '*'), (3, 2, 1)),
Expand All @@ -212,29 +204,23 @@ def test_custom_topology(self):
(32, ('*', '*', '*'), (4, 4, 2)),
(8, ('*', 1, '*'), (4, 1, 2)),
(8, ('*', '*', 1), (4, 2, 1)),
(8, (1, '*', '*'), (1, 4, 2)),
(8, ('*', '*', '*'), (2, 2, 2)),
(9, ('*', '*', '*'), (3, 3, 1)),
(11, (1, '*', '*'), (1, 11, 1)),
(22, ('*', '*', '*'), (11, 2, 1)),
(16, ('*', '*', 1), (4, 4, 1)),
(16, ('*', 1, '*'), (4, 1, 4)),
(32, ('*', '*', 1), (8, 4, 1)),
(64, ('*', '*', '*'), (4, 4, 4)),
(64, ('*', '*', 1), (8, 8, 1)),
(64, ('*', 2, 1), (32, 2, 1)),
(64, ('*', 2, 4), (8, 2, 4)),
(128, ('*', '*', 1), (16, 8, 1)),
(231, ('*', '*', '*'), (11, 7, 3)),
(256, (1, '*', '*'), (1, 16, 16)),
(256, ('*', 1, '*'), (16, 1, 16)),
(256, ('*', '*', 1), (16, 16, 1)),
(256, ('*', '*', '*'), (8, 8, 4)),
(256, ('*', '*', 2), (16, 8, 2)),
(256, ('*', 32, 2), (4, 32, 2)),
])
def test_custom_topology_3d_dummy(self, comm_size, topology, dist_topology):
dummy_comm = DummyInputComm(comm_size)
dummy_comm = Bunch(size=comm_size)
custom_topology = CustomTopology(topology, dummy_comm)
assert custom_topology == dist_topology

Expand Down

0 comments on commit 5fd03f7

Please sign in to comment.