From ee57fb8aa7749f70ea3a9ca456bac33cfb86ce1e Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Sat, 27 May 2023 12:44:43 +0000 Subject: [PATCH 01/22] compiler: Fix Array.shape --- devito/types/array.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/devito/types/array.py b/devito/types/array.py index 3a7b81056e..a3c66d051f 100644 --- a/devito/types/array.py +++ b/devito/types/array.py @@ -31,11 +31,14 @@ def _C_name(self): else: return super()._C_name - @property + @cached_property def shape(self): - return self.symbolic_shape + ret = [i.symbolic_size for i in self.dimensions] + return DimensionTuple(*ret, getters=self.dimensions) - shape_allocated = shape + @property + def shape_allocated(self): + return self.symbolic_shape class Array(ArrayBasic): From 858c34843261ef5d8725addea08b18080c1de5ca Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Fri, 26 May 2023 13:10:34 +0000 Subject: [PATCH 02/22] mpi: Support C-land Array halo exchange --- devito/mpi/halo_scheme.py | 39 +++++++++++++++++++++++++++++ devito/mpi/routines.py | 38 ++++++++++++++++++++-------- devito/passes/clusters/buffering.py | 5 ++++ devito/types/array.py | 33 +++++++++++++++--------- devito/types/basic.py | 18 ++++++++++++- devito/types/dense.py | 17 +------------ 6 files changed, 110 insertions(+), 40 deletions(-) diff --git a/devito/mpi/halo_scheme.py b/devito/mpi/halo_scheme.py index d6c9f8933f..0678b26fea 100644 --- a/devito/mpi/halo_scheme.py +++ b/devito/mpi/halo_scheme.py @@ -9,6 +9,7 @@ from devito import configuration from devito.data import CORE, OWNED, LEFT, CENTER, RIGHT from devito.ir.support import Forward, Scope +from devito.symbolics.manipulation import _uxreplace_registry from devito.tools import (Reconstructable, Tag, as_tuple, filter_ordered, flatten, frozendict, is_integer) from devito.types import Grid @@ -583,3 +584,41 @@ def __eq__(self, other): return isinstance(other, HaloTouch) and self.halo_scheme == other.halo_scheme func = Reconstructable._rebuild + + +def _uxreplace_dispatch_haloscheme(hs0, rule): + changed = False + hs = hs0 + for f, hse0 in hs0.fmapper.items(): + try: + b = rule[f] + except KeyError: + continue + + # Infer `loc_indices` and `loc_dirs` from context + loc_indices = {} + loc_dirs = {} + for d0, loc_index in hse0.loc_indices.items(): + for i, v in rule.items(): + if not (i.is_Indexed and i.function is f and v.is_Indexed): + continue + if i.indices[d0] == loc_index: + # Found! + d1 = b.indices[d0] + loc_indices[d1] = v.indices[d0] + loc_dirs[d1] = hse0.loc_dirs[d0] + break + else: + raise ValueError("Unable to perform HaloTouch replacement") + + hse = HaloSchemeEntry(frozendict(loc_indices), frozendict(loc_dirs), + hse0.halos, hse0.dims) + + hs = hs.drop(f).add(b.function, hse) + changed |= True + + return hs, changed + + +_uxreplace_registry.register(HaloTouch, + {HaloScheme: _uxreplace_dispatch_haloscheme}) diff --git a/devito/mpi/routines.py b/devito/mpi/routines.py index bca4aee49d..0738180ef7 100644 --- a/devito/mpi/routines.py +++ b/devito/mpi/routines.py @@ -17,7 +17,7 @@ from devito.symbolics import (Byref, CondNe, FieldFromPointer, FieldFromComposite, IndexedPointer, Macro, cast_mapper, subs_op_args) from devito.tools import (as_mapper, dtype_to_mpitype, dtype_len, dtype_to_ctype, - flatten, generator, split) + flatten, generator, is_integer, split) from devito.types import (Array, Bundle, Dimension, Eq, Symbol, LocalObject, CompositeObject, CustomDimension) @@ -1156,7 +1156,19 @@ def halos(self): def npeers(self): return len(self._halos) - def _arg_defaults(self, allocator, alias): + def _as_number(self, v, args): + """ + Turn a sympy.Symbol into a number. In doing so, perform a number of + sanity checks to ensure we get a Symbol iff the Msg is for an Array. + """ + if is_integer(v): + return v + else: + assert self.target.c0.is_Array + assert args is not None + return v.subs(args) + + def _arg_defaults(self, allocator, alias, args=None): # Lazy initialization if `allocator` is necessary as the `allocator` # type isn't really known until an Operator is constructed self._allocator = allocator @@ -1165,14 +1177,14 @@ def _arg_defaults(self, allocator, alias): for i, halo in enumerate(self.halos): entry = self.value[i] - # Buffer size for this peer + # Buffer shape for this peer shape = [] for dim, side in zip(*halo): try: shape.append(getattr(f._size_owned[dim], side.name)) except AttributeError: assert side is CENTER - shape.append(f._size_domain[dim]) + shape.append(self._as_number(f._size_domain[dim], args)) entry.sizes = (c_int*len(shape))(*shape) # Allocate the send/recv buffers @@ -1181,8 +1193,8 @@ def _arg_defaults(self, allocator, alias): entry.bufg, bufg_memfree_args = allocator._alloc_C_libcall(size, ctype) entry.bufs, bufs_memfree_args = allocator._alloc_C_libcall(size, ctype) - # The `memfree_args` will be used to deallocate the buffer upon returning - # from C-land + # The `memfree_args` will be used to deallocate the buffer upon + # returning from C-land self._memfree_args.extend([bufg_memfree_args, bufs_memfree_args]) return {self.name: self.value} @@ -1198,7 +1210,7 @@ def _arg_values(self, args=None, **kwargs): else: alias = f - return self._arg_defaults(args.allocator, alias=alias) + return self._arg_defaults(args.allocator, alias=alias, args=args) def _arg_apply(self, *args, **kwargs): self._C_memfree() @@ -1218,30 +1230,34 @@ class MPIMsgEnriched(MPIMsg): (_C_field_to, c_int) ] - def _arg_defaults(self, allocator, alias=None): - super()._arg_defaults(allocator, alias) + def _arg_defaults(self, allocator, alias=None, args=None): + super()._arg_defaults(allocator, alias, args=args) f = alias or self.target.c0 neighborhood = f.grid.distributor.neighborhood for i, halo in enumerate(self.halos): entry = self.value[i] + # `torank` peer + gather offsets entry.torank = neighborhood[halo.side] ofsg = [] for dim, side in zip(*halo): try: - ofsg.append(getattr(f._offset_owned[dim], side.name)) + v = getattr(f._offset_owned[dim], side.name) + ofsg.append(self._as_number(v, args)) except AttributeError: assert side is CENTER ofsg.append(f._offset_owned[dim].left) entry.ofsg = (c_int*len(ofsg))(*ofsg) + # `fromrank` peer + scatter offsets entry.fromrank = neighborhood[tuple(i.flip() for i in halo.side)] ofss = [] for dim, side in zip(*halo): try: - ofss.append(getattr(f._offset_halo[dim], side.flip().name)) + v = getattr(f._offset_halo[dim], side.flip().name) + ofss.append(self._as_number(v, args)) except AttributeError: assert side is CENTER # Note `_offset_owned`, and not `_offset_halo`, is *not* a bug diff --git a/devito/passes/clusters/buffering.py b/devito/passes/clusters/buffering.py index e5826397db..8e4c16074a 100644 --- a/devito/passes/clusters/buffering.py +++ b/devito/passes/clusters/buffering.py @@ -202,6 +202,10 @@ def callback(self, clusters, prefix, cache=None): # Substitution rules to replace buffered Functions with buffers subs = {} for b in buffers: + # Enough information to apply uxreplace to a HaloTouch, if necessary + subs[b.function] = b.buffer + + # All other Indexeds for a in b.accessv.accesses: subs[a] = b.indexed[[b.index_mapper.get(i, i) for i in a.indices]] @@ -438,6 +442,7 @@ def __init__(self, function, dim, d, accessv, cache, options, sregistry): 'name': sregistry.make_name(prefix='%sb' % function.name), 'dimensions': dims, 'dtype': function.dtype, + 'grid': function.grid, 'halo': function.halo, 'space': 'mapped', 'mapped': function diff --git a/devito/types/array.py b/devito/types/array.py index a3c66d051f..69d9ed0889 100644 --- a/devito/types/array.py +++ b/devito/types/array.py @@ -79,6 +79,11 @@ class Array(ArrayBasic): architecture doesn't have something akin to constant memory, the Array falls back to a global, const, static array in a C/C++ sense. Note that not all scopes make sense for a given space. + grid : Grid, optional + Only necessary for distributed-memory parallelism; a Grid contains + information about the distributed Dimensions, hence it is necessary + if (and only if) an Operator requires to perform a halo exchange on + an Array. initvalue : array-like, optional The initial content of the Array. Must be None if `scope='heap'`. @@ -206,6 +211,13 @@ def _mem_shared(self): def _mem_constant(self): return self._scope == 'constant' + @property + def _distributor(self): + try: + return self.grid.distributor + except AttributeError: + return None + @property def initvalue(self): return self._initvalue @@ -422,6 +434,11 @@ def __halo_setup__(self, components=(), **kwargs): raise ValueError("Components must have the same halo") return halos.pop() + @property + def c0(self): + # Shortcut for self.components[0] + return self.components[0] + # Class attributes overrides @property @@ -432,6 +449,10 @@ def is_DiscreteFunction(self): def is_TimeFunction(self): return self.c0.is_TimeFunction + @property + def grid(self): + return self.c0.grid + # Other properties and methods @property @@ -442,18 +463,6 @@ def components(self): def ncomp(self): return len(self.components) - @property - def c0(self): - # Shortcut for self.components[0] - return self.components[0] - - @property - def grid(self): - if self.is_DiscreteFunction: - return self.c0.grid - else: - return None - @property def symbolic_shape(self): # A Bundle may be defined over a SteppingDimension, which is of unknown diff --git a/devito/types/basic.py b/devito/types/basic.py index 47cb9f8f49..620b430290 100644 --- a/devito/types/basic.py +++ b/devito/types/basic.py @@ -792,7 +792,7 @@ class AbstractFunction(sympy.Function, Basic, Cached, Pickable, Evaluable): True if data is allocated as a single, contiguous chunk of memory. """ - __rkwargs__ = ('name', 'dtype', 'halo', 'padding', 'alias') + __rkwargs__ = ('name', 'dtype', 'grid', 'halo', 'padding', 'alias') @classmethod def _cache_key(cls, *args, **kwargs): @@ -870,6 +870,9 @@ def __init_finalize__(self, *args, **kwargs): self._halo = self.__halo_setup__(**kwargs) self._padding = self.__padding_setup__(**kwargs) + # There may or may not be a `Grid` + self._grid = kwargs.get('grid') + # Symbol properties # "Aliasing" another DiscreteFunction means that `self` logically # represents another object. For example, `self` might be used as the @@ -1001,6 +1004,11 @@ def shape(self): """The shape of the object.""" return self._shape + @property + def grid(self): + """The Grid on which the discretization occurred.""" + return self._grid + @property def dtype(self): return self._dtype @@ -1103,6 +1111,14 @@ def _make_pointer(self): """Generate a symbolic pointer to self.""" raise NotImplementedError + @cached_property + def _dist_dimensions(self): + """The Dimensions decomposed for distributed-parallelism.""" + if self._distributor is None: + return () + else: + return tuple(d for d in self.dimensions if d in self._distributor.dimensions) + @cached_property def _size_domain(self): """Number of points in the domain region.""" diff --git a/devito/types/dense.py b/devito/types/dense.py index 454c642176..a2daec211f 100644 --- a/devito/types/dense.py +++ b/devito/types/dense.py @@ -57,7 +57,7 @@ class DiscreteFunction(AbstractFunction, ArgProvider, Differentiable): The type of the underlying data object. """ - __rkwargs__ = AbstractFunction.__rkwargs__ + ('grid', 'staggered', 'initializer') + __rkwargs__ = AbstractFunction.__rkwargs__ + ('staggered', 'initializer') def __init_finalize__(self, *args, **kwargs): # A `Distributor` to handle domain decomposition (only relevant for MPI) @@ -70,9 +70,6 @@ def __init_finalize__(self, *args, **kwargs): # superclass constructor do its job super(DiscreteFunction, self).__init_finalize__(*args, **kwargs) - # There may or may not be a `Grid` attached to the DiscreteFunction - self._grid = kwargs.get('grid') - # Symbolic (finite difference) coefficients self._coefficients = kwargs.get('coefficients', 'standard') if self._coefficients not in ('standard', 'symbolic'): @@ -200,11 +197,6 @@ def _data_alignment(self): def _mem_external(self): return True - @property - def grid(self): - """The Grid on which the discretization occurred.""" - return self._grid - @property def staggered(self): return self._staggered @@ -652,13 +644,6 @@ def space_dimensions(self): """Tuple of Dimensions defining the physical space.""" return tuple(d for d in self.dimensions if d.is_Space) - @cached_property - def _dist_dimensions(self): - """Tuple of MPI-distributed Dimensions.""" - if self._distributor is None: - return () - return tuple(d for d in self.dimensions if d in self._distributor.dimensions) - @property def initializer(self): if self._data is not None: From 4a184e0fcfc04553897e4bc39a8299d4b56c0f49 Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Mon, 29 May 2023 08:19:37 +0000 Subject: [PATCH 03/22] compiler: Relax WaitLock regions in a ScheduleTree --- devito/ir/iet/algorithms.py | 2 +- devito/ir/stree/algorithms.py | 30 ++++++++++++++++++++++----- tests/test_gpu_common.py | 39 ++++++++++++++++++++--------------- 3 files changed, 48 insertions(+), 23 deletions(-) diff --git a/devito/ir/iet/algorithms.py b/devito/ir/iet/algorithms.py index 5c0f2341c3..59f7237fca 100644 --- a/devito/ir/iet/algorithms.py +++ b/devito/ir/iet/algorithms.py @@ -43,7 +43,7 @@ def iet_build(stree): body = HaloSpot(queues.pop(i), i.halo_scheme) elif i.is_Sync: - body = SyncSpot(i.sync_ops, body=queues.pop(i)) + body = SyncSpot(i.sync_ops, body=queues.pop(i, None)) queues.setdefault(i.parent, []).append(body) diff --git a/devito/ir/stree/algorithms.py b/devito/ir/stree/algorithms.py index e6aebf44f5..9dc6a37dba 100644 --- a/devito/ir/stree/algorithms.py +++ b/devito/ir/stree/algorithms.py @@ -7,7 +7,8 @@ from devito.ir.stree.tree import (ScheduleTree, NodeIteration, NodeConditional, NodeSync, NodeExprs, NodeSection, NodeHalo) from devito.ir.support import (SEQUENTIAL, Any, Interval, IterationInterval, - IterationSpace, normalize_properties, normalize_syncs) + IterationSpace, WaitLock, normalize_properties, + normalize_syncs) from devito.mpi.halo_scheme import HaloScheme from devito.tools import Bunch, DefaultOrderedDict @@ -176,15 +177,34 @@ def reuse_partial_subtree(c0, c1, d=None): def reuse_whole_subtree(c0, c1, d=None): - return (c0.guards.get(d) == c1.guards.get(d) and - c0.syncs.get(d) == c1.syncs.get(d)) + if not reuse_partial_subtree(c0, c1, d): + return False + + syncs0 = c0.syncs.get(d, []) + syncs1 = c1.syncs.get(d, []) + + if syncs0 == syncs1: + return True + elif not syncs0 and all(isinstance(s, WaitLock) for s in syncs1): + return True + + return False def augment_partial_subtree(cluster, tip, mapper, it=None): d = it.dim - if d in cluster.syncs: - tip = NodeSync(cluster.syncs[d], tip) + try: + syncs = cluster.syncs[d] + if all(isinstance(s, WaitLock) for s in syncs): + # Unlike all other SyncOps, a WaitLock "floats" in the stree, in that + # it doesn't need to wrap any subtree. Thus, a WaitLock acts like + # a barrier to what follows inside `d` + NodeSync(syncs, tip) + else: + tip = NodeSync(syncs, tip) + except KeyError: + pass mapper[it].bottom = tip diff --git a/tests/test_gpu_common.py b/tests/test_gpu_common.py index 616c916059..1eb11f02aa 100644 --- a/tests/test_gpu_common.py +++ b/tests/test_gpu_common.py @@ -143,12 +143,13 @@ def test_tasking_in_isolation(self, opt): op = Operator(eqns, opt=opt) # Check generated code - assert len(retrieve_iteration_tree(op)) == 3 + trees = retrieve_iteration_tree(op) + assert len(trees) == 2 assert len([i for i in FindSymbols().visit(op) if isinstance(i, Lock)]) == 1 sections = FindNodes(Section).visit(op) - assert len(sections) == 3 - assert str(sections[0].body[0].body[0].body[0].body[0]) == 'while(lock0[0] == 0);' - body = sections[2].body[0].body[0] + assert len(sections) == 2 + assert str(trees[0].root.nodes[0].body[0]) == 'while(lock0[0] == 0);' + body = sections[1].body[0].body[0] assert str(body.body[0].condition) == 'Ne(lock0[0], 2)' assert str(body.body[1]) == 'lock0[0] = 0;' body = body.body[2] @@ -217,11 +218,12 @@ def test_tasking_unfused_two_locks(self): op = Operator(eqns, opt=('tasking', 'fuse', 'orchestrate', {'linearize': False})) # Check generated code - assert len(retrieve_iteration_tree(op)) == 3 + trees = retrieve_iteration_tree(op) + assert len(trees) == 3 assert len([i for i in FindSymbols().visit(op) if isinstance(i, Lock)]) == 1 + 2 sections = FindNodes(Section).visit(op) assert len(sections) == 4 - assert (str(sections[1].body[0].body[0].body[0].body[0]) == + assert (str(trees[0].root.nodes[1].body[0]) == 'while(lock0[0] == 0 || lock1[0] == 0);') # Wait-lock body = sections[2].body[0].body[0] assert str(body.body[0].condition) == 'Ne(lock0[0], 2)' @@ -268,11 +270,12 @@ def test_tasking_forcefuse(self): {'fuse-tasks': True, 'linearize': False})) # Check generated code - assert len(retrieve_iteration_tree(op)) == 3 + trees = retrieve_iteration_tree(op) + assert len(trees) == 3 assert len([i for i in FindSymbols().visit(op) if isinstance(i, Lock)]) == 2 sections = FindNodes(Section).visit(op) assert len(sections) == 3 - assert (str(sections[1].body[0].body[0].body[0].body[0]) == + assert (str(trees[0].root.nodes[1].body[0]) == 'while(lock0[0] == 0 || lock1[0] == 0);') # Wait-lock body = sections[2].body[0].body[0] assert str(body.body[0].condition) == 'Ne(lock0[0], 2) | Ne(lock1[0], 2)' @@ -337,12 +340,12 @@ def test_tasking_multi_output(self): op1 = Operator(eqns, opt=('tasking', 'orchestrate', {'linearize': False})) # Check generated code - assert len(retrieve_iteration_tree(op1)) == 4 + trees = retrieve_iteration_tree(op1) + assert len(trees) == 4 assert len([i for i in FindSymbols().visit(op1) if isinstance(i, Lock)]) == 1 + assert str(trees[1].root.nodes[0].body[0]) == 'while(lock0[t2] == 0);' sections = FindNodes(Section).visit(op1) assert len(sections) == 2 - assert str(sections[0].body[0].body[0].body[0].body[0]) ==\ - 'while(lock0[t2] == 0);' for i in range(3): assert 'lock0[t' in str(sections[1].body[0].body[0].body[1 + i]) # Set-lock assert str(sections[1].body[0].body[0].body[4].body[-1]) ==\ @@ -374,13 +377,13 @@ def test_tasking_lock_placement(self): op = Operator(eqns, opt=('tasking', 'orchestrate')) # Check generated code -- the wait-lock is expected in section1 - assert len(retrieve_iteration_tree(op)) == 5 + trees = retrieve_iteration_tree(op) + assert len(trees) == 5 assert len([i for i in FindSymbols().visit(op) if isinstance(i, Lock)]) == 1 sections = FindNodes(Section).visit(op) assert len(sections) == 3 assert sections[0].body[0].body[0].body[0].is_Iteration - assert str(sections[1].body[0].body[0].body[0].body[0]) ==\ - 'while(lock0[t1] == 0);' + assert str(trees[1].root.nodes[1].body[0]) == 'while(lock0[t1] == 0);' @pytest.mark.parametrize('opt,ntmps', [ (('buffering', 'streaming', 'orchestrate'), 2), @@ -812,11 +815,12 @@ def test_tasking_over_compiler_generated(self): # Check generated code for op in [op1, op2]: - assert len(retrieve_iteration_tree(op)) == 5 + trees = retrieve_iteration_tree(op) + assert len(trees) == 5 assert len([i for i in FindSymbols().visit(op) if isinstance(i, Lock)]) == 1 sections = FindNodes(Section).visit(op) assert len(sections) == 3 - assert 'while(lock0[t1] == 0)' in str(sections[1].body[0].body[0].body[0]) + assert 'while(lock0[t1] == 0)' in str(trees[1].root.nodes[1].body[0]) op0.apply(time_M=nt-1) op1.apply(time_M=nt-1, u=u1, usave=usave1) @@ -1272,7 +1276,8 @@ def test_fuse_compatible_guards(self): 't,x,y,x,y,x,y') nodes = FindNodes(Conditional).visit(op) assert len(nodes) == 2 - assert len(nodes[1].then_body) == 3 + assert len(nodes[1].then_body) == 4 + assert str(nodes[1].then_body[0].body[0]) == 'while(lock0[0] == 0);' assert len(retrieve_iteration_tree(nodes[1])) == 2 From 2d1573cd9fd2e86fc6505db7cd27f65c179388d1 Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Mon, 29 May 2023 09:20:21 +0000 Subject: [PATCH 04/22] compiler: Relax HaloTouch behavior upon uxreplace --- devito/mpi/halo_scheme.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/devito/mpi/halo_scheme.py b/devito/mpi/halo_scheme.py index 0678b26fea..280c490eb9 100644 --- a/devito/mpi/halo_scheme.py +++ b/devito/mpi/halo_scheme.py @@ -609,7 +609,8 @@ def _uxreplace_dispatch_haloscheme(hs0, rule): loc_dirs[d1] = hse0.loc_dirs[d0] break else: - raise ValueError("Unable to perform HaloTouch replacement") + loc_indices[d0] = loc_index + loc_dirs[d0] = hse0.loc_dirs[d0] hse = HaloSchemeEntry(frozendict(loc_indices), frozendict(loc_dirs), hse0.halos, hse0.dims) From 7d8333103ff0947ebcc1ff6c472fcae83d9a501e Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Mon, 29 May 2023 15:08:44 +0000 Subject: [PATCH 05/22] compiler: Improve HaloTouch + uxreplace --- devito/mpi/halo_scheme.py | 54 +++++++++++++++++------------ devito/passes/clusters/buffering.py | 4 --- 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/devito/mpi/halo_scheme.py b/devito/mpi/halo_scheme.py index 280c490eb9..b133156ade 100644 --- a/devito/mpi/halo_scheme.py +++ b/devito/mpi/halo_scheme.py @@ -590,33 +590,41 @@ def _uxreplace_dispatch_haloscheme(hs0, rule): changed = False hs = hs0 for f, hse0 in hs0.fmapper.items(): - try: - b = rule[f] - except KeyError: - continue + # Is it an attempt to replace `f`? + for i, v in rule.items(): + loc_indices = {} + loc_dirs = {} + + if i is f: + # Yes! + g = v + hse = hse0 + + elif i.is_Indexed and i.function is f and v.is_Indexed: + # Yes, but through an Indexed, hence the `loc_indices` may now + # differ; let's infer them from the context + g = v.function + + for d0, loc_index in hse0.loc_indices.items(): + if i.indices[d0] == loc_index: + # They indeed do change + d1 = g.indices[d0] + loc_indices[d1] = v.indices[d0] + loc_dirs[d1] = hse0.loc_dirs[d0] + else: + loc_indices[d0] = loc_index + loc_dirs[d0] = hse0.loc_dirs[d0] - # Infer `loc_indices` and `loc_dirs` from context - loc_indices = {} - loc_dirs = {} - for d0, loc_index in hse0.loc_indices.items(): - for i, v in rule.items(): - if not (i.is_Indexed and i.function is f and v.is_Indexed): - continue - if i.indices[d0] == loc_index: - # Found! - d1 = b.indices[d0] - loc_indices[d1] = v.indices[d0] - loc_dirs[d1] = hse0.loc_dirs[d0] - break + hse = HaloSchemeEntry(frozendict(loc_indices), + frozendict(loc_dirs), + hse0.halos, hse0.dims) else: - loc_indices[d0] = loc_index - loc_dirs[d0] = hse0.loc_dirs[d0] + continue - hse = HaloSchemeEntry(frozendict(loc_indices), frozendict(loc_dirs), - hse0.halos, hse0.dims) + hs = hs.drop(f).add(g, hse) + changed |= True - hs = hs.drop(f).add(b.function, hse) - changed |= True + break return hs, changed diff --git a/devito/passes/clusters/buffering.py b/devito/passes/clusters/buffering.py index 8e4c16074a..ea31d5d0da 100644 --- a/devito/passes/clusters/buffering.py +++ b/devito/passes/clusters/buffering.py @@ -202,10 +202,6 @@ def callback(self, clusters, prefix, cache=None): # Substitution rules to replace buffered Functions with buffers subs = {} for b in buffers: - # Enough information to apply uxreplace to a HaloTouch, if necessary - subs[b.function] = b.buffer - - # All other Indexeds for a in b.accessv.accesses: subs[a] = b.indexed[[b.index_mapper.get(i, i) for i in a.indices]] From 14257b1ce5a9998bdf7878a0d4da7e99a9ce866c Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Mon, 29 May 2023 15:19:52 +0000 Subject: [PATCH 06/22] mpi: Use ad-hoc Dimensions for buffers --- devito/mpi/routines.py | 30 ++++++++++++++++-------------- devito/types/dimension.py | 6 +++--- examples/mpi/overview.ipynb | 20 ++++++++++---------- 3 files changed, 29 insertions(+), 27 deletions(-) diff --git a/devito/mpi/routines.py b/devito/mpi/routines.py index 0738180ef7..679c755f07 100644 --- a/devito/mpi/routines.py +++ b/devito/mpi/routines.py @@ -338,21 +338,24 @@ def _make_all(self, f, hse, msg): def _make_copy(self, f, hse, key, swap=False): dims = [d.root for d in f.dimensions if d not in hse.loc_indices] + ofs = [Symbol(name='o%s' % d.root, is_const=True) for d in f.dimensions] - f_offsets = [] - f_indices = [] - for d, h in zip(f.dimensions, f._size_nodomain.left): - offset = Symbol(name='o%s' % d.root, is_const=True) - f_offsets.append(offset) - offset_nohalo = offset - h - f_indices.append(offset_nohalo + (d.root if d not in hse.loc_indices else 0)) + bshape = [Symbol(name='b%s' % d.symbolic_size) for d in dims] + bdims = [CustomDimension(name=d.name, parent=d, symbolic_size=s) + for d, s in zip(dims, bshape)] eqns = [] - eqns.extend([Eq(d.symbolic_min, 0) for d in dims]) - eqns.extend([Eq(d.symbolic_max, d.symbolic_size - 1) for d in dims]) + eqns.extend([Eq(d.symbolic_min, 0) for d in bdims]) + eqns.extend([Eq(d.symbolic_max, d.symbolic_size - 1) for d in bdims]) + + vd = CustomDimension(name='vd', symbolic_size=f.ncomp) + buf = Array(name='buf', dimensions=[vd] + bdims, dtype=f.c0.dtype, + padding=0) + + mapper = dict(zip(dims, bdims)) + findices = [o - h + mapper.get(d.root, 0) + for d, o, h in zip(f.dimensions, ofs, f._size_nodomain.left)] - bdims = [CustomDimension(name='vd', symbolic_size=f.ncomp)] + dims - buf = Array(name='buf', dimensions=bdims, dtype=f.c0.dtype, padding=0) if swap is False: swap = lambda i, j: (i, j) name = 'gather%s' % key @@ -360,13 +363,12 @@ def _make_copy(self, f, hse, key, swap=False): swap = lambda i, j: (j, i) name = 'scatter%s' % key for i, c in enumerate(f.components): - eqns.append(Eq(*swap(buf[[i] + dims], c[f_indices]))) + eqns.append(Eq(*swap(buf[[i] + bdims], c[findices]))) # Compile `eqns` into an IET via recursive compilation irs, _ = self.rcompile(eqns) - shape = [d.symbolic_size for d in dims] - parameters = [buf] + shape + list(f.components) + f_offsets + parameters = [buf] + bshape + list(f.components) + ofs return CopyBuffer(name, irs.uiet, parameters) diff --git a/devito/types/dimension.py b/devito/types/dimension.py index 43e7a3d0f3..ffec8a3f42 100644 --- a/devito/types/dimension.py +++ b/devito/types/dimension.py @@ -1239,10 +1239,10 @@ def spacing(self): @property def bound_symbols(self): + ret = {self.symbolic_min, self.symbolic_max, self.symbolic_size} if self.is_Derived: - return self.parent.bound_symbols - else: - return frozenset() + ret.update(self.parent.bound_symbols) + return frozenset(i for i in ret if i.is_Symbol) @cached_property def _defines(self): diff --git a/examples/mpi/overview.ipynb b/examples/mpi/overview.ipynb index 92b88b6048..3f535748f8 100644 --- a/examples/mpi/overview.ipynb +++ b/examples/mpi/overview.ipynb @@ -451,8 +451,8 @@ " double section0;\n", "} ;\n", "\n", - "static void gather0(float *restrict buf_vec, const int x_size, const int y_size, struct dataobj *restrict u_vec, const int otime, const int ox, const int oy);\n", - "static void scatter0(float *restrict buf_vec, const int x_size, const int y_size, struct dataobj *restrict u_vec, const int otime, const int ox, const int oy);\n", + "static void gather0(float *restrict buf_vec, int bx_size, int by_size, struct dataobj *restrict u_vec, const int otime, const int ox, const int oy);\n", + "static void scatter0(float *restrict buf_vec, int bx_size, int by_size, struct dataobj *restrict u_vec, const int otime, const int ox, const int oy);\n", "static void sendrecv0(struct dataobj *restrict u_vec, const int x_size, const int y_size, int ogtime, int ogx, int ogy, int ostime, int osx, int osy, int fromrank, int torank, MPI_Comm comm);\n", "static void haloupdate0(struct dataobj *restrict u_vec, MPI_Comm comm, struct neighborhood * nb, int otime);\n", "\n", @@ -486,15 +486,15 @@ " return 0;\n", "}\n", "\n", - "static void gather0(float *restrict buf_vec, const int x_size, const int y_size, struct dataobj *restrict u_vec, const int otime, const int ox, const int oy)\n", + "static void gather0(float *restrict buf_vec, int bx_size, int by_size, struct dataobj *restrict u_vec, const int otime, const int ox, const int oy)\n", "{\n", - " float (*restrict buf)[x_size][y_size] __attribute__ ((aligned (64))) = (float (*)[x_size][y_size]) buf_vec;\n", + " float (*restrict buf)[bx_size][by_size] __attribute__ ((aligned (64))) = (float (*)[bx_size][by_size]) buf_vec;\n", " float (*restrict u)[u_vec->size[1]][u_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[u_vec->size[1]][u_vec->size[2]]) u_vec->data;\n", "\n", " const int x_m = 0;\n", " const int y_m = 0;\n", - " const int x_M = x_size - 1;\n", - " const int y_M = y_size - 1;\n", + " const int x_M = bx_size - 1;\n", + " const int y_M = by_size - 1;\n", " for (int x = x_m; x <= x_M; x += 1)\n", " {\n", " #pragma omp simd aligned(u:64)\n", @@ -505,15 +505,15 @@ " }\n", "}\n", "\n", - "static void scatter0(float *restrict buf_vec, const int x_size, const int y_size, struct dataobj *restrict u_vec, const int otime, const int ox, const int oy)\n", + "static void scatter0(float *restrict buf_vec, int bx_size, int by_size, struct dataobj *restrict u_vec, const int otime, const int ox, const int oy)\n", "{\n", - " float (*restrict buf)[x_size][y_size] __attribute__ ((aligned (64))) = (float (*)[x_size][y_size]) buf_vec;\n", + " float (*restrict buf)[bx_size][by_size] __attribute__ ((aligned (64))) = (float (*)[bx_size][by_size]) buf_vec;\n", " float (*restrict u)[u_vec->size[1]][u_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[u_vec->size[1]][u_vec->size[2]]) u_vec->data;\n", "\n", " const int x_m = 0;\n", " const int y_m = 0;\n", - " const int x_M = x_size - 1;\n", - " const int y_M = y_size - 1;\n", + " const int x_M = bx_size - 1;\n", + " const int y_M = by_size - 1;\n", "\n", " for (int x = x_m; x <= x_M; x += 1)\n", " {\n", From 3cd5ae6b95dec75e5f8a7820c0f5b41725a52202 Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Tue, 30 May 2023 08:30:15 +0000 Subject: [PATCH 07/22] compiler: Patch merge_halospot in presence of Conditionals --- devito/ir/iet/visitors.py | 23 +++++++++++++++++------ devito/passes/iet/mpi.py | 33 +++++++++++++++++++++++---------- tests/test_mpi.py | 29 ++++++++++++++++++++++++++++- 3 files changed, 68 insertions(+), 17 deletions(-) diff --git a/devito/ir/iet/visitors.py b/devito/ir/iet/visitors.py index e74a8935a1..07400e7c7f 100644 --- a/devito/ir/iet/visitors.py +++ b/devito/ir/iet/visitors.py @@ -26,8 +26,8 @@ __all__ = ['FindApplications', 'FindNodes', 'FindSections', 'FindSymbols', - 'MapExprStmts', 'MapNodes', 'IsPerfectIteration', 'printAST', 'CGen', - 'CInterface', 'Transformer', 'Uxreplace'] + 'MapExprStmts', 'MapHaloSpots', 'MapNodes', 'IsPerfectIteration', + 'printAST', 'CGen', 'CInterface', 'Transformer', 'Uxreplace'] class Visitor(GenericVisitor): @@ -737,14 +737,17 @@ def visit_Conditional(self, o, ret=None, queue=None): return ret -class MapExprStmts(FindSections): +class MapKind(FindSections): """ - Construct a mapper from ExprStmts, i.e. expression statements such as Calls - and Expressions, to their enclosing block (e.g., Iteration, Block). + Base class to construct mappers from Nodes of given type to their enclosing + scope of Nodes. """ - def visit_ExprStmt(self, o, ret=None, queue=None): + # NOTE: Ideally, we would use a metaclass that dynamically constructs mappers + # for the kind supplied by the caller, but it'd be overkill at the moment + + def visit_dummy(self, o, ret=None, queue=None): if ret is None: ret = self.default_retval() ret[o] = as_tuple(queue) @@ -754,6 +757,14 @@ def visit_ExprStmt(self, o, ret=None, queue=None): visit_Block = FindSections.visit_Iteration +class MapExprStmts(MapKind): + visit_ExprStmt = MapKind.visit_dummy + + +class MapHaloSpots(MapKind): + visit_HaloSpot = MapKind.visit_dummy + + class MapNodes(Visitor): @classmethod diff --git a/devito/passes/iet/mpi.py b/devito/passes/iet/mpi.py index ebafdfb460..00d96213aa 100644 --- a/devito/passes/iet/mpi.py +++ b/devito/passes/iet/mpi.py @@ -3,7 +3,8 @@ from sympy import S from devito.ir.iet import (Call, Expression, HaloSpot, Iteration, FindNodes, - MapNodes, Transformer, retrieve_iteration_tree) + MapNodes, MapHaloSpots, Transformer, + retrieve_iteration_tree) from devito.ir.support import PARALLEL, Scope from devito.mpi.halo_scheme import HaloScheme from devito.mpi.routines import HaloExchangeBuilder @@ -154,8 +155,14 @@ def rule2(dep, hs, loc_indices): rules = [rule0, rule1, rule2] # Analysis + cond_mapper = MapHaloSpots().visit(iet) + cond_mapper = {hs: {i for i in v if i.is_Conditional} + for hs, v in cond_mapper.items()} + + iter_mapper = MapNodes(Iteration, HaloSpot, 'immediate').visit(iet) + mapper = {} - for i, halo_spots in MapNodes(Iteration, HaloSpot, 'immediate').visit(iet).items(): + for i, halo_spots in iter_mapper.items(): if i is None or len(halo_spots) <= 1: continue @@ -164,20 +171,26 @@ def rule2(dep, hs, loc_indices): hs0 = halo_spots[0] mapper[hs0] = hs0.halo_scheme - for hs in halo_spots[1:]: - mapper[hs] = hs.halo_scheme + for hs1 in halo_spots[1:]: + mapper[hs1] = hs1.halo_scheme - for f, v in hs.fmapper.items(): + # If there are Conditionals involved, both `hs0` and `hs1` must be + # within the same Conditional, otherwise we would break the control + # flow semantics + if cond_mapper.get(hs0) != cond_mapper.get(hs1): + continue + + for f, v in hs1.fmapper.items(): for dep in scope.d_flow.project(f): - if not any(r(dep, hs, v.loc_indices) for r in rules): + if not any(r(dep, hs1, v.loc_indices) for r in rules): break else: try: - mapper[hs0] = HaloScheme.union([mapper[hs0], - hs.halo_scheme.project(f)]) - mapper[hs] = mapper[hs].drop(f) + hs = hs1.halo_scheme.project(f) + mapper[hs0] = HaloScheme.union([mapper[hs0], hs]) + mapper[hs1] = mapper[hs1].drop(f) except ValueError: - # `hs.loc_indices= 2) + + eqns = [Eq(f.forward, f.dx2 + 1, implicit_dims=cd0), + Eq(h.forward, h.dx2 + 1, implicit_dims=cd1)] + + op = Operator(eqns) + + calls = FindNodes(Call).visit(op) + assert len(calls) == 2 + conds = FindNodes(Conditional).visit(op) + assert len(conds) == 2 + assert all(isinstance(i.then_body[0].body[0].body[0].body[0], + HaloUpdateList) for i in conds) + @pytest.mark.parametrize('expr,expected', [ ('f[t,x-1,y] + f[t,x+1,y]', {'rc', 'lc'}), ('f[t,x,y-1] + f[t,x,y+1]', {'cr', 'cl'}), From 134dcd379fc1a6191a7109c14119b237f57827d9 Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Tue, 30 May 2023 14:40:09 +0000 Subject: [PATCH 08/22] compiler: Enable rcompile customization --- devito/operator/operator.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/devito/operator/operator.py b/devito/operator/operator.py index 1e201f499d..73cf07f8e4 100644 --- a/devito/operator/operator.py +++ b/devito/operator/operator.py @@ -995,6 +995,19 @@ def __setstate__(self, state): self._lib.name = soname +# Default action (perform or bypass) for selected compilation passes upon +# recursive compilation +# NOTE: it may not only be pointless to apply the following passes recursively +# (because once, during the main compilation phase, is simply enough), but also +# dangerous as some of them (the minority) might break in some circumstances +# if applied in cascade (e.g., `linearization` on top of `linearization`) +rcompile_registry = { + 'mpi': False, + 'linearize': False, + 'place-transfers': False +} + + def rcompile(expressions, kwargs=None): """ Perform recursive compilation on an ordered sequence of symbolic expressions. @@ -1008,14 +1021,7 @@ def rcompile(expressions, kwargs=None): # Tweak the compilation kwargs options = dict(kwargs['options']) - # NOTE: it is not only pointless to apply the following passes recursively - # (because once, during the main compilation phase, is simply enough), but - # also dangerous as a handful of compiler passes, the minority, might break - # in some circumstances if applied in cascade (e.g., `linearization` on top - # of `linearization`) - options['mpi'] = False - options['linearize'] = False # Will be carried out later on - options['place-transfers'] = False + options.update(rcompile_registry) kwargs['options'] = options # Recursive profiling not supported -- would be a complete mess From 864af117d24caf5b194ba8d01a8619049df1e81a Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Wed, 31 May 2023 09:18:38 +0000 Subject: [PATCH 09/22] mpi: Patch MPIMsg construction --- devito/mpi/routines.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/devito/mpi/routines.py b/devito/mpi/routines.py index 679c755f07..c5f9bce6ea 100644 --- a/devito/mpi/routines.py +++ b/devito/mpi/routines.py @@ -1164,11 +1164,11 @@ def _as_number(self, v, args): sanity checks to ensure we get a Symbol iff the Msg is for an Array. """ if is_integer(v): - return v + return int(v) else: assert self.target.c0.is_Array assert args is not None - return v.subs(args) + return int(v.subs(args)) def _arg_defaults(self, allocator, alias, args=None): # Lazy initialization if `allocator` is necessary as the `allocator` From c683071d3fdd2c05691a25ebac562608215d2ed8 Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Wed, 31 May 2023 12:48:37 +0000 Subject: [PATCH 10/22] compiler: Patch double-buffering --- devito/passes/clusters/buffering.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/devito/passes/clusters/buffering.py b/devito/passes/clusters/buffering.py index ea31d5d0da..4bdcc68e12 100644 --- a/devito/passes/clusters/buffering.py +++ b/devito/passes/clusters/buffering.py @@ -498,13 +498,13 @@ def writeto(self): directions = {} for d, h in zip(self.buffer.dimensions, self.buffer._size_halo): try: - interval, si, direction = self.itintervals_mapper[d] + i, si, direction = self.itintervals_mapper[d] # The initialization must comprise the halo region as well, since # in principle this could be accessed through a stencil - interval = interval.translate(v0=-h.left, v1=h.right) + interval = Interval(i.dim, -h.left, h.right, i.stamp) except KeyError: assert d is self.xd - interval, si, direction = Interval(d, 0, 0), (), Forward + interval, si, direction = Interval(d), (), Forward intervals.append(interval) sub_iterators[d] = si directions[d] = direction From 23a582496f827e15f4f906b9cee48ccc56bed70a Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Thu, 1 Jun 2023 09:16:16 +0000 Subject: [PATCH 11/22] compiler: Fix BoundSymbol and Indirection pickling --- devito/types/basic.py | 2 ++ devito/types/misc.py | 6 ++++-- tests/test_pickle.py | 32 +++++++++++++++++++++++++++++++- 3 files changed, 37 insertions(+), 3 deletions(-) diff --git a/devito/types/basic.py b/devito/types/basic.py index 620b430290..0610c88ace 100644 --- a/devito/types/basic.py +++ b/devito/types/basic.py @@ -1326,6 +1326,8 @@ class BoundSymbol(AbstractSymbol): BoundSymbol will also become a garbage collector candidate. """ + __rkwargs__ = AbstractSymbol.__rkwargs__ + ('function',) + def __new__(cls, *args, function=None, **kwargs): obj = AbstractSymbol.__new__(cls, *args, **kwargs) obj._function = function diff --git a/devito/types/misc.py b/devito/types/misc.py index 031762c69e..623fbcd69a 100644 --- a/devito/types/misc.py +++ b/devito/types/misc.py @@ -161,8 +161,10 @@ class Indirection(Symbol): __rkwargs__ = Symbol.__rkwargs__ + ('mapped',) - def __new__(cls, name=None, mapped=None, dtype=np.uint64, is_const=True): - obj = super().__new__(cls, name=name, dtype=dtype, is_const=is_const) + def __new__(cls, name=None, mapped=None, dtype=np.uint64, is_const=True, + **kwargs): + obj = super().__new__(cls, name=name, dtype=dtype, is_const=is_const, + **kwargs) obj.mapped = mapped return obj diff --git a/tests/test_pickle.py b/tests/test_pickle.py index e0c431afcb..6e04127106 100644 --- a/tests/test_pickle.py +++ b/tests/test_pickle.py @@ -14,7 +14,8 @@ MPIRegion) from devito.types import (Array, CustomDimension, Symbol as dSymbol, Scalar, PointerArray, Lock, PThreadArray, SharedData, Timer, - DeviceID, NPThreads, ThreadID, TempFunction) + DeviceID, NPThreads, ThreadID, TempFunction, Indirection) +from devito.types.basic import BoundSymbol from devito.tools import EnrichedTuple from devito.symbolics import (IntDiv, ListInitializer, FieldFromPointer, CallFromPointer, DefFunction) @@ -117,6 +118,35 @@ def test_internal_symbols(): assert new_s.assumptions0['nonnegative'] is True +def test_bound_symbol(): + grid = Grid(shape=(3, 3, 3)) + f = Function(name='f', grid=grid) + + bs = f._C_symbol + pkl_bs = pickle.dumps(bs) + new_bs = pickle.loads(pkl_bs) + + assert isinstance(new_bs, BoundSymbol) + assert new_bs.name == bs.name + assert isinstance(new_bs.function, Function) + assert str(new_bs.function) == str(bs.function) + + +def test_indirection(): + grid = Grid(shape=(3, 3, 3)) + f = Function(name='f', grid=grid) + + ind = Indirection(name='ofs', mapped=f) + + pkl_ind = pickle.dumps(ind) + new_ind = pickle.loads(pkl_ind) + + assert new_ind.name == ind.name + assert isinstance(new_ind.mapped, Function) + assert str(new_ind.mapped) == str(ind.mapped) == str(f) + assert new_ind.dtype == ind.dtype + + def test_array(): grid = Grid(shape=(3, 3)) d = Dimension(name='d') From ca81cea00f149ce7b39ad69cdeedec3a7d266f7a Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Thu, 1 Jun 2023 09:34:21 +0000 Subject: [PATCH 12/22] compiler: Enable HierarchyLayer comparison --- devito/types/utils.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/devito/types/utils.py b/devito/types/utils.py index 3266a27ff8..5d1d79f9c6 100644 --- a/devito/types/utils.py +++ b/devito/types/utils.py @@ -59,6 +59,13 @@ def __init__(self, suffix=''): def __repr__(self): return "Layer<%s>" % self.suffix + def __eq__(self, other): + return (isinstance(other, HierarchyLayer) and + self.suffix == other.suffix) + + def __hash__(self): + return hash(self.suffix) + class HostLayer(HierarchyLayer): pass From e87a0c7b4bba4bbf98f45de4508dad8f0e9f1993 Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Thu, 1 Jun 2023 12:17:25 +0000 Subject: [PATCH 13/22] misc: Switch from pickle to cloudpickle --- devito/passes/iet/parpragma.py | 4 +++- tests/test_gpu_common.py | 21 +++++++++++++++++++++ tests/test_pickle.py | 2 +- 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py index eccb506e7b..08818787f9 100644 --- a/devito/passes/iet/parpragma.py +++ b/devito/passes/iet/parpragma.py @@ -527,7 +527,9 @@ def function(self): def imask(self): return self._imask - @cached_property + # TODO: cached_property here will break our pickling tests for reasons that + # are still mysterious after considerable investigation + @property def sections(self): return make_sections_from_imask(self.function, self.imask) diff --git a/tests/test_gpu_common.py b/tests/test_gpu_common.py index 1eb11f02aa..1e465ea15b 100644 --- a/tests/test_gpu_common.py +++ b/tests/test_gpu_common.py @@ -1,3 +1,5 @@ +import cloudpickle as pickle + import pytest import numpy as np import scipy.sparse @@ -1348,6 +1350,25 @@ def test_npthreads(self): assert op.arguments(time_M=2, npthreads0=5) +class TestMisc(object): + + def test_pickling(self): + grid = Grid(shape=(10, 10)) + + u = TimeFunction(name='u', grid=grid) + usave = TimeFunction(name="usave", grid=grid, save=10) + + eqns = [Eq(u.forward, u + 1), + Eq(usave, u.forward)] + + op = Operator(eqns) + + pkl_op = pickle.dumps(op) + new_op = pickle.loads(pkl_op) + + assert str(op) == str(new_op) + + class TestEdgeCases(object): def test_empty_arrays(self): diff --git a/tests/test_pickle.py b/tests/test_pickle.py index 6e04127106..653beace88 100644 --- a/tests/test_pickle.py +++ b/tests/test_pickle.py @@ -1,7 +1,7 @@ import pytest import numpy as np from sympy import Symbol -import pickle +import cloudpickle as pickle from conftest import skipif from devito import (Constant, Eq, Function, TimeFunction, SparseFunction, Grid, From b596daeb602e890b5a424370419730647712fa72 Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Thu, 1 Jun 2023 13:44:45 +0000 Subject: [PATCH 14/22] compiler: Patch HaloTouch via uxreplace --- devito/mpi/halo_scheme.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/devito/mpi/halo_scheme.py b/devito/mpi/halo_scheme.py index b133156ade..0204c171e6 100644 --- a/devito/mpi/halo_scheme.py +++ b/devito/mpi/halo_scheme.py @@ -592,9 +592,6 @@ def _uxreplace_dispatch_haloscheme(hs0, rule): for f, hse0 in hs0.fmapper.items(): # Is it an attempt to replace `f`? for i, v in rule.items(): - loc_indices = {} - loc_dirs = {} - if i is f: # Yes! g = v @@ -605,19 +602,23 @@ def _uxreplace_dispatch_haloscheme(hs0, rule): # differ; let's infer them from the context g = v.function + loc_indices = {} + loc_dirs = {} for d0, loc_index in hse0.loc_indices.items(): if i.indices[d0] == loc_index: # They indeed do change d1 = g.indices[d0] loc_indices[d1] = v.indices[d0] loc_dirs[d1] = hse0.loc_dirs[d0] - else: - loc_indices[d0] = loc_index - loc_dirs[d0] = hse0.loc_dirs[d0] + + if len(loc_indices) != len(hse0.loc_indices): + # Nope, let's try with the next Indexed, if any + continue hse = HaloSchemeEntry(frozendict(loc_indices), frozendict(loc_dirs), hse0.halos, hse0.dims) + else: continue From a185119a4d4dd52d7d5ccf97c1f3a7889bdd73fe Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Wed, 19 Apr 2023 08:30:29 +0000 Subject: [PATCH 15/22] compiler: Enhance Reconstructable with variadic args support --- devito/tools/abc.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/devito/tools/abc.py b/devito/tools/abc.py index 8789f40b9c..b8c6cf3516 100644 --- a/devito/tools/abc.py +++ b/devito/tools/abc.py @@ -231,8 +231,16 @@ def __reduce_ex__(self, proto): ) def __getnewargs_ex__(self): - return (tuple(getattr(self, i) for i in self._pickle_rargs), - {i: getattr(self, i) for i in self._pickle_rkwargs}) + args = [] + for i in self._pickle_rargs: + if i.startswith('*'): + args.extend(getattr(self, i[1:])) + else: + args.append(getattr(self, i)) + + kwargs = {i: getattr(self, i) for i in self._pickle_rkwargs} + + return (tuple(args), kwargs) class Singleton(type): From 04ae0ecf78a9b273cb30791a6b8d18399d0a6f16 Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Thu, 1 Jun 2023 15:35:59 +0000 Subject: [PATCH 16/22] compiler: Revamp FIndexed for correct reconstruction --- devito/types/misc.py | 2 ++ tests/test_pickle.py | 17 ++++++++++++++++- tests/test_symbolics.py | 16 ++++++++++++++-- 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/devito/types/misc.py b/devito/types/misc.py index 623fbcd69a..c644a60614 100644 --- a/devito/types/misc.py +++ b/devito/types/misc.py @@ -109,6 +109,8 @@ def free_symbols(self): return (super().free_symbols | set().union(*[i.free_symbols for i in self.strides])) + func = Pickable._rebuild + # Pickling support __reduce_ex__ = Pickable.__reduce_ex__ diff --git a/tests/test_pickle.py b/tests/test_pickle.py index 653beace88..9914aea0a8 100644 --- a/tests/test_pickle.py +++ b/tests/test_pickle.py @@ -14,7 +14,8 @@ MPIRegion) from devito.types import (Array, CustomDimension, Symbol as dSymbol, Scalar, PointerArray, Lock, PThreadArray, SharedData, Timer, - DeviceID, NPThreads, ThreadID, TempFunction, Indirection) + DeviceID, NPThreads, ThreadID, TempFunction, Indirection, + FIndexed) from devito.types.basic import BoundSymbol from devito.tools import EnrichedTuple from devito.symbolics import (IntDiv, ListInitializer, FieldFromPointer, @@ -282,6 +283,20 @@ def test_shared_data(): assert indexed.name == new_indexed.name +def test_findexed(): + grid = Grid(shape=(3, 3, 3)) + f = Function(name='f', grid=grid) + + fi = FIndexed.from_indexed(f.indexify(), "foo", strides=(1, 2)) + + pkl_fi = pickle.dumps(fi) + new_fi = pickle.loads(pkl_fi) + + assert new_fi.name == fi.name + assert new_fi.pname == fi.pname + assert new_fi.strides == fi.strides + + def test_guard_factor(): d = Dimension(name='d') cd = ConditionalDimension(name='cd', parent=d, factor=4) diff --git a/tests/test_symbolics.py b/tests/test_symbolics.py index 751cb197cb..cb2602507b 100644 --- a/tests/test_symbolics.py +++ b/tests/test_symbolics.py @@ -12,8 +12,8 @@ CallFromPointer, Cast, DefFunction, FieldFromPointer, INT, FieldFromComposite, IntDiv, ccode, uxreplace) from devito.tools import as_tuple -from devito.types import Array, Bundle, LocalObject, Object, Symbol as dSymbol -from devito.types import Array, Bundle, FIndexed, LocalObject, Object, Symbol as dSymbol # noqa +from devito.types import (Array, Bundle, FIndexed, LocalObject, Object, + Symbol as dSymbol) def test_float_indices(): @@ -304,6 +304,18 @@ class MyLocalObject(LocalObject, Expr): assert str(lo + 2) == '2 + lo' +def test_findexed(): + grid = Grid(shape=(3, 3, 3)) + f = Function(name='f', grid=grid) + + fi = FIndexed.from_indexed(f.indexify(), "foo", strides=(1, 2)) + new_fi = fi.func(strides=(3, 4)) + + assert new_fi.name == fi.name == 'f' + assert new_fi.indices == fi.indices + assert new_fi.strides == (3, 4) + + def test_is_on_grid(): grid = Grid((10,)) x = grid.dimensions[0] From 0da295ea704dc47a524da65a25a664d93efd6bfb Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Sun, 4 Jun 2023 12:23:17 +0000 Subject: [PATCH 17/22] compiler: Patch c_char_p lowering --- devito/types/basic.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/devito/types/basic.py b/devito/types/basic.py index 0610c88ace..044ea0e177 100644 --- a/devito/types/basic.py +++ b/devito/types/basic.py @@ -1,6 +1,6 @@ import abc from collections import namedtuple -from ctypes import POINTER, _Pointer +from ctypes import POINTER, _Pointer, c_char_p, c_char from functools import reduce from operator import mul @@ -81,6 +81,11 @@ def _C_typedata(self): _type = self._C_ctype while issubclass(_type, _Pointer): _type = _type._type_ + + # `ctypes` treats C strings specially + if _type is c_char_p: + _type = c_char + return ctypes_to_cstr(_type) @abc.abstractproperty From dc43c52e9470b4768d351bea31dba53361dc1d9b Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Sun, 4 Jun 2023 13:25:15 +0000 Subject: [PATCH 18/22] compiler: Simplify is_on_device --- devito/passes/__init__.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/devito/passes/__init__.py b/devito/passes/__init__.py index 6e499f3ed3..4ea9529f36 100644 --- a/devito/passes/__init__.py +++ b/devito/passes/__init__.py @@ -23,14 +23,8 @@ def is_on_device(obj, gpu_fit): except AttributeError: functions = as_tuple(obj) - fsave = [] - for i in functions: - try: - f = i.alias or i - except AttributeError: - f = i - if isinstance(f, TimeFunction) and is_integer(f.save): - fsave.append(f) + fsave = [f for f in functions + if isinstance(f, TimeFunction) and is_integer(f.save)] if 'all-fallback' in gpu_fit and fsave: warning("TimeFunction %s assumed to fit the GPU memory" % fsave) From 1a20fa825657315ff34ddf5eb70dd64c9272314b Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Sun, 4 Jun 2023 15:49:07 +0000 Subject: [PATCH 19/22] compiler: Impose canonical ordering for HaloTouch args --- devito/ir/clusters/algorithms.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/devito/ir/clusters/algorithms.py b/devito/ir/clusters/algorithms.py index 0d6c72110d..2c0bdacc0c 100644 --- a/devito/ir/clusters/algorithms.py +++ b/devito/ir/clusters/algorithms.py @@ -373,6 +373,11 @@ def callback(self, clusters, prefix, seen=None): # be rescheduled after `c` upon topological sorting points.update(a.access for a in c.scope.accesses if a.is_write) + # Sort for determinism + # NOTE: not sorting might impact code generation. The order of + # the args is important because that's what search functions honor! + points = sorted(points, key=str) + rhs = HaloTouch(*points, halo_scheme=halo_scheme) # Insert only if not redundant, to avoid useless pollution From a38db4bc7b3aa6946e5051cdc5cdc75de6b516f1 Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Wed, 7 Jun 2023 06:20:36 +0000 Subject: [PATCH 20/22] compiler: Promote distributor to AbstractFunction --- devito/types/array.py | 7 ------- devito/types/basic.py | 11 +++++++++++ devito/types/dense.py | 9 --------- 3 files changed, 11 insertions(+), 16 deletions(-) diff --git a/devito/types/array.py b/devito/types/array.py index 69d9ed0889..8059471b4e 100644 --- a/devito/types/array.py +++ b/devito/types/array.py @@ -211,13 +211,6 @@ def _mem_shared(self): def _mem_constant(self): return self._scope == 'constant' - @property - def _distributor(self): - try: - return self.grid.distributor - except AttributeError: - return None - @property def initvalue(self): return self._initvalue diff --git a/devito/types/basic.py b/devito/types/basic.py index 044ea0e177..c450dd0335 100644 --- a/devito/types/basic.py +++ b/devito/types/basic.py @@ -878,6 +878,9 @@ def __init_finalize__(self, *args, **kwargs): # There may or may not be a `Grid` self._grid = kwargs.get('grid') + # A `Distributor` to handle domain decomposition + self._distributor = self.__distributor_setup__(**kwargs) + # Symbol properties # "Aliasing" another DiscreteFunction means that `self` logically # represents another object. For example, `self` might be used as the @@ -921,6 +924,14 @@ def __padding_setup__(self, **kwargs): padding = tuple(kwargs.get('padding', [(0, 0) for i in range(self.ndim)])) return DimensionTuple(*padding, getters=self.dimensions) + def __distributor_setup__(self, **kwargs): + # There may or may not be a `Distributor`. In the latter case, the + # AbstractFunction is to be considered "local" to each MPI rank + try: + return kwargs.get('grid').distributor + except AttributeError: + return kwargs.get('distributor') + @cached_property def _honors_autopadding(self): """ diff --git a/devito/types/dense.py b/devito/types/dense.py index a2daec211f..912ae5dfe2 100644 --- a/devito/types/dense.py +++ b/devito/types/dense.py @@ -60,9 +60,6 @@ class DiscreteFunction(AbstractFunction, ArgProvider, Differentiable): __rkwargs__ = AbstractFunction.__rkwargs__ + ('staggered', 'initializer') def __init_finalize__(self, *args, **kwargs): - # A `Distributor` to handle domain decomposition (only relevant for MPI) - self._distributor = self.__distributor_setup__(**kwargs) - # Staggering metadata self._staggered = self.__staggered_setup__(**kwargs) @@ -171,12 +168,6 @@ def __staggered_setup__(self, **kwargs): staggered = self.dimensions return staggered - def __distributor_setup__(self, **kwargs): - grid = kwargs.get('grid') - # There may or may not be a `Distributor`. In the latter case, the - # DiscreteFunction is to be considered "local" to each MPI rank - return kwargs.get('distributor') if grid is None else grid.distributor - @cached_property def _functions(self): return {self.function} From 25fbc791d742568b95ff002f788b3869303a23ec Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Wed, 7 Jun 2023 06:21:03 +0000 Subject: [PATCH 21/22] tests: Try both pickle and cloudpickle --- tests/test_pickle.py | 1284 +++++++++++++++++++++--------------------- 1 file changed, 628 insertions(+), 656 deletions(-) diff --git a/tests/test_pickle.py b/tests/test_pickle.py index 9914aea0a8..2d017cbb59 100644 --- a/tests/test_pickle.py +++ b/tests/test_pickle.py @@ -1,7 +1,9 @@ +import pickle as pickle0 +import cloudpickle as pickle1 + import pytest import numpy as np from sympy import Symbol -import cloudpickle as pickle from conftest import skipif from devito import (Constant, Eq, Function, TimeFunction, SparseFunction, Grid, @@ -24,773 +26,743 @@ TimeAxis, RickerSource, Receiver) -def test_constant(): - c = Constant(name='c') - assert c.data == 0. - c.data = 1. - - pkl_c = pickle.dumps(c) - new_c = pickle.loads(pkl_c) - - # .data is initialized, so it should have been pickled too - assert np.all(c.data == 1.) - assert np.all(new_c.data == 1.) - - -def test_dimension(): - d = Dimension(name='d') - - pkl_d = pickle.dumps(d) - new_d = pickle.loads(pkl_d) - - assert d.name == new_d.name - assert d.dtype == new_d.dtype - - -def test_enrichedtuple(): - # Dummy enriched tuple - tup = EnrichedTuple(11, 31, getters=('a', 'b'), left=[3, 4], right=[5, 6]) - - pkl_t = pickle.dumps(tup) - new_t = pickle.loads(pkl_t) - - assert new_t == tup # This only tests the actual tuple - assert new_t._getters == tup._getters - assert new_t.left == tup.left - assert new_t.right == tup.right - - -def test_function(): - grid = Grid(shape=(3, 3, 3)) - f = Function(name='f', grid=grid) - f.data[0] = 1. - - pkl_f = pickle.dumps(f) - new_f = pickle.loads(pkl_f) - - # .data is initialized, so it should have been pickled too - assert np.all(f.data[0] == 1.) - assert np.all(new_f.data[0] == 1.) - - assert f.space_order == new_f.space_order - assert f.dtype == new_f.dtype - assert f.shape == new_f.shape - - -def test_sparse_function(): - grid = Grid(shape=(3,)) - sf = SparseFunction(name='sf', grid=grid, npoint=3, space_order=2, - coordinates=[(0.,), (1.,), (2.,)]) - sf.data[0] = 1. - - pkl_sf = pickle.dumps(sf) - new_sf = pickle.loads(pkl_sf) - - # .data is initialized, so it should have been pickled too - assert np.all(sf.data[0] == 1.) - assert np.all(new_sf.data[0] == 1.) - - # coordinates should also have been pickled - assert np.all(sf.coordinates.data == new_sf.coordinates.data) - - assert sf.space_order == new_sf.space_order - assert sf.dtype == new_sf.dtype - assert sf.npoint == new_sf.npoint - - -def test_internal_symbols(): - s = dSymbol(name='s', dtype=np.float32) - pkl_s = pickle.dumps(s) - new_s = pickle.loads(pkl_s) - assert new_s.name == s.name - assert new_s.dtype is np.float32 - - s = Scalar(name='s', dtype=np.int32, is_const=True) - pkl_s = pickle.dumps(s) - new_s = pickle.loads(pkl_s) - assert new_s.name == s.name - assert new_s.dtype is np.int32 - assert new_s.is_const is True - - s = Scalar(name='s', nonnegative=True) - pkl_s = pickle.dumps(s) - new_s = pickle.loads(pkl_s) - assert new_s.name == s.name - assert new_s.assumptions0['nonnegative'] is True - - -def test_bound_symbol(): - grid = Grid(shape=(3, 3, 3)) - f = Function(name='f', grid=grid) - - bs = f._C_symbol - pkl_bs = pickle.dumps(bs) - new_bs = pickle.loads(pkl_bs) - - assert isinstance(new_bs, BoundSymbol) - assert new_bs.name == bs.name - assert isinstance(new_bs.function, Function) - assert str(new_bs.function) == str(bs.function) - +@pytest.mark.parametrize('pickle', [pickle0, pickle1]) +class TestBasic(object): -def test_indirection(): - grid = Grid(shape=(3, 3, 3)) - f = Function(name='f', grid=grid) + def test_constant(self, pickle): + c = Constant(name='c') + assert c.data == 0. + c.data = 1. - ind = Indirection(name='ofs', mapped=f) + pkl_c = pickle.dumps(c) + new_c = pickle.loads(pkl_c) - pkl_ind = pickle.dumps(ind) - new_ind = pickle.loads(pkl_ind) + # .data is initialized, so it should have been pickled too + assert np.all(c.data == 1.) + assert np.all(new_c.data == 1.) - assert new_ind.name == ind.name - assert isinstance(new_ind.mapped, Function) - assert str(new_ind.mapped) == str(ind.mapped) == str(f) - assert new_ind.dtype == ind.dtype + def test_dimension(self, pickle): + d = Dimension(name='d') + pkl_d = pickle.dumps(d) + new_d = pickle.loads(pkl_d) -def test_array(): - grid = Grid(shape=(3, 3)) - d = Dimension(name='d') + assert d.name == new_d.name + assert d.dtype == new_d.dtype - a = Array(name='a', dimensions=grid.dimensions, dtype=np.int32, halo=((1, 1), (2, 2)), - padding=((2, 2), (2, 2)), space='host', scope='stack') + def test_enrichedtuple(self, pickle): + # Dummy enriched tuple + tup = EnrichedTuple(11, 31, getters=('a', 'b'), left=[3, 4], right=[5, 6]) - pkl_a = pickle.dumps(a) - new_a = pickle.loads(pkl_a) - assert new_a.name == a.name - assert new_a.dtype is np.int32 - assert new_a.dimensions[0].name == 'x' - assert new_a.dimensions[1].name == 'y' - assert new_a.halo == ((1, 1), (2, 2)) - assert new_a.padding == ((2, 2), (2, 2)) - assert new_a.space == 'host' - assert new_a.scope == 'stack' + pkl_t = pickle.dumps(tup) + new_t = pickle.loads(pkl_t) - # Now with a pointer array - pa = PointerArray(name='pa', dimensions=d, array=a) + assert new_t == tup # This only tests the actual tuple + assert new_t._getters == tup._getters + assert new_t.left == tup.left + assert new_t.right == tup.right + + def test_function(self, pickle): + grid = Grid(shape=(3, 3, 3)) + f = Function(name='f', grid=grid) + f.data[0] = 1. - pkl_pa = pickle.dumps(pa) - new_pa = pickle.loads(pkl_pa) - assert new_pa.name == pa.name - assert new_pa.dim.name == 'd' - assert new_pa.array.name == 'a' + pkl_f = pickle.dumps(f) + new_f = pickle.loads(pkl_f) + + # .data is initialized, so it should have been pickled too + assert np.all(f.data[0] == 1.) + assert np.all(new_f.data[0] == 1.) + + assert f.space_order == new_f.space_order + assert f.dtype == new_f.dtype + assert f.shape == new_f.shape + + def test_sparse_function(self, pickle): + grid = Grid(shape=(3,)) + sf = SparseFunction(name='sf', grid=grid, npoint=3, space_order=2, + coordinates=[(0.,), (1.,), (2.,)]) + sf.data[0] = 1. + + pkl_sf = pickle.dumps(sf) + new_sf = pickle.loads(pkl_sf) + + # .data is initialized, so it should have been pickled too + assert np.all(sf.data[0] == 1.) + assert np.all(new_sf.data[0] == 1.) + + # coordinates should also have been pickled + assert np.all(sf.coordinates.data == new_sf.coordinates.data) + + assert sf.space_order == new_sf.space_order + assert sf.dtype == new_sf.dtype + assert sf.npoint == new_sf.npoint + + def test_internal_symbols(self, pickle): + s = dSymbol(name='s', dtype=np.float32) + pkl_s = pickle.dumps(s) + new_s = pickle.loads(pkl_s) + assert new_s.name == s.name + assert new_s.dtype is np.float32 + + s = Scalar(name='s', dtype=np.int32, is_const=True) + pkl_s = pickle.dumps(s) + new_s = pickle.loads(pkl_s) + assert new_s.name == s.name + assert new_s.dtype is np.int32 + assert new_s.is_const is True + + s = Scalar(name='s', nonnegative=True) + pkl_s = pickle.dumps(s) + new_s = pickle.loads(pkl_s) + assert new_s.name == s.name + assert new_s.assumptions0['nonnegative'] is True + + def test_bound_symbol(self, pickle): + grid = Grid(shape=(3, 3, 3)) + f = Function(name='f', grid=grid) + + bs = f._C_symbol + pkl_bs = pickle.dumps(bs) + new_bs = pickle.loads(pkl_bs) + + assert isinstance(new_bs, BoundSymbol) + assert new_bs.name == bs.name + assert isinstance(new_bs.function, Function) + assert str(new_bs.function) == str(bs.function) + + def test_indirection(self, pickle): + grid = Grid(shape=(3, 3, 3)) + f = Function(name='f', grid=grid) + + ind = Indirection(name='ofs', mapped=f) + + pkl_ind = pickle.dumps(ind) + new_ind = pickle.loads(pkl_ind) + + assert new_ind.name == ind.name + assert isinstance(new_ind.mapped, Function) + assert str(new_ind.mapped) == str(ind.mapped) == str(f) + assert new_ind.dtype == ind.dtype + + def test_array(self, pickle): + grid = Grid(shape=(3, 3)) + d = Dimension(name='d') + + a = Array(name='a', dimensions=grid.dimensions, dtype=np.int32, + halo=((1, 1), (2, 2)), padding=((2, 2), (2, 2)), + space='host', scope='stack') + + pkl_a = pickle.dumps(a) + new_a = pickle.loads(pkl_a) + assert new_a.name == a.name + assert new_a.dtype is np.int32 + assert new_a.dimensions[0].name == 'x' + assert new_a.dimensions[1].name == 'y' + assert new_a.halo == ((1, 1), (2, 2)) + assert new_a.padding == ((2, 2), (2, 2)) + assert new_a.space == 'host' + assert new_a.scope == 'stack' + + # Now with a pointer array + pa = PointerArray(name='pa', dimensions=d, array=a) + + pkl_pa = pickle.dumps(pa) + new_pa = pickle.loads(pkl_pa) + assert new_pa.name == pa.name + assert new_pa.dim.name == 'd' + assert new_pa.array.name == 'a' + def test_sub_dimension(self, pickle): + di = SubDimension.middle('di', Dimension(name='d'), 1, 1) + + pkl_di = pickle.dumps(di) + new_di = pickle.loads(pkl_di) -def test_sub_dimension(): - di = SubDimension.middle('di', Dimension(name='d'), 1, 1) + assert di.name == new_di.name + assert di.dtype == new_di.dtype + assert di.parent == new_di.parent + assert di._thickness == new_di._thickness + assert di._interval == new_di._interval - pkl_di = pickle.dumps(di) - new_di = pickle.loads(pkl_di) + def test_conditional_dimension(self, pickle): + d = Dimension(name='d') + s = Scalar(name='s') + cd = ConditionalDimension(name='ci', parent=d, factor=4, condition=s > 3) - assert di.name == new_di.name - assert di.dtype == new_di.dtype - assert di.parent == new_di.parent - assert di._thickness == new_di._thickness - assert di._interval == new_di._interval + pkl_cd = pickle.dumps(cd) + new_cd = pickle.loads(pkl_cd) + assert cd.name == new_cd.name + assert cd.parent == new_cd.parent + assert cd.factor == new_cd.factor + assert cd.condition == new_cd.condition -def test_conditional_dimension(): - d = Dimension(name='d') - s = Scalar(name='s') - cd = ConditionalDimension(name='ci', parent=d, factor=4, condition=s > 3) + def test_incr_dimension(self, pickle): + s = Scalar(name='s') + d = Dimension(name='d') + dd = IncrDimension('dd', d, s, 5, 2) - pkl_cd = pickle.dumps(cd) - new_cd = pickle.loads(pkl_cd) + pkl_dd = pickle.dumps(dd) + new_dd = pickle.loads(pkl_dd) - assert cd.name == new_cd.name - assert cd.parent == new_cd.parent - assert cd.factor == new_cd.factor - assert cd.condition == new_cd.condition + assert dd.name == new_dd.name + assert dd.parent == new_dd.parent + assert dd.symbolic_min == new_dd.symbolic_min + assert dd.symbolic_max == new_dd.symbolic_max + assert dd.step == new_dd.step + def test_custom_dimension(self, pickle): + symbolic_size = Constant(name='d_custom_size') + d = CustomDimension(name='d', symbolic_size=symbolic_size) -def test_incr_dimension(): - s = Scalar(name='s') - d = Dimension(name='d') - dd = IncrDimension('dd', d, s, 5, 2) + pkl_d = pickle.dumps(d) + new_d = pickle.loads(pkl_d) + + assert d.name == new_d.name + assert d.symbolic_size.name == new_d.symbolic_size.name - pkl_dd = pickle.dumps(dd) - new_dd = pickle.loads(pkl_dd) + def test_lock(self, pickle): + ld = CustomDimension(name='ld', symbolic_size=2) + lock = Lock(name='lock', dimensions=ld) - assert dd.name == new_dd.name - assert dd.parent == new_dd.parent - assert dd.symbolic_min == new_dd.symbolic_min - assert dd.symbolic_max == new_dd.symbolic_max - assert dd.step == new_dd.step + pkl_lock = pickle.dumps(lock) + new_lock = pickle.loads(pkl_lock) + lock.name == new_lock.name + new_lock.dimensions[0].symbolic_size == ld.symbolic_size -def test_custom_dimension(): - symbolic_size = Constant(name='d_custom_size') - d = CustomDimension(name='d', symbolic_size=symbolic_size) + def test_p_thread_array(self, pickle): + a = PThreadArray(name='threads', npthreads=4) - pkl_d = pickle.dumps(d) - new_d = pickle.loads(pkl_d) + pkl_a = pickle.dumps(a) + new_a = pickle.loads(pkl_a) - assert d.name == new_d.name - assert d.symbolic_size.name == new_d.symbolic_size.name + assert a.name == new_a.name + assert a.dimensions[0].name == new_a.dimensions[0].name + assert a.size == new_a.size + def test_shared_data(self, pickle): + s = Scalar(name='s') + a = Scalar(name='a') -def test_lock(): - ld = CustomDimension(name='ld', symbolic_size=2) - lock = Lock(name='lock', dimensions=ld) + sdata = SharedData(name='sdata', npthreads=2, cfields=[s], ncfields=[a]) - pkl_lock = pickle.dumps(lock) - new_lock = pickle.loads(pkl_lock) + pkl_sdata = pickle.dumps(sdata) + new_sdata = pickle.loads(pkl_sdata) - lock.name == new_lock.name - new_lock.dimensions[0].symbolic_size == ld.symbolic_size + assert sdata.name == new_sdata.name + assert sdata.shape == new_sdata.shape + assert sdata.size == new_sdata.size + assert sdata.fields == new_sdata.fields + assert sdata.pfields == new_sdata.pfields + assert sdata.cfields == new_sdata.cfields + assert sdata.ncfields == new_sdata.ncfields + ffp = FieldFromPointer(sdata._field_flag, sdata.symbolic_base) -def test_p_thread_array(): - a = PThreadArray(name='threads', npthreads=4) + pkl_ffp = pickle.dumps(ffp) + new_ffp = pickle.loads(pkl_ffp) - pkl_a = pickle.dumps(a) - new_a = pickle.loads(pkl_a) + assert ffp == new_ffp - assert a.name == new_a.name - assert a.dimensions[0].name == new_a.dimensions[0].name - assert a.size == new_a.size + indexed = sdata[0] + pkl_indexed = pickle.dumps(indexed) + new_indexed = pickle.loads(pkl_indexed) -def test_shared_data(): - s = Scalar(name='s') - a = Scalar(name='a') + assert indexed.name == new_indexed.name - sdata = SharedData(name='sdata', npthreads=2, cfields=[s], ncfields=[a]) + def test_findexed(self, pickle): + grid = Grid(shape=(3, 3, 3)) + f = Function(name='f', grid=grid) - pkl_sdata = pickle.dumps(sdata) - new_sdata = pickle.loads(pkl_sdata) + fi = FIndexed.from_indexed(f.indexify(), "foo", strides=(1, 2)) - assert sdata.name == new_sdata.name - assert sdata.shape == new_sdata.shape - assert sdata.size == new_sdata.size - assert sdata.fields == new_sdata.fields - assert sdata.pfields == new_sdata.pfields - assert sdata.cfields == new_sdata.cfields - assert sdata.ncfields == new_sdata.ncfields + pkl_fi = pickle.dumps(fi) + new_fi = pickle.loads(pkl_fi) - ffp = FieldFromPointer(sdata._field_flag, sdata.symbolic_base) + assert new_fi.name == fi.name + assert new_fi.pname == fi.pname + assert new_fi.strides == fi.strides - pkl_ffp = pickle.dumps(ffp) - new_ffp = pickle.loads(pkl_ffp) + def test_symbolics(self, pickle): + a = Symbol('a') - assert ffp == new_ffp + id = IntDiv(a, 3) + pkl_id = pickle.dumps(id) + new_id = pickle.loads(pkl_id) + assert id == new_id - indexed = sdata[0] + ffp = CallFromPointer('foo', a, ['b', 'c']) + pkl_ffp = pickle.dumps(ffp) + new_ffp = pickle.loads(pkl_ffp) + assert ffp == new_ffp - pkl_indexed = pickle.dumps(indexed) - new_indexed = pickle.loads(pkl_indexed) + li = ListInitializer(['a', 'b']) + pkl_li = pickle.dumps(li) + new_li = pickle.loads(pkl_li) + assert li == new_li - assert indexed.name == new_indexed.name + df = DefFunction('f', ['a', 1, 2]) + pkl_df = pickle.dumps(df) + new_df = pickle.loads(pkl_df) + assert df == new_df + assert df.arguments == new_df.arguments + def test_timers(self, pickle): + """Pickling for Timers used in Operators for C-level profiling.""" + timer = Timer('timer', ['sec0', 'sec1']) + pkl_obj = pickle.dumps(timer) + new_obj = pickle.loads(pkl_obj) + assert new_obj.name == timer.name + assert new_obj.sections == timer.sections + assert new_obj.value._obj.sec0 == timer.value._obj.sec0 == 0.0 + assert new_obj.value._obj.sec1 == timer.value._obj.sec1 == 0.0 -def test_findexed(): - grid = Grid(shape=(3, 3, 3)) - f = Function(name='f', grid=grid) + def test_guard_factor(self, pickle): + d = Dimension(name='d') + cd = ConditionalDimension(name='cd', parent=d, factor=4) - fi = FIndexed.from_indexed(f.indexify(), "foo", strides=(1, 2)) + gf = GuardFactor(cd) - pkl_fi = pickle.dumps(fi) - new_fi = pickle.loads(pkl_fi) + pkl_gf = pickle.dumps(gf) + new_gf = pickle.loads(pkl_gf) - assert new_fi.name == fi.name - assert new_fi.pname == fi.pname - assert new_fi.strides == fi.strides + assert gf == new_gf + def test_temp_function(self, pickle): + grid = Grid(shape=(3, 3)) + d = Dimension(name='d') -def test_guard_factor(): - d = Dimension(name='d') - cd = ConditionalDimension(name='cd', parent=d, factor=4) + cf = TempFunction(name='f', dtype=np.float64, dimensions=grid.dimensions, + halo=((1, 1), (1, 1))) + + pkl_cf = pickle.dumps(cf) + new_cf = pickle.loads(pkl_cf) + assert new_cf.name == cf.name + assert new_cf.dtype is np.float64 + assert new_cf.halo == ((1, 1), (1, 1)) + assert new_cf.ndim == cf.ndim + assert new_cf.dim is None + + pcf = cf._make_pointer(d) + + pkl_pcf = pickle.dumps(pcf) + new_pcf = pickle.loads(pkl_pcf) + assert new_pcf.name == pcf.name + assert new_pcf.dim.name == 'd' + assert new_pcf.ndim == cf.ndim + 1 + assert new_pcf.halo == ((0, 0), (1, 1), (1, 1)) + + def test_deviceid(self, pickle): + did = DeviceID() - gf = GuardFactor(cd) + pkl_did = pickle.dumps(did) + new_did = pickle.loads(pkl_did) + # TODO: this will be extend when we'll support DeviceID + # for multi-node multi-gpu execution, when DeviceID will have + # to pick its default value from an MPI communicator attached + # to the runtime arguments + + assert did.name == new_did.name + assert did.dtype == new_did.dtype + + def test_npthreads(self, pickle): + npt = NPThreads(name='npt', size=3) + + pkl_npt = pickle.dumps(npt) + new_npt = pickle.loads(pkl_npt) + + assert npt.name == new_npt.name + assert npt.dtype == new_npt.dtype + assert npt.size == new_npt.size + + def test_receiver(self, pickle): + grid = Grid(shape=(3,)) + time_range = TimeAxis(start=0., stop=1000., step=0.1) + nreceivers = 3 + + rec = Receiver(name='rec', grid=grid, time_range=time_range, npoint=nreceivers, + coordinates=[(0.,), (1.,), (2.,)]) + rec.data[:] = 1. + + pkl_rec = pickle.dumps(rec) + new_rec = pickle.loads(pkl_rec) + + assert np.all(new_rec.data == 1) + assert np.all(new_rec.coordinates.data == [[0.], [1.], [2.]]) + + +@pytest.mark.parametrize('pickle', [pickle0, pickle1]) +class TestOperator(object): - pkl_gf = pickle.dumps(gf) - new_gf = pickle.loads(pkl_gf) + def test_geometry(self, pickle): - assert gf == new_gf + shape = (50, 50, 50) + spacing = [10. for _ in shape] + nbl = 10 + nrec = 10 + tn = 150. + # Create two-layer model from preset + model = demo_model(preset='layers-isotropic', vp_top=1., vp_bottom=2., + spacing=spacing, shape=shape, nbl=nbl) + # Source and receiver geometries + src_coordinates = np.empty((1, len(spacing))) + src_coordinates[0, :] = np.array(model.domain_size) * .5 + if len(shape) > 1: + src_coordinates[0, -1] = model.origin[-1] + 2 * spacing[-1] -def test_receiver(): - grid = Grid(shape=(3,)) - time_range = TimeAxis(start=0., stop=1000., step=0.1) - nreceivers = 3 + rec_coordinates = np.empty((nrec, len(spacing))) + rec_coordinates[:, 0] = np.linspace(0., model.domain_size[0], num=nrec) + if len(shape) > 1: + rec_coordinates[:, 1] = np.array(model.domain_size)[1] * .5 + rec_coordinates[:, -1] = model.origin[-1] + 2 * spacing[-1] + geometry = AcquisitionGeometry(model, rec_coordinates, src_coordinates, + t0=0.0, tn=tn, src_type='Ricker', f0=0.010) - rec = Receiver(name='rec', grid=grid, time_range=time_range, npoint=nreceivers, - coordinates=[(0.,), (1.,), (2.,)]) - rec.data[:] = 1. + pkl_geom = pickle.dumps(geometry) + new_geom = pickle.loads(pkl_geom) - pkl_rec = pickle.dumps(rec) - new_rec = pickle.loads(pkl_rec) + assert np.all(new_geom.src_positions == geometry.src_positions) + assert np.all(new_geom.rec_positions == geometry.rec_positions) + assert new_geom.f0 == geometry.f0 + assert np.all(new_geom.src_type == geometry.src_type) + assert np.all(new_geom.src.data == geometry.src.data) + assert new_geom.t0 == geometry.t0 + assert new_geom.tn == geometry.tn - assert np.all(new_rec.data == 1) - assert np.all(new_rec.coordinates.data == [[0.], [1.], [2.]]) + def test_operator_parameters(self, pickle): + grid = Grid(shape=(3, 3, 3)) + f = Function(name='f', grid=grid) + g = TimeFunction(name='g', grid=grid) + h = TimeFunction(name='h', grid=grid, save=10) + op = Operator(Eq(h.forward, h + g + f + 1)) + for i in op.parameters: + pkl_i = pickle.dumps(i) + pickle.loads(pkl_i) + def test_unjitted_operator(self, pickle): + grid = Grid(shape=(3, 3, 3)) + f = Function(name='f', grid=grid) -def test_geometry(): + op = Operator(Eq(f, f + 1)) - shape = (50, 50, 50) - spacing = [10. for _ in shape] - nbl = 10 - nrec = 10 - tn = 150. + pkl_op = pickle.dumps(op) + new_op = pickle.loads(pkl_op) - # Create two-layer model from preset - model = demo_model(preset='layers-isotropic', vp_top=1., vp_bottom=2., - spacing=spacing, shape=shape, nbl=nbl) - # Source and receiver geometries - src_coordinates = np.empty((1, len(spacing))) - src_coordinates[0, :] = np.array(model.domain_size) * .5 - if len(shape) > 1: - src_coordinates[0, -1] = model.origin[-1] + 2 * spacing[-1] + assert str(op) == str(new_op) - rec_coordinates = np.empty((nrec, len(spacing))) - rec_coordinates[:, 0] = np.linspace(0., model.domain_size[0], num=nrec) - if len(shape) > 1: - rec_coordinates[:, 1] = np.array(model.domain_size)[1] * .5 - rec_coordinates[:, -1] = model.origin[-1] + 2 * spacing[-1] - geometry = AcquisitionGeometry(model, rec_coordinates, src_coordinates, - t0=0.0, tn=tn, src_type='Ricker', f0=0.010) + def test_operator_function(self, pickle): + grid = Grid(shape=(3, 3, 3)) + f = Function(name='f', grid=grid) - pkl_geom = pickle.dumps(geometry) - new_geom = pickle.loads(pkl_geom) + op = Operator(Eq(f, f + 1)) + op.apply() - assert np.all(new_geom.src_positions == geometry.src_positions) - assert np.all(new_geom.rec_positions == geometry.rec_positions) - assert new_geom.f0 == geometry.f0 - assert np.all(new_geom.src_type == geometry.src_type) - assert np.all(new_geom.src.data == geometry.src.data) - assert new_geom.t0 == geometry.t0 - assert new_geom.tn == geometry.tn + pkl_op = pickle.dumps(op) + new_op = pickle.loads(pkl_op) + assert str(op) == str(new_op) -def test_symbolics(): - a = Symbol('a') + new_op.apply(f=f) + assert np.all(f.data == 2) - id = IntDiv(a, 3) - pkl_id = pickle.dumps(id) - new_id = pickle.loads(pkl_id) - assert id == new_id + def test_operator_function_w_preallocation(self, pickle): + grid = Grid(shape=(3, 3, 3)) + f = Function(name='f', grid=grid) + f.data - ffp = CallFromPointer('foo', a, ['b', 'c']) - pkl_ffp = pickle.dumps(ffp) - new_ffp = pickle.loads(pkl_ffp) - assert ffp == new_ffp + op = Operator(Eq(f, f + 1)) + op.apply() - li = ListInitializer(['a', 'b']) - pkl_li = pickle.dumps(li) - new_li = pickle.loads(pkl_li) - assert li == new_li + pkl_op = pickle.dumps(op) + new_op = pickle.loads(pkl_op) - df = DefFunction('f', ['a', 1, 2]) - pkl_df = pickle.dumps(df) - new_df = pickle.loads(pkl_df) - assert df == new_df - assert df.arguments == new_df.arguments + assert str(op) == str(new_op) + new_op.apply(f=f) + assert np.all(f.data == 2) -def test_timers(): - """Pickling for Timers used in Operators for C-level profiling.""" - timer = Timer('timer', ['sec0', 'sec1']) - pkl_obj = pickle.dumps(timer) - new_obj = pickle.loads(pkl_obj) - assert new_obj.name == timer.name - assert new_obj.sections == timer.sections - assert new_obj.value._obj.sec0 == timer.value._obj.sec0 == 0.0 - assert new_obj.value._obj.sec1 == timer.value._obj.sec1 == 0.0 + def test_operator_timefunction(self, pickle): + grid = Grid(shape=(3, 3, 3)) + f = TimeFunction(name='f', grid=grid, save=3) + op = Operator(Eq(f.forward, f + 1)) + op.apply(time=0) -def test_operator_parameters(): - grid = Grid(shape=(3, 3, 3)) - f = Function(name='f', grid=grid) - g = TimeFunction(name='g', grid=grid) - h = TimeFunction(name='h', grid=grid, save=10) - op = Operator(Eq(h.forward, h + g + f + 1)) - for i in op.parameters: - pkl_i = pickle.dumps(i) - pickle.loads(pkl_i) + pkl_op = pickle.dumps(op) + new_op = pickle.loads(pkl_op) + + assert str(op) == str(new_op) + new_op.apply(time_m=1, time_M=1, f=f) + assert np.all(f.data[2] == 2) + + def test_operator_timefunction_w_preallocation(self, pickle): + grid = Grid(shape=(3, 3, 3)) + f = TimeFunction(name='f', grid=grid, save=3) + f.data -def test_unjitted_operator(): - grid = Grid(shape=(3, 3, 3)) - f = Function(name='f', grid=grid) + op = Operator(Eq(f.forward, f + 1)) + op.apply(time=0) - op = Operator(Eq(f, f + 1)) + pkl_op = pickle.dumps(op) + new_op = pickle.loads(pkl_op) - pkl_op = pickle.dumps(op) - new_op = pickle.loads(pkl_op) + assert str(op) == str(new_op) - assert str(op) == str(new_op) + new_op.apply(time_m=1, time_M=1, f=f) + assert np.all(f.data[2] == 2) + def test_elemental(self, pickle): + """ + Tests that elemental functions don't get reconstructed differently. + """ + grid = Grid(shape=(101, 101)) + time_range = TimeAxis(start=0.0, stop=1000.0, num=12) -def test_operator_function(): - grid = Grid(shape=(3, 3, 3)) - f = Function(name='f', grid=grid) + nrec = 101 + rec = Receiver(name='rec', grid=grid, npoint=nrec, time_range=time_range) - op = Operator(Eq(f, f + 1)) - op.apply() + u = TimeFunction(name="u", grid=grid, time_order=2, space_order=2) + rec_term = rec.interpolate(expr=u) - pkl_op = pickle.dumps(op) - new_op = pickle.loads(pkl_op) + eq = rec_term.evaluate[2] + eq = eq.func(eq.lhs, eq.rhs.args[0]) - assert str(op) == str(new_op) + op = Operator(eq) - new_op.apply(f=f) - assert np.all(f.data == 2) + pkl_op = pickle.dumps(op) + new_op = pickle.loads(pkl_op) + op.cfunction + new_op.cfunction -def test_operator_function_w_preallocation(): - grid = Grid(shape=(3, 3, 3)) - f = Function(name='f', grid=grid) - f.data + assert str(op) == str(new_op) - op = Operator(Eq(f, f + 1)) - op.apply() + @skipif(['nompi']) + @pytest.mark.parallel(mode=[1]) + def test_mpi_objects(self, pickle): + grid = Grid(shape=(4, 4, 4)) - pkl_op = pickle.dumps(op) - new_op = pickle.loads(pkl_op) + # Neighbours + obj = grid.distributor._obj_neighborhood + pkl_obj = pickle.dumps(obj) + new_obj = pickle.loads(pkl_obj) + assert obj.name == new_obj.name + assert obj.pname == new_obj.pname + assert obj.pfields == new_obj.pfields - assert str(op) == str(new_op) + # Communicator + obj = grid.distributor._obj_comm + pkl_obj = pickle.dumps(obj) + new_obj = pickle.loads(pkl_obj) + assert obj.name == new_obj.name + assert obj.dtype == new_obj.dtype - new_op.apply(f=f) - assert np.all(f.data == 2) + # Status + obj = MPIStatusObject(name='status') + pkl_obj = pickle.dumps(obj) + new_obj = pickle.loads(pkl_obj) + assert obj.name == new_obj.name + assert obj.dtype == new_obj.dtype + # Request + obj = MPIRequestObject(name='request') + pkl_obj = pickle.dumps(obj) + new_obj = pickle.loads(pkl_obj) + assert obj.name == new_obj.name + assert obj.dtype == new_obj.dtype -def test_operator_timefunction(): - grid = Grid(shape=(3, 3, 3)) - f = TimeFunction(name='f', grid=grid, save=3) + def test_threadid(self, pickle): + grid = Grid(shape=(4, 4, 4)) + f = TimeFunction(name='f', grid=grid) + op = Operator(Eq(f.forward, f + 1.), openmp=True) - op = Operator(Eq(f.forward, f + 1)) - op.apply(time=0) + tid = ThreadID(op.nthreads) - pkl_op = pickle.dumps(op) - new_op = pickle.loads(pkl_op) + pkl_tid = pickle.dumps(tid) + new_tid = pickle.loads(pkl_tid) - assert str(op) == str(new_op) + assert tid.name == new_tid.name + assert tid.nthreads.name == new_tid.nthreads.name + assert tid.symbolic_min.name == new_tid.symbolic_min.name + assert tid.symbolic_max.name == new_tid.symbolic_max.name - new_op.apply(time_m=1, time_M=1, f=f) - assert np.all(f.data[2] == 2) + @skipif(['nompi']) + @pytest.mark.parallel(mode=[2]) + def test_mpi_grid(self, pickle): + grid = Grid(shape=(4, 4, 4)) - -def test_operator_timefunction_w_preallocation(): - grid = Grid(shape=(3, 3, 3)) - f = TimeFunction(name='f', grid=grid, save=3) - f.data - - op = Operator(Eq(f.forward, f + 1)) - op.apply(time=0) - - pkl_op = pickle.dumps(op) - new_op = pickle.loads(pkl_op) - - assert str(op) == str(new_op) - - new_op.apply(time_m=1, time_M=1, f=f) - assert np.all(f.data[2] == 2) - - -@skipif(['nompi']) -@pytest.mark.parallel(mode=[1]) -def test_mpi_objects(): - grid = Grid(shape=(4, 4, 4)) - - # Neighbours - obj = grid.distributor._obj_neighborhood - pkl_obj = pickle.dumps(obj) - new_obj = pickle.loads(pkl_obj) - assert obj.name == new_obj.name - assert obj.pname == new_obj.pname - assert obj.pfields == new_obj.pfields - - # Communicator - obj = grid.distributor._obj_comm - pkl_obj = pickle.dumps(obj) - new_obj = pickle.loads(pkl_obj) - assert obj.name == new_obj.name - assert obj.dtype == new_obj.dtype - - # Status - obj = MPIStatusObject(name='status') - pkl_obj = pickle.dumps(obj) - new_obj = pickle.loads(pkl_obj) - assert obj.name == new_obj.name - assert obj.dtype == new_obj.dtype - - # Request - obj = MPIRequestObject(name='request') - pkl_obj = pickle.dumps(obj) - new_obj = pickle.loads(pkl_obj) - assert obj.name == new_obj.name - assert obj.dtype == new_obj.dtype - - -def test_threadid(): - grid = Grid(shape=(4, 4, 4)) - f = TimeFunction(name='f', grid=grid) - op = Operator(Eq(f.forward, f + 1.), openmp=True) - - tid = ThreadID(op.nthreads) - - pkl_tid = pickle.dumps(tid) - new_tid = pickle.loads(pkl_tid) - - assert tid.name == new_tid.name - assert tid.nthreads.name == new_tid.nthreads.name - assert tid.symbolic_min.name == new_tid.symbolic_min.name - assert tid.symbolic_max.name == new_tid.symbolic_max.name - - -@skipif(['nompi']) -@pytest.mark.parallel(mode=[2]) -def test_mpi_grid(): - grid = Grid(shape=(4, 4, 4)) - - pkl_grid = pickle.dumps(grid) - new_grid = pickle.loads(pkl_grid) - - assert grid.distributor.comm.size == 2 - assert new_grid.distributor.comm.size == 1 # Using cloned MPI_COMM_SELF - assert grid.distributor.shape == (2, 4, 4) - assert new_grid.distributor.shape == (4, 4, 4) - - # Same as before but only one rank calls `loads`. We make sure this - # won't cause any hanging (this was an issue in the past when we're - # using MPI_COMM_WORLD at unpickling - if MPI.COMM_WORLD.rank == 1: + pkl_grid = pickle.dumps(grid) new_grid = pickle.loads(pkl_grid) - assert new_grid.distributor.comm.size == 1 - MPI.COMM_WORLD.Barrier() - - -def test_temp_function(): - grid = Grid(shape=(3, 3)) - d = Dimension(name='d') - - cf = TempFunction(name='f', dtype=np.float64, dimensions=grid.dimensions, - halo=((1, 1), (1, 1))) - - pkl_cf = pickle.dumps(cf) - new_cf = pickle.loads(pkl_cf) - assert new_cf.name == cf.name - assert new_cf.dtype is np.float64 - assert new_cf.halo == ((1, 1), (1, 1)) - assert new_cf.ndim == cf.ndim - assert new_cf.dim is None - - pcf = cf._make_pointer(d) - - pkl_pcf = pickle.dumps(pcf) - new_pcf = pickle.loads(pkl_pcf) - assert new_pcf.name == pcf.name - assert new_pcf.dim.name == 'd' - assert new_pcf.ndim == cf.ndim + 1 - assert new_pcf.halo == ((0, 0), (1, 1), (1, 1)) - - -def test_deviceid(): - did = DeviceID() - - pkl_did = pickle.dumps(did) - new_did = pickle.loads(pkl_did) - # TODO: this will be extend when we'll support DeviceID - # for multi-node multi-gpu execution, when DeviceID will have - # to pick its default value from an MPI communicator attached - # to the runtime arguments - - assert did.name == new_did.name - assert did.dtype == new_did.dtype - - -def test_npthreads(): - npt = NPThreads(name='npt', size=3) - - pkl_npt = pickle.dumps(npt) - new_npt = pickle.loads(pkl_npt) - - assert npt.name == new_npt.name - assert npt.dtype == new_npt.dtype - assert npt.size == new_npt.size - - -@skipif(['nompi']) -@pytest.mark.parallel(mode=[(1, 'full')]) -def test_mpi_fullmode_objects(): - grid = Grid(shape=(4, 4, 4)) - x, y, _ = grid.dimensions - - # Message - f = Function(name='f', grid=grid) - obj = MPIMsgEnriched('msg', f, [Halo(x, LEFT)]) - pkl_obj = pickle.dumps(obj) - new_obj = pickle.loads(pkl_obj) - assert obj.name == new_obj.name - assert obj.target.name == new_obj.target.name - assert all(obj.target.dimensions[i].name == new_obj.target.dimensions[i].name - for i in range(grid.dim)) - assert new_obj.target.dimensions[0] is new_obj.halos[0].dim - - # Region - x_m, x_M = x.symbolic_min, x.symbolic_max - y_m, y_M = y.symbolic_min, y.symbolic_max - obj = MPIRegion('reg', 1, [y, x], - [(((x, OWNED, LEFT),), {x: (x_m, Min(x_M, x_m))}), - (((y, OWNED, LEFT),), {y: (y_m, Min(y_M, y_m))})]) - pkl_obj = pickle.dumps(obj) - new_obj = pickle.loads(pkl_obj) - assert obj.prefix == new_obj.prefix - assert obj.key == new_obj.key - assert obj.name == new_obj.name - assert len(new_obj.arguments) == 2 - assert all(d0.name == d1.name for d0, d1 in zip(obj.arguments, new_obj.arguments)) - assert all(new_obj.arguments[i] is new_obj.owned[i][0][0][0] # `x` and `y` - for i in range(2)) - assert new_obj.owned[0][0][0][1] is new_obj.owned[1][0][0][1] # `OWNED` - assert new_obj.owned[0][0][0][2] is new_obj.owned[1][0][0][2] # `LEFT` - for n, i in enumerate(new_obj.owned): - d, v = list(i[1].items())[0] - assert d is new_obj.arguments[n] - assert v[0] is d.symbolic_min - assert v[1] == Min(d.symbolic_max, d.symbolic_min) - - -@skipif(['nompi']) -@pytest.mark.parallel(mode=[(1, 'basic'), (1, 'full')]) -def test_mpi_operator(): - grid = Grid(shape=(4,)) - f = TimeFunction(name='f', grid=grid) - - # Using `sum` creates a stencil in `x`, which in turn will - # trigger the generation of code for MPI halo exchange - op = Operator(Eq(f.forward, f.sum() + 1)) - op.apply(time=2) - - pkl_op = pickle.dumps(op) - new_op = pickle.loads(pkl_op) - - assert str(op) == str(new_op) - - new_grid = new_op.input[0].grid - g = TimeFunction(name='g', grid=new_grid) - new_op.apply(time=2, f=g) - assert np.all(f.data[0] == [2., 3., 3., 3.]) - assert np.all(f.data[1] == [3., 6., 7., 7.]) - assert np.all(g.data[0] == f.data[0]) - assert np.all(g.data[1] == f.data[1]) - - -def test_full_model(): - - shape = (50, 50, 50) - spacing = [10. for _ in shape] - nbl = 10 - - # Create two-layer model from preset - model = demo_model(preset='layers-isotropic', vp_top=1., vp_bottom=2., - spacing=spacing, shape=shape, nbl=nbl) - - # Test Model pickling - pkl_model = pickle.dumps(model) - new_model = pickle.loads(pkl_model) - assert np.isclose(np.linalg.norm(model.vp.data[:]-new_model.vp.data[:]), 0) - - f0 = .010 - dt = model.critical_dt - t0 = 0.0 - tn = 350.0 - time_range = TimeAxis(start=t0, stop=tn, step=dt) - - # Test TimeAxis pickling - pkl_time_range = pickle.dumps(time_range) - new_time_range = pickle.loads(pkl_time_range) - assert np.isclose(np.linalg.norm(time_range.time_values), - np.linalg.norm(new_time_range.time_values)) - - # Test Class Constant pickling - pkl_origin = pickle.dumps(model.grid.origin_symbols) - new_origin = pickle.loads(pkl_origin) - - for a, b in zip(model.grid.origin_symbols, new_origin): - assert a.compare(b) == 0 - - # Test Class TimeDimension pickling - time_dim = TimeDimension(name='time', spacing=Constant(name='dt', dtype=np.float32)) - pkl_time_dim = pickle.dumps(time_dim) - new_time_dim = pickle.loads(pkl_time_dim) - assert time_dim.spacing._value == new_time_dim.spacing._value - - # Test Class SteppingDimension - stepping_dim = SteppingDimension(name='t', parent=time_dim) - pkl_stepping_dim = pickle.dumps(stepping_dim) - new_stepping_dim = pickle.loads(pkl_stepping_dim) - assert stepping_dim.is_Time == new_stepping_dim.is_Time - - # Test Grid pickling - pkl_grid = pickle.dumps(model.grid) - new_grid = pickle.loads(pkl_grid) - assert model.grid.shape == new_grid.shape - - assert model.grid.extent == new_grid.extent - assert model.grid.shape == new_grid.shape - for a, b in zip(model.grid.dimensions, new_grid.dimensions): - assert a.compare(b) == 0 - - ricker = RickerSource(name='src', grid=model.grid, f0=f0, time_range=time_range) - - pkl_ricker = pickle.dumps(ricker) - new_ricker = pickle.loads(pkl_ricker) - assert np.isclose(np.linalg.norm(ricker.data), np.linalg.norm(new_ricker.data)) - # FIXME: fails randomly when using data.flatten() AND numpy is using MKL + assert grid.distributor.comm.size == 2 + assert new_grid.distributor.comm.size == 1 # Using cloned MPI_COMM_SELF + assert grid.distributor.shape == (2, 4, 4) + assert new_grid.distributor.shape == (4, 4, 4) + + # Same as before but only one rank calls `loads`. We make sure this + # won't cause any hanging (this was an issue in the past when we're + # using MPI_COMM_WORLD at unpickling + if MPI.COMM_WORLD.rank == 1: + new_grid = pickle.loads(pkl_grid) + assert new_grid.distributor.comm.size == 1 + MPI.COMM_WORLD.Barrier() + + @skipif(['nompi']) + @pytest.mark.parallel(mode=[(1, 'full')]) + def test_mpi_fullmode_objects(self, pickle): + grid = Grid(shape=(4, 4, 4)) + x, y, _ = grid.dimensions + + # Message + f = Function(name='f', grid=grid) + obj = MPIMsgEnriched('msg', f, [Halo(x, LEFT)]) + pkl_obj = pickle.dumps(obj) + new_obj = pickle.loads(pkl_obj) + assert obj.name == new_obj.name + assert obj.target.name == new_obj.target.name + assert all(obj.target.dimensions[i].name == new_obj.target.dimensions[i].name + for i in range(grid.dim)) + assert new_obj.target.dimensions[0] is new_obj.halos[0].dim + + # Region + x_m, x_M = x.symbolic_min, x.symbolic_max + y_m, y_M = y.symbolic_min, y.symbolic_max + obj = MPIRegion('reg', 1, [y, x], + [(((x, OWNED, LEFT),), {x: (x_m, Min(x_M, x_m))}), + (((y, OWNED, LEFT),), {y: (y_m, Min(y_M, y_m))})]) + pkl_obj = pickle.dumps(obj) + new_obj = pickle.loads(pkl_obj) + assert obj.prefix == new_obj.prefix + assert obj.key == new_obj.key + assert obj.name == new_obj.name + assert len(new_obj.arguments) == 2 + assert all(d0.name == d1.name for d0, d1 in zip(obj.arguments, new_obj.arguments)) + assert all(new_obj.arguments[i] is new_obj.owned[i][0][0][0] # `x` and `y` + for i in range(2)) + assert new_obj.owned[0][0][0][1] is new_obj.owned[1][0][0][1] # `OWNED` + assert new_obj.owned[0][0][0][2] is new_obj.owned[1][0][0][2] # `LEFT` + for n, i in enumerate(new_obj.owned): + d, v = list(i[1].items())[0] + assert d is new_obj.arguments[n] + assert v[0] is d.symbolic_min + assert v[1] == Min(d.symbolic_max, d.symbolic_min) + + @skipif(['nompi']) + @pytest.mark.parallel(mode=[(1, 'basic'), (1, 'full')]) + def test_mpi_operator(self, pickle): + grid = Grid(shape=(4,)) + f = TimeFunction(name='f', grid=grid) + + # Using `sum` creates a stencil in `x`, which in turn will + # trigger the generation of code for MPI halo exchange + op = Operator(Eq(f.forward, f.sum() + 1)) + op.apply(time=2) + + pkl_op = pickle.dumps(op) + new_op = pickle.loads(pkl_op) + + assert str(op) == str(new_op) + + new_grid = new_op.input[0].grid + g = TimeFunction(name='g', grid=new_grid) + new_op.apply(time=2, f=g) + assert np.all(f.data[0] == [2., 3., 3., 3.]) + assert np.all(f.data[1] == [3., 6., 7., 7.]) + assert np.all(g.data[0] == f.data[0]) + assert np.all(g.data[1] == f.data[1]) + + def test_full_model(self, pickle): + shape = (50, 50, 50) + spacing = [10. for _ in shape] + nbl = 10 + + # Create two-layer model from preset + model = demo_model(preset='layers-isotropic', vp_top=1., vp_bottom=2., + spacing=spacing, shape=shape, nbl=nbl) + + # Test Model pickling + pkl_model = pickle.dumps(model) + new_model = pickle.loads(pkl_model) + assert np.isclose(np.linalg.norm(model.vp.data[:]-new_model.vp.data[:]), 0) + + f0 = .010 + dt = model.critical_dt + t0 = 0.0 + tn = 350.0 + time_range = TimeAxis(start=t0, stop=tn, step=dt) + + # Test TimeAxis pickling + pkl_time_range = pickle.dumps(time_range) + new_time_range = pickle.loads(pkl_time_range) + assert np.isclose(np.linalg.norm(time_range.time_values), + np.linalg.norm(new_time_range.time_values)) + + # Test Class Constant pickling + pkl_origin = pickle.dumps(model.grid.origin_symbols) + new_origin = pickle.loads(pkl_origin) + + for a, b in zip(model.grid.origin_symbols, new_origin): + assert a.compare(b) == 0 + + # Test Class TimeDimension pickling + time_dim = TimeDimension(name='time', + spacing=Constant(name='dt', dtype=np.float32)) + pkl_time_dim = pickle.dumps(time_dim) + new_time_dim = pickle.loads(pkl_time_dim) + assert time_dim.spacing._value == new_time_dim.spacing._value + + # Test Class SteppingDimension + stepping_dim = SteppingDimension(name='t', parent=time_dim) + pkl_stepping_dim = pickle.dumps(stepping_dim) + new_stepping_dim = pickle.loads(pkl_stepping_dim) + assert stepping_dim.is_Time == new_stepping_dim.is_Time + + # Test Grid pickling + pkl_grid = pickle.dumps(model.grid) + new_grid = pickle.loads(pkl_grid) + assert model.grid.shape == new_grid.shape -def test_usave_sampled(): - grid = Grid(shape=(10, 10, 10)) - u = TimeFunction(name="u", grid=grid, time_order=2, space_order=8) - - time_range = TimeAxis(start=0, stop=1000, step=1) - - factor = Constant(name="factor", value=10, dtype=np.int32) - time_sub = ConditionalDimension(name="time_sub", parent=grid.time_dim, - factor=factor) - - u0_save = TimeFunction(name="u0_save", grid=grid, time_dim=time_sub) - src = RickerSource(name="src", grid=grid, time_range=time_range, f0=10) - - pde = u.dt2 - u.laplace - stencil = Eq(u.forward, solve(pde, u.forward)) - - src_term = src.inject(field=u.forward, expr=src) - - eqn = [stencil] + src_term - eqn += [Eq(u0_save, u)] - op_fwd = Operator(eqn) + assert model.grid.extent == new_grid.extent + assert model.grid.shape == new_grid.shape + for a, b in zip(model.grid.dimensions, new_grid.dimensions): + assert a.compare(b) == 0 - tmp_pickle_op_fn = "tmp_operator.pickle" - pickle.dump(op_fwd, open(tmp_pickle_op_fn, "wb")) - op_new = pickle.load(open(tmp_pickle_op_fn, "rb")) + ricker = RickerSource(name='src', grid=model.grid, f0=f0, time_range=time_range) - assert str(op_fwd) == str(op_new) + pkl_ricker = pickle.dumps(ricker) + new_ricker = pickle.loads(pkl_ricker) + assert np.isclose(np.linalg.norm(ricker.data), np.linalg.norm(new_ricker.data)) + # FIXME: fails randomly when using data.flatten() AND numpy is using MKL + def test_usave_sampled(self, pickle): + grid = Grid(shape=(10, 10, 10)) + u = TimeFunction(name="u", grid=grid, time_order=2, space_order=8) -def test_elemental(): - """ - Tests that elemental function doesn't get reconstructed differently - """ - grid = Grid(shape=(101, 101)) - time_range = TimeAxis(start=0.0, stop=1000.0, num=12) + time_range = TimeAxis(start=0, stop=1000, step=1) - nrec = 101 - rec = Receiver(name='rec', grid=grid, npoint=nrec, time_range=time_range) + factor = Constant(name="factor", value=10, dtype=np.int32) + time_sub = ConditionalDimension(name="time_sub", parent=grid.time_dim, + factor=factor) - u = TimeFunction(name="u", grid=grid, time_order=2, space_order=2) - rec_term = rec.interpolate(expr=u) + u0_save = TimeFunction(name="u0_save", grid=grid, time_dim=time_sub) + src = RickerSource(name="src", grid=grid, time_range=time_range, f0=10) - eq = rec_term.evaluate[2] - eq = eq.func(eq.lhs, eq.rhs.args[0]) + pde = u.dt2 - u.laplace + stencil = Eq(u.forward, solve(pde, u.forward)) - op = Operator(eq) + src_term = src.inject(field=u.forward, expr=src) - pkl_op = pickle.dumps(op) - new_op = pickle.loads(pkl_op) + eqn = [stencil] + src_term + eqn += [Eq(u0_save, u)] + op_fwd = Operator(eqn) - op.cfunction - new_op.cfunction + tmp_pickle_op_fn = "tmp_operator.pickle" + pickle.dump(op_fwd, open(tmp_pickle_op_fn, "wb")) + op_new = pickle.load(open(tmp_pickle_op_fn, "rb")) - assert str(op) == str(new_op) + assert str(op_fwd) == str(op_new) From 9636366fe7d98d4b06ecbf58c917537343f48211 Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Wed, 7 Jun 2023 06:48:02 +0000 Subject: [PATCH 22/22] arch: Tidy up --- devito/arch/compiler.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/devito/arch/compiler.py b/devito/arch/compiler.py index 452084bc91..52e6f762dd 100644 --- a/devito/arch/compiler.py +++ b/devito/arch/compiler.py @@ -416,6 +416,7 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) platform = kwargs.pop('platform', configuration['platform']) + # Graviton flag if platform is GRAVITON: self.cflags += ['-mcpu=neoverse-n1'] @@ -493,13 +494,13 @@ class AOMPCompiler(Compiler): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + language = kwargs.pop('language', configuration['language']) + platform = kwargs.pop('platform', configuration['platform']) + self.cflags += ['-Wno-unused-result', '-Wno-unused-variable'] if not configuration['safe-math']: self.cflags.append('-ffast-math') - language = kwargs.pop('language', configuration['language']) - platform = kwargs.pop('platform', configuration['platform']) - if platform is NVIDIAX: self.cflags.remove('-std=c99') elif platform is AMDGPUX: @@ -685,6 +686,7 @@ def __init__(self, *args, **kwargs): platform = kwargs.pop('platform', configuration['platform']) language = kwargs.pop('language', configuration['language']) + self.cflags.append("-xHost") if configuration['safe-math']: @@ -730,10 +732,10 @@ class IntelKNLCompiler(IntelCompiler): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.cflags.append('-xMIC-AVX512') - language = kwargs.pop('language', configuration['language']) + self.cflags.append('-xMIC-AVX512') + if language != 'openmp': warning("Running on Intel KNL without OpenMP is highly discouraged")