Skip to content

Commit

Permalink
Merge branch 'master' into add-opkwargs
Browse files Browse the repository at this point in the history
  • Loading branch information
ccuetom committed Jun 8, 2023
2 parents 52fb33b + 9b9c45e commit 09e03cb
Show file tree
Hide file tree
Showing 24 changed files with 961 additions and 746 deletions.
12 changes: 7 additions & 5 deletions devito/arch/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,7 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

platform = kwargs.pop('platform', configuration['platform'])

# Graviton flag
if platform is GRAVITON:
self.cflags += ['-mcpu=neoverse-n1']
Expand Down Expand Up @@ -493,13 +494,13 @@ class AOMPCompiler(Compiler):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

language = kwargs.pop('language', configuration['language'])
platform = kwargs.pop('platform', configuration['platform'])

self.cflags += ['-Wno-unused-result', '-Wno-unused-variable']
if not configuration['safe-math']:
self.cflags.append('-ffast-math')

language = kwargs.pop('language', configuration['language'])
platform = kwargs.pop('platform', configuration['platform'])

if platform is NVIDIAX:
self.cflags.remove('-std=c99')
elif platform is AMDGPUX:
Expand Down Expand Up @@ -685,6 +686,7 @@ def __init__(self, *args, **kwargs):

platform = kwargs.pop('platform', configuration['platform'])
language = kwargs.pop('language', configuration['language'])

self.cflags.append("-xHost")

if configuration['safe-math']:
Expand Down Expand Up @@ -730,10 +732,10 @@ class IntelKNLCompiler(IntelCompiler):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

self.cflags.append('-xMIC-AVX512')

language = kwargs.pop('language', configuration['language'])

self.cflags.append('-xMIC-AVX512')

if language != 'openmp':
warning("Running on Intel KNL without OpenMP is highly discouraged")

Expand Down
5 changes: 5 additions & 0 deletions devito/ir/clusters/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,11 @@ def callback(self, clusters, prefix, seen=None):
# be rescheduled after `c` upon topological sorting
points.update(a.access for a in c.scope.accesses if a.is_write)

# Sort for determinism
# NOTE: not sorting might impact code generation. The order of
# the args is important because that's what search functions honor!
points = sorted(points, key=str)

rhs = HaloTouch(*points, halo_scheme=halo_scheme)

# Insert only if not redundant, to avoid useless pollution
Expand Down
2 changes: 1 addition & 1 deletion devito/ir/iet/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def iet_build(stree):
body = HaloSpot(queues.pop(i), i.halo_scheme)

elif i.is_Sync:
body = SyncSpot(i.sync_ops, body=queues.pop(i))
body = SyncSpot(i.sync_ops, body=queues.pop(i, None))

queues.setdefault(i.parent, []).append(body)

Expand Down
23 changes: 17 additions & 6 deletions devito/ir/iet/visitors.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@


__all__ = ['FindApplications', 'FindNodes', 'FindSections', 'FindSymbols',
'MapExprStmts', 'MapNodes', 'IsPerfectIteration', 'printAST', 'CGen',
'CInterface', 'Transformer', 'Uxreplace']
'MapExprStmts', 'MapHaloSpots', 'MapNodes', 'IsPerfectIteration',
'printAST', 'CGen', 'CInterface', 'Transformer', 'Uxreplace']


class Visitor(GenericVisitor):
Expand Down Expand Up @@ -737,14 +737,17 @@ def visit_Conditional(self, o, ret=None, queue=None):
return ret


class MapExprStmts(FindSections):
class MapKind(FindSections):

"""
Construct a mapper from ExprStmts, i.e. expression statements such as Calls
and Expressions, to their enclosing block (e.g., Iteration, Block).
Base class to construct mappers from Nodes of given type to their enclosing
scope of Nodes.
"""

def visit_ExprStmt(self, o, ret=None, queue=None):
# NOTE: Ideally, we would use a metaclass that dynamically constructs mappers
# for the kind supplied by the caller, but it'd be overkill at the moment

def visit_dummy(self, o, ret=None, queue=None):
if ret is None:
ret = self.default_retval()
ret[o] = as_tuple(queue)
Expand All @@ -754,6 +757,14 @@ def visit_ExprStmt(self, o, ret=None, queue=None):
visit_Block = FindSections.visit_Iteration


class MapExprStmts(MapKind):

    """
    Construct a mapper from ExprStmts, i.e. expression statements such as Calls
    and Expressions, to their enclosing block (e.g., Iteration, Block).
    """

    # Reuse the generic MapKind handler for ExprStmt nodes
    visit_ExprStmt = MapKind.visit_dummy


class MapHaloSpots(MapKind):

    """
    Construct a mapper from HaloSpots to their enclosing scope of Nodes.
    """

    # Reuse the generic MapKind handler for HaloSpot nodes
    visit_HaloSpot = MapKind.visit_dummy


class MapNodes(Visitor):

@classmethod
Expand Down
9 changes: 6 additions & 3 deletions devito/ir/stree/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,10 +156,13 @@ def preprocess(clusters, options=None, **kwargs):
found.append(c1)
queue.remove(c1)

syncs = normalize_syncs(c.syncs, *[c1.syncs for c1 in found])
halo_scheme = HaloScheme.union([c1.halo_scheme for c1 in found])
syncs = normalize_syncs(*[c1.syncs for c1 in found])
if syncs:
ispace = c.ispace.project(syncs)
processed.append(c.rebuild(exprs=[], ispace=ispace, syncs=syncs))

processed.append(c.rebuild(syncs=syncs, halo_scheme=halo_scheme))
halo_scheme = HaloScheme.union([c1.halo_scheme for c1 in found])
processed.append(c.rebuild(halo_scheme=halo_scheme))

# Sanity check!
try:
Expand Down
49 changes: 49 additions & 0 deletions devito/mpi/halo_scheme.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from devito import configuration
from devito.data import CORE, OWNED, LEFT, CENTER, RIGHT
from devito.ir.support import Forward, Scope
from devito.symbolics.manipulation import _uxreplace_registry
from devito.tools import (Reconstructable, Tag, as_tuple, filter_ordered, flatten,
frozendict, is_integer)
from devito.types import Grid
Expand Down Expand Up @@ -583,3 +584,51 @@ def __eq__(self, other):
return isinstance(other, HaloTouch) and self.halo_scheme == other.halo_scheme

func = Reconstructable._rebuild


def _uxreplace_dispatch_haloscheme(hs0, rule):
    """
    Apply the substitution rules in `rule` to the HaloScheme `hs0`.

    For each function `f` tracked by `hs0.fmapper`, the entries of `rule` are
    scanned in order and the first applicable one is used. A rule `i -> v` is
    applicable if either:

        * `i` is `f` itself, in which case `f` is replaced by `v` and its
          halo scheme entry is carried over unchanged; or
        * `i` is an Indexed of `f` and `v` is an Indexed too, in which case
          `f` is replaced by `v.function` and the `loc_indices`/`loc_dirs`
          of the entry are re-derived from the indices of `i` and `v`.

    Parameters
    ----------
    hs0 : HaloScheme
        The HaloScheme to be processed.
    rule : dict
        The substitution rules, mapping the objects to be replaced to their
        replacements.

    Returns
    -------
    tuple
        The pair `(hs, changed)`, where `hs` is the resulting HaloScheme
        (`hs0` itself if nothing was replaced) and `changed` is True if at
        least one replacement was performed, False otherwise.
    """
    changed = False
    hs = hs0
    for f, hse0 in hs0.fmapper.items():
        # Is it an attempt to replace `f`?
        for i, v in rule.items():
            if i is f:
                # Yes!
                g = v
                hse = hse0

            elif i.is_Indexed and i.function is f and v.is_Indexed:
                # Yes, but through an Indexed, hence the `loc_indices` may now
                # differ; let's infer them from the context
                g = v.function

                loc_indices = {}
                loc_dirs = {}
                for d0, loc_index in hse0.loc_indices.items():
                    if i.indices[d0] == loc_index:
                        # They indeed do change
                        d1 = g.indices[d0]
                        loc_indices[d1] = v.indices[d0]
                        loc_dirs[d1] = hse0.loc_dirs[d0]

                if len(loc_indices) != len(hse0.loc_indices):
                    # Nope, let's try with the next Indexed, if any
                    continue

                hse = HaloSchemeEntry(frozendict(loc_indices),
                                      frozendict(loc_dirs),
                                      hse0.halos, hse0.dims)

            else:
                continue

            # Swap `f` for `g` in the (possibly already updated) HaloScheme
            hs = hs.drop(f).add(g, hse)
            changed = True

            # At most one rule is applied per function
            break

    return hs, changed


_uxreplace_registry.register(HaloTouch,
{HaloScheme: _uxreplace_dispatch_haloscheme})
68 changes: 43 additions & 25 deletions devito/mpi/routines.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from devito.symbolics import (Byref, CondNe, FieldFromPointer, FieldFromComposite,
IndexedPointer, Macro, cast_mapper, subs_op_args)
from devito.tools import (as_mapper, dtype_to_mpitype, dtype_len, dtype_to_ctype,
flatten, generator, split)
flatten, generator, is_integer, split)
from devito.types import (Array, Bundle, Dimension, Eq, Symbol, LocalObject,
CompositeObject, CustomDimension)

Expand Down Expand Up @@ -338,35 +338,37 @@ def _make_all(self, f, hse, msg):

def _make_copy(self, f, hse, key, swap=False):
dims = [d.root for d in f.dimensions if d not in hse.loc_indices]
ofs = [Symbol(name='o%s' % d.root, is_const=True) for d in f.dimensions]

f_offsets = []
f_indices = []
for d, h in zip(f.dimensions, f._size_nodomain.left):
offset = Symbol(name='o%s' % d.root, is_const=True)
f_offsets.append(offset)
offset_nohalo = offset - h
f_indices.append(offset_nohalo + (d.root if d not in hse.loc_indices else 0))
bshape = [Symbol(name='b%s' % d.symbolic_size) for d in dims]
bdims = [CustomDimension(name=d.name, parent=d, symbolic_size=s)
for d, s in zip(dims, bshape)]

eqns = []
eqns.extend([Eq(d.symbolic_min, 0) for d in dims])
eqns.extend([Eq(d.symbolic_max, d.symbolic_size - 1) for d in dims])
eqns.extend([Eq(d.symbolic_min, 0) for d in bdims])
eqns.extend([Eq(d.symbolic_max, d.symbolic_size - 1) for d in bdims])

vd = CustomDimension(name='vd', symbolic_size=f.ncomp)
buf = Array(name='buf', dimensions=[vd] + bdims, dtype=f.c0.dtype,
padding=0)

mapper = dict(zip(dims, bdims))
findices = [o - h + mapper.get(d.root, 0)
for d, o, h in zip(f.dimensions, ofs, f._size_nodomain.left)]

bdims = [CustomDimension(name='vd', symbolic_size=f.ncomp)] + dims
buf = Array(name='buf', dimensions=bdims, dtype=f.c0.dtype, padding=0)
if swap is False:
swap = lambda i, j: (i, j)
name = 'gather%s' % key
else:
swap = lambda i, j: (j, i)
name = 'scatter%s' % key
for i, c in enumerate(f.components):
eqns.append(Eq(*swap(buf[[i] + dims], c[f_indices])))
eqns.append(Eq(*swap(buf[[i] + bdims], c[findices])))

# Compile `eqns` into an IET via recursive compilation
irs, _ = self.rcompile(eqns)

shape = [d.symbolic_size for d in dims]
parameters = [buf] + shape + list(f.components) + f_offsets
parameters = [buf] + bshape + list(f.components) + ofs

return CopyBuffer(name, irs.uiet, parameters)

Expand Down Expand Up @@ -1156,7 +1158,19 @@ def halos(self):
def npeers(self):
return len(self._halos)

def _arg_defaults(self, allocator, alias):
def _as_number(self, v, args):
    """
    Convert `v` into a Python int.

    An integer `v` is converted directly. Anything else is treated as a
    symbolic value, which is only expected when the Msg targets an Array;
    it is resolved by substituting the runtime arguments `args`.
    """
    if not is_integer(v):
        # Sanity checks: a symbolic value is legal iff the Msg is for an
        # Array, and the runtime arguments are needed to resolve it
        assert self.target.c0.is_Array
        assert args is not None
        return int(subs_op_args(v, args))

    return int(v)

def _arg_defaults(self, allocator, alias, args=None):
# Lazy initialization of `allocator` is necessary as the `allocator`
# type isn't really known until an Operator is constructed
self._allocator = allocator
Expand All @@ -1165,14 +1179,14 @@ def _arg_defaults(self, allocator, alias):
for i, halo in enumerate(self.halos):
entry = self.value[i]

# Buffer size for this peer
# Buffer shape for this peer
shape = []
for dim, side in zip(*halo):
try:
shape.append(getattr(f._size_owned[dim], side.name))
except AttributeError:
assert side is CENTER
shape.append(f._size_domain[dim])
shape.append(self._as_number(f._size_domain[dim], args))
entry.sizes = (c_int*len(shape))(*shape)

# Allocate the send/recv buffers
Expand All @@ -1181,8 +1195,8 @@ def _arg_defaults(self, allocator, alias):
entry.bufg, bufg_memfree_args = allocator._alloc_C_libcall(size, ctype)
entry.bufs, bufs_memfree_args = allocator._alloc_C_libcall(size, ctype)

# The `memfree_args` will be used to deallocate the buffer upon returning
# from C-land
# The `memfree_args` will be used to deallocate the buffer upon
# returning from C-land
self._memfree_args.extend([bufg_memfree_args, bufs_memfree_args])

return {self.name: self.value}
Expand All @@ -1198,7 +1212,7 @@ def _arg_values(self, args=None, **kwargs):
else:
alias = f

return self._arg_defaults(args.allocator, alias=alias)
return self._arg_defaults(args.allocator, alias=alias, args=args)

def _arg_apply(self, *args, **kwargs):
self._C_memfree()
Expand All @@ -1218,30 +1232,34 @@ class MPIMsgEnriched(MPIMsg):
(_C_field_to, c_int)
]

def _arg_defaults(self, allocator, alias=None):
super()._arg_defaults(allocator, alias)
def _arg_defaults(self, allocator, alias=None, args=None):
super()._arg_defaults(allocator, alias, args=args)

f = alias or self.target.c0
neighborhood = f.grid.distributor.neighborhood

for i, halo in enumerate(self.halos):
entry = self.value[i]

# `torank` peer + gather offsets
entry.torank = neighborhood[halo.side]
ofsg = []
for dim, side in zip(*halo):
try:
ofsg.append(getattr(f._offset_owned[dim], side.name))
v = getattr(f._offset_owned[dim], side.name)
ofsg.append(self._as_number(v, args))
except AttributeError:
assert side is CENTER
ofsg.append(f._offset_owned[dim].left)
entry.ofsg = (c_int*len(ofsg))(*ofsg)

# `fromrank` peer + scatter offsets
entry.fromrank = neighborhood[tuple(i.flip() for i in halo.side)]
ofss = []
for dim, side in zip(*halo):
try:
ofss.append(getattr(f._offset_halo[dim], side.flip().name))
v = getattr(f._offset_halo[dim], side.flip().name)
ofss.append(self._as_number(v, args))
except AttributeError:
assert side is CENTER
# Note `_offset_owned`, and not `_offset_halo`, is *not* a bug
Expand Down
22 changes: 14 additions & 8 deletions devito/operator/operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -995,6 +995,19 @@ def __setstate__(self, state):
self._lib.name = soname


# Compilation passes whose default action (perform or bypass) is overridden
# upon recursive compilation; all of the passes listed here are bypassed
# NOTE: re-applying these passes recursively may be pointless (running them
# once, during the main compilation phase, is enough) and, for a minority of
# them, even dangerous, since they might break in some circumstances when
# applied in cascade (e.g., `linearization` on top of `linearization`)
rcompile_registry = dict.fromkeys(('mpi', 'linearize', 'place-transfers'), False)


def rcompile(expressions, kwargs=None):
"""
Perform recursive compilation on an ordered sequence of symbolic expressions.
Expand All @@ -1008,14 +1021,7 @@ def rcompile(expressions, kwargs=None):

# Tweak the compilation kwargs
options = dict(kwargs['options'])
# NOTE: it is not only pointless to apply the following passes recursively
# (because once, during the main compilation phase, is simply enough), but
# also dangerous as a handful of compiler passes, the minority, might break
# in some circumstances if applied in cascade (e.g., `linearization` on top
# of `linearization`)
options['mpi'] = False
options['linearize'] = False # Will be carried out later on
options['place-transfers'] = False
options.update(rcompile_registry)
kwargs['options'] = options

# Recursive profiling not supported -- would be a complete mess
Expand Down
Loading

0 comments on commit 09e03cb

Please sign in to comment.