Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

compiler: Misc compiler fixes and improvements -- part II #2138

Merged
merged 22 commits into from
Jun 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
ee57fb8
compiler: Fix Array.shape
FabioLuporini May 27, 2023
858c348
mpi: Support C-land Array halo exchange
FabioLuporini May 26, 2023
4a184e0
compiler: Relax WaitLock regions in a ScheduleTree
FabioLuporini May 29, 2023
2d1573c
compiler: Relax HaloTouch behavior upon uxreplace
FabioLuporini May 29, 2023
7d83331
compiler: Improve HaloTouch + uxreplace
FabioLuporini May 29, 2023
14257b1
mpi: Use ad-hoc Dimensions for buffers
FabioLuporini May 29, 2023
3cd5ae6
compiler: Patch merge_halospot in presence of Conditionals
FabioLuporini May 30, 2023
134dcd3
compiler: Enable rcompile customization
FabioLuporini May 30, 2023
864af11
mpi: Patch MPIMsg construction
FabioLuporini May 31, 2023
c683071
compiler: Patch double-buffering
FabioLuporini May 31, 2023
23a5824
compiler: Fix BoundSymbol and Indirection pickling
FabioLuporini Jun 1, 2023
ca81cea
compiler: Enable HierarchyLayer comparison
FabioLuporini Jun 1, 2023
e87a0c7
misc: Switch from pickle to cloudpickle
FabioLuporini Jun 1, 2023
b596dae
compiler: Patch HaloTouch via uxreplace
FabioLuporini Jun 1, 2023
a185119
compiler: Enhance Reconstructable with variadic args support
FabioLuporini Apr 19, 2023
04ae0ec
compiler: Revamp FIndexed for correct reconstruction
FabioLuporini Jun 1, 2023
0da295e
compiler: Patch c_char_p lowering
FabioLuporini Jun 4, 2023
dc43c52
compiler: Simplify is_on_device
FabioLuporini Jun 4, 2023
1a20fa8
compiler: Impose canonical ordering for HaloTouch args
FabioLuporini Jun 4, 2023
a38db4b
compiler: Promote distributor to AbstractFunction
FabioLuporini Jun 7, 2023
25fbc79
tests: Try both pickle and cloudpickle
FabioLuporini Jun 7, 2023
9636366
arch: Tidy up
FabioLuporini Jun 7, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions devito/arch/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,7 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

platform = kwargs.pop('platform', configuration['platform'])

# Graviton flag
if platform is GRAVITON:
self.cflags += ['-mcpu=neoverse-n1']
Expand Down Expand Up @@ -493,13 +494,13 @@ class AOMPCompiler(Compiler):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

language = kwargs.pop('language', configuration['language'])
platform = kwargs.pop('platform', configuration['platform'])

self.cflags += ['-Wno-unused-result', '-Wno-unused-variable']
if not configuration['safe-math']:
self.cflags.append('-ffast-math')

language = kwargs.pop('language', configuration['language'])
platform = kwargs.pop('platform', configuration['platform'])

if platform is NVIDIAX:
self.cflags.remove('-std=c99')
elif platform is AMDGPUX:
Expand Down Expand Up @@ -685,6 +686,7 @@ def __init__(self, *args, **kwargs):

platform = kwargs.pop('platform', configuration['platform'])
language = kwargs.pop('language', configuration['language'])

self.cflags.append("-xHost")

if configuration['safe-math']:
Expand Down Expand Up @@ -730,10 +732,10 @@ class IntelKNLCompiler(IntelCompiler):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

self.cflags.append('-xMIC-AVX512')

language = kwargs.pop('language', configuration['language'])

self.cflags.append('-xMIC-AVX512')

if language != 'openmp':
warning("Running on Intel KNL without OpenMP is highly discouraged")

Expand Down
5 changes: 5 additions & 0 deletions devito/ir/clusters/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,11 @@ def callback(self, clusters, prefix, seen=None):
# be rescheduled after `c` upon topological sorting
points.update(a.access for a in c.scope.accesses if a.is_write)

# Sort for determinism
# NOTE: not sorting might impact code generation. The order of
# the args is important because that's what search functions honor!
points = sorted(points, key=str)

rhs = HaloTouch(*points, halo_scheme=halo_scheme)

# Insert only if not redundant, to avoid useless pollution
Expand Down
2 changes: 1 addition & 1 deletion devito/ir/iet/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def iet_build(stree):
body = HaloSpot(queues.pop(i), i.halo_scheme)

elif i.is_Sync:
body = SyncSpot(i.sync_ops, body=queues.pop(i))
body = SyncSpot(i.sync_ops, body=queues.pop(i, None))

queues.setdefault(i.parent, []).append(body)

Expand Down
23 changes: 17 additions & 6 deletions devito/ir/iet/visitors.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@


__all__ = ['FindApplications', 'FindNodes', 'FindSections', 'FindSymbols',
'MapExprStmts', 'MapNodes', 'IsPerfectIteration', 'printAST', 'CGen',
'CInterface', 'Transformer', 'Uxreplace']
'MapExprStmts', 'MapHaloSpots', 'MapNodes', 'IsPerfectIteration',
'printAST', 'CGen', 'CInterface', 'Transformer', 'Uxreplace']


class Visitor(GenericVisitor):
Expand Down Expand Up @@ -737,14 +737,17 @@ def visit_Conditional(self, o, ret=None, queue=None):
return ret


class MapExprStmts(FindSections):
class MapKind(FindSections):

"""
Construct a mapper from ExprStmts, i.e. expression statements such as Calls
and Expressions, to their enclosing block (e.g., Iteration, Block).
Base class to construct mappers from Nodes of given type to their enclosing
scope of Nodes.
"""

def visit_ExprStmt(self, o, ret=None, queue=None):
# NOTE: Ideally, we would use a metaclass that dynamically constructs mappers
# for the kind supplied by the caller, but it'd be overkill at the moment

def visit_dummy(self, o, ret=None, queue=None):
if ret is None:
ret = self.default_retval()
ret[o] = as_tuple(queue)
Expand All @@ -754,6 +757,14 @@ def visit_ExprStmt(self, o, ret=None, queue=None):
visit_Block = FindSections.visit_Iteration


class MapExprStmts(MapKind):

    """
    Construct a mapper from ExprStmts, i.e. expression statements such as Calls
    and Expressions, to their enclosing block (e.g., Iteration, Block).
    """

    # Reuse the generic MapKind handler for ExprStmt nodes
    visit_ExprStmt = MapKind.visit_dummy


class MapHaloSpots(MapKind):

    """
    Construct a mapper from HaloSpots to their enclosing scope of Nodes.
    """

    # Reuse the generic MapKind handler for HaloSpot nodes
    visit_HaloSpot = MapKind.visit_dummy


class MapNodes(Visitor):

@classmethod
Expand Down
30 changes: 25 additions & 5 deletions devito/ir/stree/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from devito.ir.stree.tree import (ScheduleTree, NodeIteration, NodeConditional,
NodeSync, NodeExprs, NodeSection, NodeHalo)
from devito.ir.support import (SEQUENTIAL, Any, Interval, IterationInterval,
IterationSpace, normalize_properties, normalize_syncs)
IterationSpace, WaitLock, normalize_properties,
normalize_syncs)
from devito.mpi.halo_scheme import HaloScheme
from devito.tools import Bunch, DefaultOrderedDict

Expand Down Expand Up @@ -176,15 +177,34 @@ def reuse_partial_subtree(c0, c1, d=None):


def reuse_whole_subtree(c0, c1, d=None):
return (c0.guards.get(d) == c1.guards.get(d) and
c0.syncs.get(d) == c1.syncs.get(d))
if not reuse_partial_subtree(c0, c1, d):
return False

syncs0 = c0.syncs.get(d, [])
syncs1 = c1.syncs.get(d, [])

if syncs0 == syncs1:
return True
elif not syncs0 and all(isinstance(s, WaitLock) for s in syncs1):
return True

return False


def augment_partial_subtree(cluster, tip, mapper, it=None):
d = it.dim

if d in cluster.syncs:
tip = NodeSync(cluster.syncs[d], tip)
try:
syncs = cluster.syncs[d]
if all(isinstance(s, WaitLock) for s in syncs):
# Unlike all other SyncOps, a WaitLock "floats" in the stree, in that
# it doesn't need to wrap any subtree. Thus, a WaitLock acts like
# a barrier to what follows inside `d`
NodeSync(syncs, tip)
else:
tip = NodeSync(syncs, tip)
except KeyError:
pass

mapper[it].bottom = tip

Expand Down
49 changes: 49 additions & 0 deletions devito/mpi/halo_scheme.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from devito import configuration
from devito.data import CORE, OWNED, LEFT, CENTER, RIGHT
from devito.ir.support import Forward, Scope
from devito.symbolics.manipulation import _uxreplace_registry
from devito.tools import (Reconstructable, Tag, as_tuple, filter_ordered, flatten,
frozendict, is_integer)
from devito.types import Grid
Expand Down Expand Up @@ -583,3 +584,51 @@ def __eq__(self, other):
return isinstance(other, HaloTouch) and self.halo_scheme == other.halo_scheme

func = Reconstructable._rebuild


def _uxreplace_dispatch_haloscheme(hs0, rule):
    """
    Apply the substitution `rule` to the functions tracked by `hs0`.

    For each `(f, hse0)` pair in `hs0.fmapper`, the entries of `rule` are
    scanned in order and the first applicable one is used to replace `f`.

    Parameters
    ----------
    hs0 : object exposing `fmapper`, `drop` and `add`
        Presumably a HaloScheme, as that is the type this handler gets
        registered for -- to be confirmed against the registry semantics.
    rule : dict-like
        The substitution rules, as `{old: new}` pairs.

    Returns
    -------
    tuple
        The pair `(hs, changed)`. `hs` is `hs0` itself if nothing was
        replaced, otherwise the outcome of the `drop`/`add` calls. `changed`
        is True if at least one entry was replaced, False otherwise.
    """
    changed = False
    hs = hs0
    for f, hse0 in hs0.fmapper.items():
        # Is it an attempt to replace `f`?
        for i, v in rule.items():
            if i is f:
                # Yes!
                g = v
                hse = hse0

            elif i.is_Indexed and i.function is f and v.is_Indexed:
                # Yes, but through an Indexed, hence the `loc_indices` may now
                # differ; let's infer them from the context
                g = v.function

                loc_indices = {}
                loc_dirs = {}
                for d0, loc_index in hse0.loc_indices.items():
                    if i.indices[d0] == loc_index:
                        # They indeed do change
                        d1 = g.indices[d0]
                        loc_indices[d1] = v.indices[d0]
                        loc_dirs[d1] = hse0.loc_dirs[d0]

                # All of the original `loc_indices` must have been matched by
                # `i`, otherwise this rule is not applicable to `f`
                if len(loc_indices) != len(hse0.loc_indices):
                    # Nope, let's try with the next Indexed, if any
                    continue

                hse = HaloSchemeEntry(frozendict(loc_indices),
                                      frozendict(loc_dirs),
                                      hse0.halos, hse0.dims)

            else:
                continue

            # Swap the entry for `f` with an entry for `g`
            hs = hs.drop(f).add(g, hse)
            changed |= True

            # At most one rule gets applied to any given `f`
            break

    return hs, changed


# Register `_uxreplace_dispatch_haloscheme` as the handler to be used for the
# HaloScheme carried by a HaloTouch
_uxreplace_registry.register(HaloTouch,
                             {HaloScheme: _uxreplace_dispatch_haloscheme})
68 changes: 43 additions & 25 deletions devito/mpi/routines.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from devito.symbolics import (Byref, CondNe, FieldFromPointer, FieldFromComposite,
IndexedPointer, Macro, cast_mapper, subs_op_args)
from devito.tools import (as_mapper, dtype_to_mpitype, dtype_len, dtype_to_ctype,
flatten, generator, split)
flatten, generator, is_integer, split)
from devito.types import (Array, Bundle, Dimension, Eq, Symbol, LocalObject,
CompositeObject, CustomDimension)

Expand Down Expand Up @@ -338,35 +338,37 @@ def _make_all(self, f, hse, msg):

def _make_copy(self, f, hse, key, swap=False):
dims = [d.root for d in f.dimensions if d not in hse.loc_indices]
ofs = [Symbol(name='o%s' % d.root, is_const=True) for d in f.dimensions]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's the same symbol as `grid.origin`. It doesn't seem to create an issue anywhere, but it seems risky.


f_offsets = []
f_indices = []
for d, h in zip(f.dimensions, f._size_nodomain.left):
offset = Symbol(name='o%s' % d.root, is_const=True)
f_offsets.append(offset)
offset_nohalo = offset - h
f_indices.append(offset_nohalo + (d.root if d not in hse.loc_indices else 0))
bshape = [Symbol(name='b%s' % d.symbolic_size) for d in dims]
bdims = [CustomDimension(name=d.name, parent=d, symbolic_size=s)
for d, s in zip(dims, bshape)]

eqns = []
eqns.extend([Eq(d.symbolic_min, 0) for d in dims])
eqns.extend([Eq(d.symbolic_max, d.symbolic_size - 1) for d in dims])
eqns.extend([Eq(d.symbolic_min, 0) for d in bdims])
eqns.extend([Eq(d.symbolic_max, d.symbolic_size - 1) for d in bdims])

vd = CustomDimension(name='vd', symbolic_size=f.ncomp)
buf = Array(name='buf', dimensions=[vd] + bdims, dtype=f.c0.dtype,
padding=0)

mapper = dict(zip(dims, bdims))
findices = [o - h + mapper.get(d.root, 0)
for d, o, h in zip(f.dimensions, ofs, f._size_nodomain.left)]

bdims = [CustomDimension(name='vd', symbolic_size=f.ncomp)] + dims
buf = Array(name='buf', dimensions=bdims, dtype=f.c0.dtype, padding=0)
if swap is False:
swap = lambda i, j: (i, j)
name = 'gather%s' % key
else:
swap = lambda i, j: (j, i)
name = 'scatter%s' % key
for i, c in enumerate(f.components):
eqns.append(Eq(*swap(buf[[i] + dims], c[f_indices])))
eqns.append(Eq(*swap(buf[[i] + bdims], c[findices])))

# Compile `eqns` into an IET via recursive compilation
irs, _ = self.rcompile(eqns)

shape = [d.symbolic_size for d in dims]
parameters = [buf] + shape + list(f.components) + f_offsets
parameters = [buf] + bshape + list(f.components) + ofs

return CopyBuffer(name, irs.uiet, parameters)

Expand Down Expand Up @@ -1156,7 +1158,19 @@ def halos(self):
def npeers(self):
return len(self._halos)

def _arg_defaults(self, allocator, alias):
def _as_number(self, v, args):
    """
    Convert `v` into a Python int.

    If `v` is not already an integer, it is treated as a symbolic value
    and evaluated by substituting `args` into it. Along the way, sanity
    checks enforce that a symbolic value is only received when the Msg
    targets an Array, and that `args` have been supplied.
    """
    if not is_integer(v):
        # Symbolic value: only legal for Arrays, and `args` are required
        # to turn it into an actual number
        assert self.target.c0.is_Array
        assert args is not None
        return int(v.subs(args))

    return int(v)

def _arg_defaults(self, allocator, alias, args=None):
# Lazy initialization if `allocator` is necessary as the `allocator`
# type isn't really known until an Operator is constructed
self._allocator = allocator
Expand All @@ -1165,14 +1179,14 @@ def _arg_defaults(self, allocator, alias):
for i, halo in enumerate(self.halos):
entry = self.value[i]

# Buffer size for this peer
# Buffer shape for this peer
shape = []
for dim, side in zip(*halo):
try:
shape.append(getattr(f._size_owned[dim], side.name))
except AttributeError:
assert side is CENTER
shape.append(f._size_domain[dim])
shape.append(self._as_number(f._size_domain[dim], args))
entry.sizes = (c_int*len(shape))(*shape)

# Allocate the send/recv buffers
Expand All @@ -1181,8 +1195,8 @@ def _arg_defaults(self, allocator, alias):
entry.bufg, bufg_memfree_args = allocator._alloc_C_libcall(size, ctype)
entry.bufs, bufs_memfree_args = allocator._alloc_C_libcall(size, ctype)

# The `memfree_args` will be used to deallocate the buffer upon returning
# from C-land
# The `memfree_args` will be used to deallocate the buffer upon
# returning from C-land
self._memfree_args.extend([bufg_memfree_args, bufs_memfree_args])

return {self.name: self.value}
Expand All @@ -1198,7 +1212,7 @@ def _arg_values(self, args=None, **kwargs):
else:
alias = f

return self._arg_defaults(args.allocator, alias=alias)
return self._arg_defaults(args.allocator, alias=alias, args=args)

def _arg_apply(self, *args, **kwargs):
self._C_memfree()
Expand All @@ -1218,30 +1232,34 @@ class MPIMsgEnriched(MPIMsg):
(_C_field_to, c_int)
]

def _arg_defaults(self, allocator, alias=None):
super()._arg_defaults(allocator, alias)
def _arg_defaults(self, allocator, alias=None, args=None):
super()._arg_defaults(allocator, alias, args=args)

f = alias or self.target.c0
neighborhood = f.grid.distributor.neighborhood

for i, halo in enumerate(self.halos):
entry = self.value[i]

# `torank` peer + gather offsets
entry.torank = neighborhood[halo.side]
ofsg = []
for dim, side in zip(*halo):
try:
ofsg.append(getattr(f._offset_owned[dim], side.name))
v = getattr(f._offset_owned[dim], side.name)
ofsg.append(self._as_number(v, args))
except AttributeError:
assert side is CENTER
ofsg.append(f._offset_owned[dim].left)
entry.ofsg = (c_int*len(ofsg))(*ofsg)

# `fromrank` peer + scatter offsets
entry.fromrank = neighborhood[tuple(i.flip() for i in halo.side)]
ofss = []
for dim, side in zip(*halo):
try:
ofss.append(getattr(f._offset_halo[dim], side.flip().name))
v = getattr(f._offset_halo[dim], side.flip().name)
ofss.append(self._as_number(v, args))
except AttributeError:
assert side is CENTER
# Note `_offset_owned`, and not `_offset_halo`, is *not* a bug
Expand Down
Loading