Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

compiler: Add host-*-pin handles; more volatile with pthreads #2116

Merged
merged 2 commits into from
Apr 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 33 additions & 1 deletion devito/ir/iet/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
from devito.tools import filter_ordered
from devito.types import Global

__all__ = ['filter_iterations', 'retrieve_iteration_tree', 'derive_parameters']
__all__ = ['filter_iterations', 'retrieve_iteration_tree', 'derive_parameters',
'maybe_alias']


class IterationTree(tuple):
Expand Down Expand Up @@ -122,3 +123,34 @@ def derive_parameters(iet, drop_locals=False):
parameters = [p for p in parameters if not (p.is_ArrayBasic or p.is_LocalObject)]

return parameters


def maybe_alias(obj, candidate):
    """
    Tell whether `candidate` can act as an alias for `obj`.

    Returns True if the two are the very same object or if they carry the
    same name; False in every other case.
    """
    # Identity is the trivial case. Matching names are just as conclusive:
    # names are unique throughout the compilation, so even when an alias is
    # used in a subroutine with different type qualifiers (e.g., const vs not
    # const, volatile vs not volatile), equal names definitely represent the
    # same logical object
    if obj is candidate or obj.name == candidate.name:
        return True

    # NOTE: none of the checks below can currently produce True; they are laid
    # out as scaffolding for a finer-grained alias detection to be added later

    # Only AbstractFunctions are inspected any further
    if not obj.is_AbstractFunction:
        return False

    # An AbstractFunction cannot be aliased by something that is not one
    if not candidate.is_AbstractFunction:
        return False

    # The direct base classes must coincide, e.g. TimeFunction vs
    # SparseFunction -> False
    if type(obj).__base__ is not type(candidate).__base__:
        return False

    # TODO: At some point we may need to introduce some logic here, but we'll
    # also need to introduce something like __eq_weak__ that compares most of
    # the __rkwargs__ except for e.g. the name
    return False
28 changes: 25 additions & 3 deletions devito/passes/iet/asynchrony.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
from collections import OrderedDict
from ctypes import c_int

import cgen as c

from devito.ir import (AsyncCall, AsyncCallable, BlankLine, Call, Callable,
Conditional, Dereference, DummyExpr, FindNodes, FindSymbols,
Iteration, List, PointerCast, Return, ThreadCallable,
Transformer, While)
Transformer, While, maybe_alias)
from devito.passes.iet.engine import iet_pass
from devito.symbolics import (CondEq, CondNe, FieldFromComposite, FieldFromPointer,
Null)
from devito.tools import DefaultOrderedDict, Bunch, split
from devito.types import Lock, Pointer, PThreadArray, QueueID, SharedData, Symbol
from devito.types import (Lock, Pointer, PThreadArray, QueueID, SharedData, Symbol,
VolatileInt)

__all__ = ['pthreadify']

Expand Down Expand Up @@ -48,6 +50,9 @@ def lower_async_callables(iet, track=None, root=None, sregistry=None):
defines = FindSymbols('defines').visit(root.body)
ncfields, cfields = split(fields, lambda i: i in defines)

# Postprocess `ncfields`
ncfields = sanitize_ncfields(ncfields)

# SharedData -- that is the data structure that will be used by the
# main thread to pass information down to the child thread(s)
sdata = track[iet.name].sdata = SharedData(name='sdata',
Expand Down Expand Up @@ -135,7 +140,7 @@ def lower_async_calls(iet, track=None, sregistry=None):
d = threads.index
arguments = []
for a in n.arguments:
if a in sdata.ncfields:
if any(maybe_alias(a, i) for i in sdata.ncfields):
continue
elif isinstance(a, QueueID):
# Different pthreads use different queues
Expand Down Expand Up @@ -208,3 +213,20 @@ def lower_async_calls(iet, track=None, sregistry=None):
assert not finalization

return iet, {'efuncs': tuple(efuncs.values())}


# *** Utils

def sanitize_ncfields(ncfields):
    """
    Return a new list mirroring `ncfields`, in which each entry whose
    `_C_ctype` is `c_int` is replaced by a `VolatileInt` with the same name,
    while all other entries are passed through untouched.

    Due to a bug in the NVC compiler (v<=22.7 and potentially later), C's
    `volatile` has to be used more extensively than strictly necessary to
    avoid flaky optimizations that would cause faulty behaviour in rare,
    non-deterministic scenarios.
    """
    return [VolatileInt(name=f.name) if f._C_ctype is c_int else f
            for f in ncfields]
4 changes: 2 additions & 2 deletions devito/passes/iet/definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,12 +178,12 @@ def _alloc_mapped_array_on_high_bw_mem(self, site, obj, storage, *args):

ffp1 = FieldFromPointer(obj._C_field_data, obj._C_symbol)
memptr = VOID(Byref(ffp1), '**')
allocs.append(self.lang['host-alloc'](memptr, alignment, nbytes_param))
allocs.append(self.lang['host-alloc-pin'](memptr, alignment, nbytes_param))

ffp0 = FieldFromPointer(obj._C_field_nbytes, obj._C_symbol)
init = DummyExpr(ffp0, nbytes_param)

frees = [self.lang['host-free'](ffp1),
frees = [self.lang['host-free-pin'](ffp1),
self.lang['host-free'](obj._C_symbol)]

# Not all backends require explicit allocation/deallocation of the
Expand Down
4 changes: 4 additions & 0 deletions devito/passes/iet/languages/C.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,12 @@ class CBB(LangBB):
'header-memcpy': 'string.h',
'host-alloc': lambda i, j, k:
Call('posix_memalign', (i, j, k)),
'host-alloc-pin': lambda i, j, k:
Call('posix_memalign', (i, j, k)),
'host-free': lambda i:
Call('free', (i,)),
'host-free-pin': lambda i:
Call('free', (i,)),
'alloc-global-symbol': lambda i, j, k:
Call('memcpy', (i, j, k))
}
Expand Down
2 changes: 1 addition & 1 deletion tests/test_gpu_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ def test_tasking_fused(self):
exprs = FindNodes(Expression).visit(op._func_table['copy_to_host0'].root)
b = 13 if configuration['language'] == 'openacc' else 12 # No `qid` w/ OMP
assert str(exprs[b]) == 'const int deviceid = sdata->deviceid;'
assert str(exprs[b+1]) == 'int time = sdata->time;'
assert str(exprs[b+1]) == 'volatile int time = sdata->time;'
assert str(exprs[b+2]) == 'lock0[0] = 1;'
assert exprs[b+3].write is u
assert exprs[b+4].write is v
Expand Down