Skip to content

Commit

Permalink
Merge pull request #1634 from devitocodes/clang-omp-workaround
Browse files Browse the repository at this point in the history
compiler: Work around clang[10,11,?] omp-offloading bug
  • Loading branch information
FabioLuporini authored Mar 18, 2021
2 parents 1e619c0 + 0f27388 commit bfd826c
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 31 deletions.
12 changes: 9 additions & 3 deletions devito/ir/iet/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,15 @@ class Node(Signer):

def __new__(cls, *args, **kwargs):
obj = super(Node, cls).__new__(cls)
argnames = inspect.getfullargspec(cls.__init__).args
argnames, _, _, defaultvalues, _, _, _ = inspect.getfullargspec(cls.__init__)
try:
defaults = dict(zip(argnames[-len(defaultvalues):], defaultvalues))
except TypeError:
# No default kwarg values
defaults = {}
obj._args = {k: v for k, v in zip(argnames[1:], args)}
obj._args.update(kwargs.items())
obj._args.update({k: None for k in argnames[1:] if k not in obj._args})
obj._args.update({k: defaults.get(k) for k in argnames[1:] if k not in obj._args})
return obj

def _rebuild(self, *args, **kwargs):
Expand Down Expand Up @@ -789,9 +794,10 @@ class PointerCast(ExprStmt, Node):

is_PointerCast = True

def __init__(self, function, obj=None):
def __init__(self, function, obj=None, alignment=True):
self.function = function
self.obj = obj
self.alignment = alignment

def __repr__(self):
return "<PointerCast(%s)>" % self.function
Expand Down
6 changes: 3 additions & 3 deletions devito/ir/iet/visitors.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,9 +218,9 @@ def visit_PointerCast(self, o):
f._C_field_data)
else:
rvalue = '(%s (*)%s) %s' % (f._C_typedata, shape, obj)
lvalue = c.AlignedAttribute(f._data_alignment,
c.Value(f._C_typedata,
'(*restrict %s)%s' % (f.name, shape)))
lvalue = c.Value(f._C_typedata, '(*restrict %s)%s' % (f.name, shape))
if o.alignment:
lvalue = c.AlignedAttribute(f._data_alignment, lvalue)
return c.Initializer(lvalue, rvalue)

def visit_Dereference(self, o):
Expand Down
4 changes: 2 additions & 2 deletions devito/passes/iet/definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import cgen as c

from devito.ir import (EntryFunction, List, LocalExpression, PointerCast, FindSymbols,
from devito.ir import (EntryFunction, List, LocalExpression, FindSymbols,
MapExprStmts, Transformer)
from devito.passes.iet.engine import iet_pass
from devito.passes.iet.langbase import LangBB
Expand Down Expand Up @@ -284,7 +284,7 @@ def place_casts(self, iet):
symbol_names = {i.name for i in FindSymbols('free-symbols').visit(iet)}
need_cast = {i for i in need_cast if i.name in symbol_names}

casts = tuple(PointerCast(i) for i in iet.parameters if i in need_cast)
casts = tuple(self.lang.PointerCast(i) for i in iet.parameters if i in need_cast)
if casts:
casts = (List(body=casts, footer=c.Line()),)

Expand Down
22 changes: 4 additions & 18 deletions devito/passes/iet/langbase.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import cgen as c

from devito.ir import (DummyEq, Call, Conditional, List, Prodder, ParallelIteration,
ParallelBlock, EntryFunction, LocalExpression)
ParallelBlock, PointerCast, EntryFunction, LocalExpression)
from devito.mpi.distributed import MPICommObject
from devito.passes.iet.engine import iet_pass
from devito.symbolics import Byref, CondNe
Expand Down Expand Up @@ -34,27 +34,13 @@ class LangBB(object, metaclass=LangMeta):
Abstract base class for Language Building Blocks.
"""

# Note: below dummy values are used, so a subclass should override them

# NOTE: a subclass may want to override the values below, which represent
# IET node types used in the various lowering and/or transformation passes
Region = ParallelBlock
"""
The IET node type to be used to construct a parallel region.
"""

HostIteration = ParallelIteration
"""
The IET node type to be used to construct a host-parallel Iteration.
"""

DeviceIteration = ParallelIteration
"""
The IET node type to be used to construct a device-parallel Iteration.
"""

Prodder = Prodder
"""
The IET node type to be used to construct asynchronous prodders.
"""
PointerCast = PointerCast

@classmethod
def _map_to(cls, f, imask=None, queueid=None):
Expand Down
15 changes: 11 additions & 4 deletions devito/passes/iet/languages/openmp.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from devito.arch import AMDGPUX, NVIDIAX
from devito.ir import (Block, Call, Conditional, List, Prodder, ParallelIteration,
ParallelBlock, While, FindNodes, Transformer)
ParallelBlock, PointerCast, While, FindNodes, Transformer)
from devito.mpi.routines import IrecvCall, IsendCall
from devito.passes.iet.definitions import DataManager, DeviceAwareDataManager
from devito.passes.iet.engine import iet_pass
Expand Down Expand Up @@ -151,6 +151,13 @@ class OmpBB(PragmaLangBB):
Prodder = ThreadedProdder


class DeviceOmpBB(OmpBB):

# NOTE: Work around clang>=10 issue concerning offloading arrays declared
# with an `__attribute__((aligned(...)))` qualifier
PointerCast = lambda *args: PointerCast(*args, alignment=False)


class SimdOmpizer(PragmaSimdTransformer):
lang = OmpBB

Expand All @@ -161,7 +168,7 @@ class Ompizer(PragmaShmTransformer):

class DeviceOmpizer(PragmaDeviceAwareTransformer):

lang = OmpBB
lang = DeviceOmpBB

@iet_pass
def make_gpudirect(self, iet):
Expand All @@ -181,8 +188,8 @@ class OmpDataManager(DataManager):


class DeviceOmpDataManager(DeviceAwareDataManager):
lang = OmpBB
lang = DeviceOmpBB


class OmpOrchestrator(Orchestrator):
lang = OmpBB
lang = DeviceOmpBB
2 changes: 1 addition & 1 deletion examples/performance/01_gpu.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@
" }\n",
" /* End of OpenMP setup */\n",
"\n",
" float (*restrict u)[u_vec->size[1]][u_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[u_vec->size[1]][u_vec->size[2]]) u_vec->data;\n",
" float (*restrict u)[u_vec->size[1]][u_vec->size[2]] = (float (*)[u_vec->size[1]][u_vec->size[2]]) u_vec->data;\n",
"\n",
" #pragma omp target enter data map(to: u[0:u_vec->size[0]][0:u_vec->size[1]][0:u_vec->size[2]])\n",
"\n",
Expand Down

0 comments on commit bfd826c

Please sign in to comment.