Skip to content

Commit

Permalink
compiler: Avoid OpenMP array reductions with GCC 5, where they are unsupported
Browse files Browse the repository at this point in the history
  • Loading branch information
FabioLuporini committed Apr 12, 2022
1 parent 57b6ca3 commit 9b5c3d7
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 15 deletions.
9 changes: 6 additions & 3 deletions devito/core/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,14 +143,15 @@ class Cpu64NoopOperator(Cpu64OperatorMixin, CoreOperator):
def _specialize_iet(cls, graph, **kwargs):
options = kwargs['options']
platform = kwargs['platform']
compiler = kwargs['compiler']
sregistry = kwargs['sregistry']

# Distributed-memory parallelism
mpiize(graph, sregistry=sregistry, options=options)

# Shared-memory parallelism
if options['openmp']:
parizer = cls._Target.Parizer(sregistry, options, platform)
parizer = cls._Target.Parizer(sregistry, options, platform, compiler)
parizer.make_parallel(graph)
parizer.initialize(graph)

Expand Down Expand Up @@ -213,6 +214,7 @@ def _specialize_clusters(cls, clusters, **kwargs):
def _specialize_iet(cls, graph, **kwargs):
options = kwargs['options']
platform = kwargs['platform']
compiler = kwargs['compiler']
sregistry = kwargs['sregistry']

# Flush denormal numbers
Expand All @@ -225,7 +227,7 @@ def _specialize_iet(cls, graph, **kwargs):
relax_incr_dimensions(graph)

# Parallelism
parizer = cls._Target.Parizer(sregistry, options, platform)
parizer = cls._Target.Parizer(sregistry, options, platform, compiler)
parizer.make_simd(graph)
parizer.make_parallel(graph)
parizer.initialize(graph)
Expand Down Expand Up @@ -304,9 +306,10 @@ def callback(f):
def _make_iet_passes_mapper(cls, **kwargs):
options = kwargs['options']
platform = kwargs['platform']
compiler = kwargs['compiler']
sregistry = kwargs['sregistry']

parizer = cls._Target.Parizer(sregistry, options, platform)
parizer = cls._Target.Parizer(sregistry, options, platform, compiler)

return {
'denormals': avoid_denormals,
Expand Down
6 changes: 4 additions & 2 deletions devito/core/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ def _specialize_clusters(cls, clusters, **kwargs):
def _specialize_iet(cls, graph, **kwargs):
options = kwargs['options']
platform = kwargs['platform']
compiler = kwargs['compiler']
sregistry = kwargs['sregistry']

# Distributed-memory parallelism
Expand All @@ -211,7 +212,7 @@ def _specialize_iet(cls, graph, **kwargs):
relax_incr_dimensions(graph)

# GPU parallelism
parizer = cls._Target.Parizer(sregistry, options, platform)
parizer = cls._Target.Parizer(sregistry, options, platform, compiler)
parizer.make_parallel(graph)
parizer.initialize(graph)

Expand Down Expand Up @@ -282,9 +283,10 @@ def callback(f):
def _make_iet_passes_mapper(cls, **kwargs):
options = kwargs['options']
platform = kwargs['platform']
compiler = kwargs['compiler']
sregistry = kwargs['sregistry']

parizer = cls._Target.Parizer(sregistry, options, platform)
parizer = cls._Target.Parizer(sregistry, options, platform, compiler)
orchestrator = cls._Target.Orchestrator(sregistry)

return {
Expand Down
5 changes: 4 additions & 1 deletion devito/passes/iet/langbase.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ class LangTransformer(ABC):
The constructs of the target language. To be specialized by a subclass.
"""

def __init__(self, key, sregistry, platform):
def __init__(self, key, sregistry, platform, compiler):
"""
Parameters
----------
Expand All @@ -195,13 +195,16 @@ def __init__(self, key, sregistry, platform):
The symbol registry, to access the symbols appearing in an IET.
platform : Platform
The underlying platform.
compiler : Compiler
The underlying JIT compiler.
"""
if key is not None:
self.key = key
else:
self.key = lambda i: False
self.sregistry = sregistry
self.platform = platform
self.compiler = compiler

@iet_pass
def make_parallel(self, iet):
Expand Down
13 changes: 13 additions & 0 deletions devito/passes/iet/languages/openmp.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from distutils import version

import cgen as c
from sympy import Not

from devito.arch import AMDGPUX, NVIDIAX, INTELGPUX
from devito.arch.compiler import GNUCompiler
from devito.ir import (Call, Conditional, List, Prodder, ParallelIteration,
ParallelBlock, PointerCast, While, FindSymbols)
from devito.passes.iet.definitions import DataManager, DeviceAwareDataManager
Expand Down Expand Up @@ -186,8 +189,18 @@ class SimdOmpizer(PragmaSimdTransformer):


class Ompizer(PragmaShmTransformer):

    lang = OmpBB

    @classmethod
    def _support_array_reduction(cls, compiler):
        """
        Return False if ``compiler`` is known not to support array
        reductions, True otherwise.

        Parameters
        ----------
        compiler : Compiler
            The underlying JIT compiler.
        """
        # Array reductions are not available with every backend compiler.
        # The only known unsupported case so far is a GNU compiler whose
        # version is lower than 6.0
        unsupported_gcc = (isinstance(compiler, GNUCompiler) and
                           compiler.version < version.StrictVersion("6.0"))
        return not unsupported_gcc


class DeviceOmpizer(PragmaDeviceAwareTransformer):
lang = DeviceOmpBB
Expand Down
22 changes: 16 additions & 6 deletions devito/passes/iet/parpragma.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ class PragmaShmTransformer(PragmaSimdTransformer):
and shared-memory-parallel IETs.
"""

def __init__(self, sregistry, options, platform):
def __init__(self, sregistry, options, platform, compiler):
"""
Parameters
----------
Expand All @@ -116,9 +116,11 @@ def __init__(self, sregistry, options, platform):
is greater than this threshold.
platform : Platform
The underlying platform.
compiler : Compiler
The underlying JIT compiler.
"""
key = lambda i: i.is_ParallelRelaxed and not i.is_Vectorized
super().__init__(key, sregistry, platform)
super().__init__(key, sregistry, platform, compiler)

self.collapse_ncores = options['par-collapse-ncores']
self.collapse_work = options['par-collapse-work']
Expand Down Expand Up @@ -209,14 +211,22 @@ def _select_candidates(self, candidates):

return root, list(collapsable)

@classmethod
def _support_array_reduction(cls, compiler):
    """
    Tell whether ``compiler`` supports array reductions.

    This implementation returns True regardless of ``compiler``.

    Parameters
    ----------
    compiler : Compiler
        The underlying JIT compiler.
    """
    return True

def _make_reductions(self, partree):
if not any(i.is_ParallelAtomic for i in partree.collapsed):
return partree

exprs = [i for i in FindNodes(Expression).visit(partree) if i.is_Increment]
reduction = [i.output for i in exprs]
if all(i.is_Affine for i in partree.collapsed) or \
all(not i.is_Indexed for i in reduction):

test0 = all(not i.is_Indexed for i in reduction)
test1 = (self._support_array_reduction(self.compiler) and
all(i.is_Affine for i in partree.collapsed))

if test0 or test1:
# Implement reduction
mapper = {partree.root: partree.root._rebuild(reduction=reduction)}
else:
Expand Down Expand Up @@ -399,8 +409,8 @@ class PragmaDeviceAwareTransformer(DeviceAwareMixin, PragmaShmTransformer):
shared-memory-parallel, and device-parallel IETs.
"""

def __init__(self, sregistry, options, platform):
super().__init__(sregistry, options, platform)
def __init__(self, sregistry, options, platform, compiler):
super().__init__(sregistry, options, platform, compiler)

self.gpu_fit = options['gpu-fit']
self.par_tile = options['par-tile']
Expand Down
10 changes: 7 additions & 3 deletions tests/test_dle.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@

from conftest import assert_structure, assert_blocking, _R
from devito import (Grid, Function, TimeFunction, SparseTimeFunction, SpaceDimension,
Dimension, SubDimension, Eq, Inc, Operator, dimensions, info, cos)
Dimension, SubDimension, Eq, Inc, Operator, configuration,
dimensions, info, cos)
from devito.exceptions import InvalidArgument
from devito.ir.iet import Iteration, FindNodes, retrieve_iteration_tree
from devito.passes.iet.languages.openmp import OmpRegion
from devito.passes.iet.languages.openmp import Ompizer, OmpRegion
from devito.tools import as_tuple
from devito.types import Scalar

Expand Down Expand Up @@ -622,8 +623,11 @@ def test_array_reduction(self, so, dim):
# `z` Iteration gets parallelized, nothing is collapsed, hence no
# reduction is required
assert "reduction" not in parallelized.pragmas[0].value
else:
elif Ompizer._support_array_reduction(configuration['compiler']):
assert "reduction(+:f[0:f_vec->size[0]])" in parallelized.pragmas[0].value
else:
# E.g. old GCC's
assert "atomic update" in str(iterations[-1])

try:
op(time_M=1)
Expand Down

0 comments on commit 9b5c3d7

Please sign in to comment.