Skip to content

Commit

Permalink
compiler: Revamp Parizer scoring function
Browse files Browse the repository at this point in the history
  • Loading branch information
FabioLuporini committed Oct 10, 2023
1 parent 2b09a82 commit a70fafb
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 36 deletions.
3 changes: 0 additions & 3 deletions devito/passes/iet/langbase.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,9 +214,6 @@ def DeviceIteration(self):
def Prodder(self):
return self.lang.Prodder

def _device_pointers(self, *args, **kwargs):
return {}


class DeviceAwareMixin(object):

Expand Down
74 changes: 49 additions & 25 deletions devito/passes/iet/parpragma.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from itertools import takewhile

import numpy as np
import cgen as c
from cached_property import cached_property
Expand Down Expand Up @@ -254,6 +256,36 @@ def nthreads_nonaffine(self):
def threadid(self):
return self.sregistry.threadid

def _score_candidate(self, n0, root, collapsable=()):
"""
The score of a collapsable nest depends on the number of fully-parallel
Iterations and their position in the nest (the outer, the better).
"""
nest = [root] + list(collapsable)
n = len(nest)

# Number of fully-parallel collapsable Iterations
key = lambda i: i.is_ParallelNoAtomic
fpiters = list(takewhile(key, nest))
nfpiters = len(fpiters)

# Prioritize the Dimensions that are more likely to define larger
# iteration spaces
fpdims = [i.dim for i in fpiters]
key = lambda d: (not d.is_Derived or
d.is_Custom or # NOTE: might use a refinement
(d.is_Block and d._depth == 1))
nfpiters_large = len([d for d in fpdims if key(d)])

return (
int(nfpiters == n), # Fully-parallel nest
int(nfpiters == 0 and n), # Fully-atomic nest
nfpiters_large,
-(n0 + 1), # The outer, the better
nfpiters,
n,
)

def _select_candidates(self, candidates):
assert candidates

Expand All @@ -263,15 +295,18 @@ def _select_candidates(self, candidates):
mapper = {}
for n0, root in enumerate(candidates):

# Score `root` in isolation
mapper[(root, ())] = self._score_candidate(n0, root)

collapsable = []
for n, i in enumerate(candidates[n0+1:], n0+1):
# The Iteration nest [root, ..., i] must be perfect
if not IsPerfectIteration(depth=i).visit(root):
break

# Loops are collapsable only if none of the iteration variables appear
# in initializer expressions. For example, the following two loops
# cannot be collapsed
# Loops are collapsable only if none of the iteration variables
# appear in initializer expressions. For example, the following
# two loops cannot be collapsed
#
# for (i = ... )
# for (j = i ...)
Expand All @@ -281,7 +316,7 @@ def _select_candidates(self, candidates):
if any(j.dim in i.symbolic_min.free_symbols for j in candidates[n0:n]):
break

# Also, we do not want to collapse SIMD-vectorized Iterations
# Can't collapse SIMD-vectorized Iterations
if i.is_Vectorized:
break

Expand All @@ -297,17 +332,9 @@ def _select_candidates(self, candidates):

collapsable.append(i)

# Give a score to this candidate, based on the number of fully-parallel
# Iterations and their position (i.e. outermost to innermost) in the nest
score = (
int(root.is_ParallelNoAtomic),
len(self._device_pointers(root)), # Outermost offloadable
int(len([i for i in collapsable if i.is_ParallelNoAtomic]) >= 1),
int(len([i for i in collapsable if i.is_ParallelRelaxed]) >= 1),
-(n0 + 1) # The outermost, the better
)

mapper[(root, tuple(collapsable))] = score
# Score `root + collapsable`
v = tuple(collapsable)
mapper[(root, v)] = self._score_candidate(n0, root, v)

# Retrieve the candidates with highest score
root, collapsable = max(mapper, key=mapper.get)
Expand All @@ -318,16 +345,6 @@ def _make_reductions(self, partree):
if not any(i.is_ParallelAtomic for i in partree.collapsed):
return partree

# We bypass the corner case where a reduction might not be optimal, mainly:
# - Only the most inner loop is atomic
# In which case we can parallelize the perfect nest
# The opposite corner case (most outer loop atomic)
# should be detected before this pass
nc = len(partree.collapsed)
if nc > 1 and all(i.is_ParallelNoAtomic for i in partree.collapsed[:nc-1]):
mapper = {partree.root: partree.root._rebuild(ncollapsed=nc-1)}
return Transformer(mapper).visit(partree)

exprs = [i for i in FindNodes(Expression).visit(partree) if i.is_reduction]

reductions = []
Expand Down Expand Up @@ -586,6 +603,13 @@ def __init__(self, sregistry, options, platform, compiler):
self.par_tile = UnboundTuple(options['par-tile'])
self.par_disabled = options['par-disabled']

def _score_candidate(self, n0, root, collapsable=()):
    """
    Score the nest ``[root, *collapsable]``, putting device pointers first.

    Returns the parent class' score with one extra leading entry.
    """
    # `ndptrs`, the number of device pointers, part of the score too to
    # ensure the outermost loop is offloaded. Being the first entry, it is
    # compared before any entry of the inherited score
    ndptrs = len(self._device_pointers(root))

    return (ndptrs,) + super()._score_candidate(n0, root, collapsable)

def _make_threaded_prodders(self, partree):
if isinstance(partree.root, self.DeviceIteration):
# no-op for now
Expand Down
18 changes: 10 additions & 8 deletions tests/test_dle.py
Original file line number Diff line number Diff line change
Expand Up @@ -863,8 +863,9 @@ def test_incs_no_atomic(self):
op0 = Operator(Inc(uf, 1), opt=('advanced', {'openmp': True,
'par-collapse-ncores': 1,
'par-collapse-work': 0}))
assert 'collapse(3)' in str(op0)
assert 'atomic' in str(op0)
assert 'omp for schedule' in str(op0)
assert 'collapse' not in str(op0)
assert 'atomic' not in str(op0)

# Now only `x` is parallelized
op1 = Operator([Eq(v[t, x, 0, 0], v[t, x, 0, 0] + 1), Inc(uf, 1)],
Expand All @@ -877,22 +878,23 @@ def test_incs_no_atomic(self):
def test_incr_perfect_outer(self):
grid = Grid((5, 5))
d = Dimension(name="d")

u = Function(name="u", dimensions=(*grid.dimensions, d),
grid=grid, shape=(*grid.shape, 5), )
v = Function(name="v", dimensions=(*grid.dimensions, d),
grid=grid, shape=(*grid.shape, 5))
w = Function(name="w", grid=grid)

u.data.fill(1)
v.data.fill(2)

w = Function(name="w", grid=grid)

summation = Inc(w, u*v)

op0 = Operator([summation], opt=('advanced', {'openmp': True}))
assert 'reduction' not in str(op0)
assert 'omp for' in str(op0)
op = Operator([summation], opt=('advanced', {'openmp': True}))
assert 'reduction' not in str(op)
assert 'omp for' in str(op)

op0()
op()
assert np.all(w.data == 10)

@pytest.mark.parametrize('exprs,simd_level,expected', [
Expand Down

0 comments on commit a70fafb

Please sign in to comment.