Skip to content

Commit

Permalink
Merge pull request #2348 from devitocodes/fix-kernel-offload
Browse files Browse the repository at this point in the history
compiler: Tweak device-aware blocking
  • Loading branch information
FabioLuporini authored Apr 9, 2024
2 parents c836e03 + a26db7c commit 3a8c46e
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 100 deletions.
14 changes: 11 additions & 3 deletions devito/passes/clusters/blocking.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,21 +160,29 @@ def __init__(self, options):
def _make_key_hook(self, cluster, level):
    """
    Build the key hook for `cluster`.

    The result is a 1-tuple holding whatever
    `is_on_device(cluster.functions, self.gpu_fit)` evaluates to. The
    `level` argument is accepted but not used here.
    """
    on_device = is_on_device(cluster.functions, self.gpu_fit)
    return (on_device,)

def _has_other_blockable_dim(self, cluster, d):
    """
    Tell whether `cluster` has an iteration Dimension other than `d` that
    could be blocked.

    A Dimension qualifies when `cluster.properties.is_parallel_relaxed`
    holds for it and `self._has_short_trip_count` does not. Returns True as
    soon as one such Dimension is found, False otherwise.
    """
    candidates = set(cluster.ispace.itdims) - {d}
    for i in candidates:
        if not cluster.properties.is_parallel_relaxed(i):
            continue
        if not self._has_short_trip_count(i):
            return True
    return False

def callback(self, clusters, prefix):
if not prefix:
return clusters

d = prefix[-1].dim
if self._has_short_trip_count(d):
return clusters

processed = []
for c in clusters:
if not c.properties.is_parallel_relaxed(d):
return clusters

if is_on_device(c.functions, self.gpu_fit):
if self._has_data_reuse(c):
if self._has_short_trip_count(d):
if self._has_other_blockable_dim(c, d):
return clusters
else:
properties = c.properties.block(d, 'small')
elif self._has_data_reuse(c):
properties = c.properties.block(d)
else:
properties = c.properties.block(d, 'small')
Expand Down
212 changes: 115 additions & 97 deletions tests/test_dle.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from conftest import assert_structure, assert_blocking, _R, skipif
from devito import (Grid, Function, TimeFunction, SparseTimeFunction, SpaceDimension,
CustomDimension, Dimension, SubDimension,
CustomDimension, Dimension, DefaultDimension, SubDimension,
PrecomputedSparseTimeFunction, Eq, Inc, ReduceMin, ReduceMax,
Operator, configuration, dimensions, info, cos)
from devito.exceptions import InvalidArgument
Expand Down Expand Up @@ -179,120 +179,138 @@ def test_cache_blocking_structure_distributed(mode):
assert iters[4].dim is z


def test_cache_blocking_structure_optrelax():
grid = Grid(shape=(8, 8, 8))
class TestBlockingOptRelax:

u = TimeFunction(name="u", grid=grid, space_order=2)
src = SparseTimeFunction(name="src", grid=grid, nt=3, npoint=1,
coordinates=np.array([(0.5, 0.5, 0.5)]))

eqns = [Eq(u.forward, u.dx)]
eqns += src.inject(field=u.forward, expr=src)

op = Operator(eqns, opt=('advanced', {'blockrelax': True}))

bns, _ = assert_blocking(op, {'x0_blk0', 'p_src0_blk0'})

iters = FindNodes(Iteration).visit(bns['p_src0_blk0'])
assert len(iters) == 5
assert iters[0].dim.is_Block
assert iters[1].dim.is_Block


def test_cache_blocking_structure_optrelax_customdim():
grid = Grid(shape=(8, 8, 8))
d = CustomDimension(name='d', symbolic_size=2)
x, y, z = grid.dimensions

u = TimeFunction(name="u", grid=grid)
f = Function(name="f", grid=grid, dimensions=(d, x, y, z), shape=(2,) + grid.shape)

eqn = Eq(f, u[d, x, y, z] + u[d, x + 1, y, z])

op = Operator(eqn, opt=('advanced', {'blockrelax': True}))

_, _ = assert_blocking(op, {'x0_blk0'})
assert_structure(op, ['d,x0_blk0,y0_blk0,z0_blk0,x,y,z'],
'd,x0_blk0,y0_blk0,z0_blk0,x,y,z')


def test_cache_blocking_structure_leftright_subdims():
grid = Grid(shape=(12, 12))
nbl = 3
def test_basic(self):
grid = Grid(shape=(8, 8, 8))

damp = Function(name='damp', grid=grid)
u = TimeFunction(name="u", grid=grid, space_order=2)
src = SparseTimeFunction(name="src", grid=grid, nt=3, npoint=1,
coordinates=np.array([(0.5, 0.5, 0.5)]))

eqns = [Eq(damp, 0.)]
for d in damp.dimensions:
# Left
dl = SubDimension.left(name='%sl' % d.name, parent=d, thickness=nbl)
eqns.extend([Inc(damp.subs({d: dl}), 1.)])
# right
dr = SubDimension.right(name='%sr' % d.name, parent=d, thickness=nbl)
eqns.extend([Inc(damp.subs({d: dr}), 1.)])
eqns = [Eq(u.forward, u.dx)]
eqns += src.inject(field=u.forward, expr=src)

op = Operator(eqns, opt=('fission', 'blocking', {'blockrelax': 'device-aware'}))
op = Operator(eqns, opt=('advanced', {'blockrelax': True}))

bns, _ = assert_blocking(op,
{'x0_blk0', 'xl0_blk0', 'xr0_blk0', 'x1_blk0', 'x2_blk0'})
assert all(IsPerfectIteration().visit(i) for i in bns.values())
assert all(len(FindNodes(Iteration).visit(i)) == 4 for i in bns.values())
bns, _ = assert_blocking(op, {'x0_blk0', 'p_src0_blk0'})

iters = FindNodes(Iteration).visit(bns['p_src0_blk0'])
assert len(iters) == 5
assert iters[0].dim.is_Block
assert iters[1].dim.is_Block

@pytest.mark.parametrize('opt, expected', [('noop', ('ijk', 'ikl')),
(('advanced', {'blockinner': True, 'blockrelax': True}),
('i0_blk0ijk', 'i0_blk0ikl'))])
def test_cache_blocking_structure_optrelax_linalg(opt, expected):
mat_shape = (4, 4)
def test_customdim(self):
grid = Grid(shape=(8, 8, 8))
d = CustomDimension(name='d', symbolic_size=2)
x, y, z = grid.dimensions

i, j, k, l = dimensions('i j k l')
A = Function(name='A', shape=mat_shape, dimensions=(i, j))
B = Function(name='B', shape=mat_shape, dimensions=(j, k))
C = Function(name='C', shape=mat_shape, dimensions=(j, k))
D = Function(name='D', shape=mat_shape, dimensions=(i, k))
E = Function(name='E', shape=mat_shape, dimensions=(k, l))
F = Function(name='F', shape=mat_shape, dimensions=(i, l))
u = TimeFunction(name="u", grid=grid)
f = Function(name="f", grid=grid, dimensions=(d, x, y, z),
shape=(2,) + grid.shape)

eqs = [Inc(D, A*B + A*C), Inc(F, D*E)]
eqn = Eq(f, u[d, x, y, z] + u[d, x + 1, y, z])

A.data[:] = 1
B.data[:] = 1
C.data[:] = 1
E.data[:] = 1
op = Operator(eqn, opt=('advanced', {'blockrelax': True}))

op0 = Operator(eqs, opt=opt)
op0.apply()
assert_structure(op0, expected)
assert np.linalg.norm(D.data) == 32.0
assert np.linalg.norm(F.data) == 128.0
assert_blocking(op, {'x0_blk0'})
assert_structure(op, ['d,x0_blk0,y0_blk0,z0_blk0,x,y,z'],
'd,x0_blk0,y0_blk0,z0_blk0,x,y,z')

def test_defaultdim_alone(self):
grid = Grid(shape=(8, 8, 8))
d = DefaultDimension(name='d', default_value=2)
time = grid.time_dim
x, y, z = grid.dimensions

def test_cache_blocking_structure_optrelax_prec_inject():
grid = Grid(shape=(10, 10))
dt = grid.stepping_dim.spacing
u = TimeFunction(name="u", grid=grid)
f = Function(name="f", grid=grid, dimensions=(d, x, y, z),
shape=(2,) + grid.shape)

eqn = Inc(f, u*cos(time*d))

op = Operator(eqn, opt=('advanced', {'blockrelax': 'device-aware'}))

assert_blocking(op, {'d0_blk0', 'x0_blk0'})
assert_structure(op,
['t,d0_blk0,d', 't,d,x0_blk0,y0_blk0,z0_blk0,x,y,z'],
't,d0_blk0,d,d,x0_blk0,y0_blk0,z0_blk0,x,y,z')

def test_leftright_subdims(self):
    """
    Blocking with `blockrelax='device-aware'` over left/right SubDimensions.

    For each Dimension of `damp`, one increment is added over a left and one
    over a right SubDimension of thickness 3. The Operator must expose the
    expected set of blocked nests, each a perfect nest of four Iterations.
    """
    thickness = 3
    grid = Grid(shape=(12, 12))

    damp = Function(name='damp', grid=grid)

    eqns = [Eq(damp, 0.)]
    for dim in damp.dimensions:
        # Left and right boundary layers along `dim`
        left = SubDimension.left(name='%sl' % dim.name, parent=dim,
                                 thickness=thickness)
        right = SubDimension.right(name='%sr' % dim.name, parent=dim,
                                   thickness=thickness)
        eqns.append(Inc(damp.subs({dim: left}), 1.))
        eqns.append(Inc(damp.subs({dim: right}), 1.))

    opt = ('fission', 'blocking', {'blockrelax': 'device-aware'})
    op = Operator(eqns, opt=opt)

    expected = {'x0_blk0', 'xl0_blk0', 'xr0_blk0', 'x1_blk0', 'x2_blk0'}
    bns, _ = assert_blocking(op, expected)

    nests = list(bns.values())
    # Every blocked nest must be perfect ...
    for nest in nests:
        assert IsPerfectIteration().visit(nest)
    # ... and made of exactly four Iterations
    for nest in nests:
        assert len(FindNodes(Iteration).visit(nest)) == 4

@pytest.mark.parametrize('opt, expected', [
    ('noop', ('ijk', 'ikl')),
    (('advanced', {'blockinner': True, 'blockrelax': True}),
     ('i0_blk0ijk', 'i0_blk0ikl')),
])
def test_linalg(self, opt, expected):
    """
    Two chained increments, `D += A*B + A*C` and `F += D*E`, over 4x4
    Functions filled with ones.

    The Operator built with `opt` must have the `expected` loop structure,
    and after running it the norms of `D` and `F` must be 32 and 128.
    """
    shape = (4, 4)
    i, j, k, l = dimensions('i j k l')

    A = Function(name='A', shape=shape, dimensions=(i, j))
    B = Function(name='B', shape=shape, dimensions=(j, k))
    C = Function(name='C', shape=shape, dimensions=(j, k))
    D = Function(name='D', shape=shape, dimensions=(i, k))
    E = Function(name='E', shape=shape, dimensions=(k, l))
    F = Function(name='F', shape=shape, dimensions=(i, l))

    eqs = [Inc(D, A*B + A*C), Inc(F, D*E)]

    # Inputs are all ones; D and F are left at their initial values
    for f in (A, B, C, E):
        f.data[:] = 1

    op0 = Operator(eqs, opt=opt)
    op0.apply()

    assert_structure(op0, expected)

    norm_D = np.linalg.norm(D.data)
    norm_F = np.linalg.norm(F.data)
    assert norm_D == 32.0
    assert norm_F == 128.0

def test_prec_inject(self):
grid = Grid(shape=(10, 10))
dt = grid.stepping_dim.spacing

u = TimeFunction(name="u", grid=grid, time_order=2, space_order=4)
u = TimeFunction(name="u", grid=grid, time_order=2, space_order=4)

# The values we put in don't matter, we won't run an Operator
points = [(0.05, 0.9), (0.01, 0.8), (0.07, 0.84)]
gridpoints = [(5, 90), (1, 80), (7, 84)]
interpolation_coeffs = np.ndarray(shape=(3, 2, 2))
sf = PrecomputedSparseTimeFunction(
name='s', grid=grid, r=2, npoint=len(points), nt=5,
gridpoints=gridpoints, interpolation_coeffs=interpolation_coeffs
)
# The values we put in don't matter, we won't run an Operator
points = [(0.05, 0.9), (0.01, 0.8), (0.07, 0.84)]
gridpoints = [(5, 90), (1, 80), (7, 84)]
interpolation_coeffs = np.ndarray(shape=(3, 2, 2))
sf = PrecomputedSparseTimeFunction(
name='s', grid=grid, r=2, npoint=len(points), nt=5,
gridpoints=gridpoints, interpolation_coeffs=interpolation_coeffs
)

eqns = sf.inject(field=u.forward, expr=sf * dt**2)
eqns = sf.inject(field=u.forward, expr=sf * dt**2)

op = Operator(eqns, opt=('advanced', {'blockrelax': 'device-aware',
'openmp': True,
'par-collapse-ncores': 1}))
op = Operator(eqns, opt=('advanced', {'blockrelax': 'device-aware',
'openmp': True,
'par-collapse-ncores': 1}))

assert_structure(op, ['t', 't,p_s0_blk0,p_s,rsx,rsy'],
't,p_s0_blk0,p_s,rsx,rsy')
assert_structure(op, ['t', 't,p_s0_blk0,p_s,rsx,rsy'],
't,p_s0_blk0,p_s,rsx,rsy')


class TestBlockingParTile(object):
class TestBlockingParTile:

@pytest.mark.parametrize('par_tile,expected', [
((16, 16, 16), ((16, 16, 16), (16, 16, 16))),
Expand Down Expand Up @@ -582,7 +600,7 @@ def test_cache_blocking_imperfect_nest_v2(blockinner):
assert np.allclose(u.data, u2.data, rtol=1e-07)


class TestNodeParallelism(object):
class TestNodeParallelism:

def test_nthreads_generation(self):
grid = Grid(shape=(10, 10))
Expand Down Expand Up @@ -1145,7 +1163,7 @@ def test_parallel_prec_inject(self):
assert 'omp for collapse' in iterations[1].pragmas[0].value


class TestNestedParallelism(object):
class TestNestedParallelism:

def test_basic(self):
grid = Grid(shape=(3, 3, 3))
Expand Down

0 comments on commit 3a8c46e

Please sign in to comment.