Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

compiler: Add guards to prevent OOB when streaming buffers with ConditionalDimension #2150

Merged
merged 5 commits into from
Jul 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions devito/passes/clusters/buffering.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,8 @@ def callback(self, clusters, prefix, cache=None):
b = Buffer(f, dim, d, accessv, cache, self.options, self.sregistry)
buffers.append(b)

guards = {}

if b.is_read or not b.has_uniform_subdims:
# Special case: avoid initialization if not strictly necessary
# See docstring for more info about what this implies
Expand All @@ -177,6 +179,8 @@ def callback(self, clusters, prefix, cache=None):
lhs = b.indexed[dims]._subs(dim, b.firstidx.b)
rhs = b.function[dims]._subs(dim, b.firstidx.f)

guards[b.xd] = GuardBound(0, b.firstidx.f)

elif b.is_write and init_onwrite(b.function):
dims = b.buffer.dimensions
lhs = b.buffer.indexify()
Expand All @@ -191,7 +195,7 @@ def callback(self, clusters, prefix, cache=None):

expr = lower_exprs(Eq(lhs, rhs))
ispace = b.writeto
guards = {pd: GuardBound(dim.root.symbolic_min, dim.root.symbolic_max)}
guards[pd] = GuardBound(dim.root.symbolic_min, dim.root.symbolic_max)
properties = {d: {AFFINE, PARALLEL} for d in ispace.itdimensions}

init.append(Cluster(expr, ispace, guards=guards, properties=properties))
Expand Down Expand Up @@ -224,10 +228,12 @@ def callback(self, clusters, prefix, cache=None):

expr = lower_exprs(Eq(lhs, rhs))
ispace = b.readfrom
guards = c.guards.andg(b.xd, GuardBound(0, b.firstidx.f))
properties = c.properties.sequentialize(d)

processed.append(
c.rebuild(exprs=expr, ispace=ispace, properties=properties)
c.rebuild(exprs=expr, ispace=ispace,
guards=guards, properties=properties)
)

# Substitute buffered Functions with the newly created buffers
Expand Down Expand Up @@ -256,10 +262,12 @@ def callback(self, clusters, prefix, cache=None):

expr = lower_exprs(uxreplace(Eq(lhs, rhs), b.subdims_mapper))
ispace = b.written
guards = c.guards.andg(b.xd, GuardBound(0, b.firstidx.f))
properties = c.properties.sequentialize(d)

processed.append(
c.rebuild(exprs=expr, ispace=ispace, properties=properties)
c.rebuild(exprs=expr, ispace=ispace,
guards=guards, properties=properties)
)

# Lift {write,read}-only buffers into separate IterationSpaces
Expand Down
38 changes: 19 additions & 19 deletions tests/test_buffering.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def test_read_write():
op1 = Operator(eqn, opt='buffering')

# Check generated code
assert len(retrieve_iteration_tree(op1)) == 2
assert len(retrieve_iteration_tree(op1)) == 3
buffers = [i for i in FindSymbols().visit(op1) if i.is_Array]
assert len(buffers) == 1
assert buffers.pop().symbolic_shape[0] == 2
Expand Down Expand Up @@ -77,7 +77,7 @@ def test_read_only():
op1 = Operator(eqns, opt='buffering')

# Check generated code
assert len(retrieve_iteration_tree(op1)) == 3
assert len(retrieve_iteration_tree(op1)) == 4
buffers = [i for i in FindSymbols().visit(op1) if i.is_Array]
assert len(buffers) == 1

Expand Down Expand Up @@ -126,7 +126,7 @@ def test_read_only_backwards():
op1 = Operator(eqns, opt='buffering')

# Check generated code
assert len(retrieve_iteration_tree(op1)) == 3
assert len(retrieve_iteration_tree(op1)) == 4
buffers = [i for i in FindSymbols().visit(op1) if i.is_Array]
assert len(buffers) == 1

Expand Down Expand Up @@ -157,7 +157,7 @@ def test_read_only_backwards_unstructured():
op1 = Operator(eqns, opt='buffering')

# Check generated code
assert len(retrieve_iteration_tree(op1)) == 2
assert len(retrieve_iteration_tree(op1)) == 3
buffers = [i for i in FindSymbols().visit(op1) if i.is_Array]
assert len(buffers) == 1

Expand All @@ -181,7 +181,7 @@ def test_async_degree(async_degree):
op1 = Operator(eqn, opt=('buffering', {'buf-async-degree': async_degree}))

# Check generated code
assert len(retrieve_iteration_tree(op1)) == 2
assert len(retrieve_iteration_tree(op1)) == 3
buffers = [i for i in FindSymbols().visit(op1) if i.is_Array]
assert len(buffers) == 1
assert buffers.pop().symbolic_shape[0] == async_degree
Expand Down Expand Up @@ -209,8 +209,8 @@ def test_two_homogeneous_buffers():
op2 = Operator(eqns, opt=('buffering', 'fuse'))

# Check generated code
assert len(retrieve_iteration_tree(op1)) == 2
assert len(retrieve_iteration_tree(op2)) == 2
assert len(retrieve_iteration_tree(op1)) == 3
assert len(retrieve_iteration_tree(op2)) == 3
buffers = [i for i in FindSymbols().visit(op1.body) if i.is_Array]
assert len(buffers) == 2

Expand Down Expand Up @@ -241,7 +241,7 @@ def test_two_heterogeneous_buffers():
op1 = Operator(eqns, opt='buffering')

# Check generated code
assert len(retrieve_iteration_tree(op1)) == 3
assert len(retrieve_iteration_tree(op1)) == 5
buffers = [i for i in FindSymbols().visit(op1.body) if i.is_Array]
assert len(buffers) == 2

Expand Down Expand Up @@ -272,7 +272,7 @@ def test_over_injection():

# Check generated code
assert len(retrieve_iteration_tree(op1)) ==\
5 + bool(configuration['language'] != 'C')
6 + bool(configuration['language'] != 'C')
buffers = [i for i in FindSymbols().visit(op1) if i.is_Array]
assert len(buffers) == 1

Expand Down Expand Up @@ -442,7 +442,7 @@ def test_subdims():
op1 = Operator(eqn, opt='buffering')

# Check generated code
assert len(retrieve_iteration_tree(op1)) == 2
assert len(retrieve_iteration_tree(op1)) == 3
assert len([i for i in FindSymbols().visit(op1) if i.is_Array]) == 1

op0.apply(time_M=nt-2)
Expand Down Expand Up @@ -474,7 +474,7 @@ def test_conddim_backwards():
op1 = Operator(eqns, opt='buffering')

# Check generated code
assert len(retrieve_iteration_tree(op1)) == 3
assert len(retrieve_iteration_tree(op1)) == 4
buffers = [i for i in FindSymbols().visit(op1) if i.is_Array]
assert len(buffers) == 1

Expand Down Expand Up @@ -507,7 +507,7 @@ def test_conddim_backwards_multi_slots():
op1 = Operator(eqns, opt='buffering')

# Check generated code
assert len(retrieve_iteration_tree(op1)) == 3
assert len(retrieve_iteration_tree(op1)) == 4
buffers = [i for i in FindSymbols().visit(op1) if i.is_Array]
assert len(buffers) == 1

Expand Down Expand Up @@ -544,7 +544,7 @@ def test_conddim_backwards_unstructured():
op1 = Operator(eqns, opt='buffering')

# Check generated code
assert len(retrieve_iteration_tree(op1)) == 3
assert len(retrieve_iteration_tree(op1)) == 4
buffers = [i for i in FindSymbols().visit(op1) if i.is_Array]
assert len(buffers) == 1

Expand Down Expand Up @@ -589,7 +589,7 @@ def test_conddim_w_shifting():
op1 = Operator(eqns, opt='buffering')

# Check generated code
assert len(retrieve_iteration_tree(op1)) == 3
assert len(retrieve_iteration_tree(op1)) == 4
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So what changed in the code since it seems to change existing tests not related to init_to_host

Copy link
Contributor Author

@ccuetom ccuetom Jun 23, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These tests also generate initialisation code that is affected by the addition of the guard. The loop:

  if (time_m <= time_M)
  {
    /* Begin section0 */
    START_TIMER(section0)
    for (int db0 = 0; db0 <= 0; db0 += 1)
    {
      for (int x = x_m; x <= x_M; x += 1)
      {
        for (int y = y_m; y <= y_M; y += 1)
        {
          ub0[(db0 + (time_M / factor))%(1)][x][y] = u[db0 + (time_M / factor)][x][y];
        }
      }
    }
    STOP_TIMER(section0,timers)
    /* End section0 */
  }

becomes:

  if (time_m <= time_M)
  {
    /* Begin section0 */
    START_TIMER(section0)
    for (int db0 = 0; db0 <= 0; db0 += 1)
    {
      if (0 <= db0 + (time_M / factor))
      {
        for (int x = x_m; x <= x_M; x += 1)
        {
          for (int y = y_m; y <= y_M; y += 1)
          {
            ub0[(db0 + (time_M / factor))%(1)][x][y] = u[db0 + (time_M / factor)][x][y];
          }
        }
      }
    }
    STOP_TIMER(section0,timers)
    /* End section0 */
  }

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is that what causes the bump from 3 to 4 iteration nests?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is the iteration nest before adding the guard:

IterationTree(<WithProperties[affine,parallel]::Iteration db0; (0, 0, 1)>, <WithProperties[affine,parallel]::Iteration x; (x_m, x_M, 1)>, <WithProperties[affine,parallel]::Iteration y; (y_m, y_M, 1)>)
IterationTree(<WithProperties[affine,sequential]::Iteration time[t0,t1]; (time_m, time_M, 1)>,)
IterationTree(<WithProperties[affine,sequential]::Iteration time[t0,t1]; (time_m, time_M, 1)>, <WithProperties[affine,parallel,parallel=]::Iteration x; (x_m, x_M, 1)>, <WithProperties[affine,parallel,parallel=]::Iteration y; (y_m, y_M, 1)>)

and after adding the guard:

IterationTree(<WithProperties[affine,parallel]::Iteration db0; (0, 0, 1)>,)
IterationTree(<WithProperties[affine,parallel]::Iteration db0; (0, 0, 1)>, <WithProperties[affine,parallel]::Iteration x; (x_m, x_M, 1)>, <WithProperties[affine,parallel]::Iteration y; (y_m, y_M, 1)>)
IterationTree(<WithProperties[affine,sequential]::Iteration time[t0,t1]; (time_m, time_M, 1)>,)
IterationTree(<WithProperties[affine,sequential]::Iteration time[t0,t1]; (time_m, time_M, 1)>, <WithProperties[affine,parallel,parallel=]::Iteration x; (x_m, x_M, 1)>, <WithProperties[affine,parallel,parallel=]::Iteration y; (y_m, y_M, 1)>)

Iteration tree IterationTree(<WithProperties[affine,parallel]::Iteration db0; (0, 0, 1)>,) appears as a result. Not sure if this is expected or if you want me to probe into it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no, it's OK!

buffers = [i for i in FindSymbols().visit(op1) if i.is_Array]
assert len(buffers) == 1

Expand Down Expand Up @@ -628,7 +628,7 @@ def test_multi_access():
op1 = Operator(eqns, opt='buffering')

# Check generated code
assert len(retrieve_iteration_tree(op1)) == 2
assert len(retrieve_iteration_tree(op1)) == 3
buffers = [i for i in FindSymbols().visit(op1) if i.is_Array]
assert len(buffers) == 1

Expand All @@ -652,10 +652,10 @@ def test_issue_1901():
op = Operator(eq, opt='buffering')

trees = retrieve_iteration_tree(op)
assert len(trees) == 2
assert trees[1].root.dim is time
assert not trees[1].root.is_Parallel
assert trees[1].root.is_Sequential # Obv
assert len(trees) == 3
assert trees[2].root.dim is time
assert not trees[2].root.is_Parallel
assert trees[2].root.is_Sequential # Obv


def test_everything():
Expand Down
28 changes: 28 additions & 0 deletions tests/test_gpu_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,34 @@ def test_streaming_multi_input(self, opt, ntmps):

assert np.all(grad.data == grad1.data)

def test_streaming_multi_input_conddim_foward(self):
nt = 10
grid = Grid(shape=(4, 4))
time_dim = grid.time_dim
x, y = grid.dimensions

factor = Constant(name='factor', value=2, dtype=np.int32)
time_sub = ConditionalDimension(name="time_sub", parent=time_dim, factor=factor)

u = TimeFunction(name='u', grid=grid, time_order=2, save=nt, time_dim=time_sub)
v = TimeFunction(name='v', grid=grid)
v1 = TimeFunction(name='v', grid=grid)

for i in range(u.save):
u.data[i, :] = i

expr = u.dt2 + 3.*u.dt(x0=time_sub - time_sub.spacing)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI this is just u.dtl

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good to know - I adapted the code from the next test there


eqns = [Eq(v.forward, v + expr + 1.)]

op0 = Operator(eqns, opt=('noop', {'gpu-fit': u}))
op1 = Operator(eqns, opt=('buffering', 'streaming', 'orchestrate'))

op0.apply(time_M=nt, dt=.01)
op1.apply(time_M=nt, dt=.01, v=v1)

assert np.all(v.data == v1.data)

def test_streaming_multi_input_conddim_backward(self):
nt = 10
grid = Grid(shape=(4, 4))
Expand Down