From b86d0b485472080bfb59d4524c90771a20a7fad5 Mon Sep 17 00:00:00 2001 From: mloubout Date: Thu, 2 Nov 2023 08:49:51 -0400 Subject: [PATCH] compiler: cleanup ParTile --- devito/arch/compiler.py | 3 +- devito/core/operator.py | 9 ++--- devito/passes/clusters/blocking.py | 13 ++++---- devito/passes/iet/languages/openacc.py | 2 +- devito/passes/iet/parpragma.py | 8 ++--- devito/tools/data_structures.py | 46 ++++++++++++++++++-------- tests/test_tools.py | 8 +++++ tests/test_unexpansion.py | 14 +++++--- 8 files changed, 68 insertions(+), 35 deletions(-) diff --git a/devito/arch/compiler.py b/devito/arch/compiler.py index 8d9babbb5d..97531bd59c 100644 --- a/devito/arch/compiler.py +++ b/devito/arch/compiler.py @@ -899,4 +899,5 @@ def __lookup_cmds__(self): DEVITO_ARCH. Developers should add new compiler classes here. """ compiler_registry.update({'gcc-%s' % i: partial(GNUCompiler, suffix=i) - for i in ['4.9', '5', '6', '7', '8', '9', '10', '11', '12']}) + for i in ['4.9', '5', '6', '7', '8', '9', '10', + '11', '12', '13']}) diff --git a/devito/core/operator.py b/devito/core/operator.py index 3daa991316..5d0fb2efd8 100644 --- a/devito/core/operator.py +++ b/devito/core/operator.py @@ -6,7 +6,8 @@ from devito.mpi.routines import mpi_registry from devito.parameters import configuration from devito.operator import Operator -from devito.tools import as_tuple, is_integer, timed_pass, UnboundTuple +from devito.tools import (as_tuple, is_integer, timed_pass, + UnboundTuple, UnboundedMultiTuple) from devito.types import NThreads __all__ = ['CoreOperator', 'CustomOperator', @@ -338,11 +339,11 @@ def __new__(cls, items, rule=None, tag=None): return obj -class ParTile(tuple, OptOption): +class ParTile(UnboundedMultiTuple, OptOption): def __new__(cls, items, default=None): if not items: - return tuple() + return UnboundedMultiTuple() elif isinstance(items, bool): if not default: raise ValueError("Expected `default` value, got None") @@ -394,7 +395,7 @@ def __new__(cls, items, default=None): else: raise ValueError("Expected bool or iterable, got %s instead" % type(items)) - obj = super().__new__(cls, items) + obj = super().__new__(cls, *items) obj.default = as_tuple(default) return obj diff --git a/devito/passes/clusters/blocking.py b/devito/passes/clusters/blocking.py index efd3339ce9..6f4ee65fb5 100644 --- a/devito/passes/clusters/blocking.py +++ b/devito/passes/clusters/blocking.py @@ -431,7 +431,7 @@ class BlockSizeGenerator(object): """ def __init__(self, par_tile): - self.umt = UnboundedMultiTuple(*par_tile) + self.umt = par_tile self.tip = -1 # This is for Clusters that need a small par-tile to avoid under-utilizing @@ -459,11 +459,11 @@ def next(self, prefix, d, clusters): return self.umt_small.next() if x: - item = self.umt.curitem + item = self.umt.curitem() else: # We can't `self.umt.iter()` because we might still want to # fallback to `self.umt_small` - item = self.umt.nextitem + item = self.umt.nextitem() # Handle user-provided rules # TODO: This is also rudimentary @@ -474,15 +474,16 @@ def next(self, prefix, d, clusters): umt = self.umt else: umt = self.umt_small + if not x: + umt.iter() else: if item.rule in {d.name for d in prefix.itdims}: umt = self.umt else: # This is like "pattern unmatched" -- fallback to `umt_small` umt = self.umt_small - - if not x: - umt.iter() + if not x: + umt.iter() return umt.next() diff --git a/devito/passes/iet/languages/openacc.py b/devito/passes/iet/languages/openacc.py index 875d550417..89cb60252a 100644 --- a/devito/passes/iet/languages/openacc.py +++ b/devito/passes/iet/languages/openacc.py @@ -165,7 +165,7 @@ def _make_partree(self, candidates, nthreads=None): if self._is_offloadable(root) and \ all(i.is_Affine for i in [root] + collapsable) and \ self.par_tile: - tile = self.par_tile.next() + tile = self.par_tile.nextitem() assert isinstance(tile, UnboundTuple) body = self.DeviceIteration(gpu_fit=self.gpu_fit, tile=tile, diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py index eb6edcf1ee..ef1cd38af2 100644 --- a/devito/passes/iet/parpragma.py +++ b/devito/passes/iet/parpragma.py @@ -15,7 +15,7 @@ from devito.passes.iet.langbase import (LangBB, LangTransformer, DeviceAwareMixin, make_sections_from_imask) from devito.symbolics import INT, ccode -from devito.tools import UnboundTuple, as_tuple, flatten, is_integer, prod +from devito.tools import as_tuple, flatten, is_integer, prod from devito.types import Symbol __all__ = ['PragmaSimdTransformer', 'PragmaShmTransformer', @@ -622,8 +622,8 @@ def __init__(self, sregistry, options, platform, compiler): super().__init__(sregistry, options, platform, compiler) self.gpu_fit = options['gpu-fit'] - self.par_tile = UnboundTuple(*options['par-tile'], - default=options['par-tile'].default) + # Need to reset the tile in case was already used and iter over by blocking + self.par_tile = options['par-tile'].reset() self.par_disabled = options['par-disabled'] def _score_candidate(self, n0, root, collapsable=()): @@ -659,7 +659,7 @@ def _make_partree(self, candidates, nthreads=None, index=None): if self._is_offloadable(root): body = self.DeviceIteration(gpu_fit=self.gpu_fit, ncollapsed=len(collapsable)+1, - tile=self.par_tile.next(), + tile=self.par_tile.nextitem(), **root.args) partree = ParallelTree([], body, nthreads=nthreads) diff --git a/devito/tools/data_structures.py b/devito/tools/data_structures.py index deed27c0d9..1ebfc3698a 100644 --- a/devito/tools/data_structures.py +++ b/devito/tools/data_structures.py @@ -670,27 +670,26 @@ def __new__(cls, *items, **kwargs): obj = super().__new__(cls, tuple(nitems)) obj.last = len(nitems) obj.current = 0 - obj._default = kwargs.get('default', nitems[0]) return obj - @property - def default(self): - return self._default - @property def prod(self): return np.prod(self) + def reset(self): + self.iter() + return self + def iter(self): self.current = 0 def next(self): - if self.last == 0: + if not self: return None item = self[self.current] if self.current == self.last-1 or self.current == -1: - self.current = -1 + self.current = self.last else: self.current += 1 return item @@ -703,6 +702,8 @@ def __repr__(self): return "%s(%s)" % (self.__class__.__name__, ", ".join(sitems)) def __getitem__(self, idx): + if not self: + return None if isinstance(idx, slice): start = idx.start or 0 stop = idx.stop or self.last @@ -754,26 +755,43 @@ class UnboundedMultiTuple(UnboundTuple): def __new__(cls, *items, **kwargs): obj = super().__new__(cls, *items, **kwargs) - obj.current = -1 + # MultiTuple are un-initialized + obj.current = None return obj - @property + def reset(self): + self.current = None + return self + def curitem(self): + if self.current is None: + raise StopIteration + if not self: + return None return self[self.current] - @property def nextitem(self): - return self[min(self.current + 1, max(self.last - 1, 0))] + if not self: + return None + self.iter() + return self.curitem() def index(self, item): return self.index(item) def iter(self): - self.current = min(self.current + 1, self.last - 1) - self[self.current].current = 0 + if self.current is None: + self.current = 0 + else: + self.current = min(self.current + 1, self.last - 1) + self[self.current].reset() return def next(self): - if self[self.current].current == -1: + if not self: + return None + if self.current is None: + raise StopIteration + if self[self.current].current >= self[self.current].last: raise StopIteration return self[self.current].next() diff --git a/tests/test_tools.py b/tests/test_tools.py index 375b24239d..840d65b836 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -103,8 +103,14 @@ def test_ctypes_to_cstr(dtype, expected): def test_unbounded_multi_tuple(): ub = UnboundedMultiTuple([1, 2], [3, 4]) + with pytest.raises(StopIteration): + ub.next() + + with pytest.raises(StopIteration): + assert ub.curitem() ub.iter() + assert ub.curitem() == (1, 2) assert ub.next() == 1 assert ub.next() == 2 @@ -121,6 +127,8 @@ def test_unbounded_multi_tuple(): ub.iter() assert ub.next() == 3 + assert ub.nextitem() == (3, 4) + def test_unbound_tuple(): # Make sure we don't drop needed None for 2.5d diff --git a/tests/test_unexpansion.py b/tests/test_unexpansion.py index 9b6d30c9b1..0de2d55de7 100644 --- a/tests/test_unexpansion.py +++ b/tests/test_unexpansion.py @@ -4,6 +4,8 @@ from conftest import assert_structure, get_params, get_arrays, check_array from devito import (Buffer, Eq, Function, TimeFunction, Grid, Operator, Substitutions, Coefficient, cos, sin) +from devito.arch.compiler import OneapiCompiler +from devito.parameters import switchconfig, configuration from devito.types import Symbol @@ -129,6 +131,8 @@ def test_fusion_after_unexpansion(self): assert op._profiler._sections['section0'].sops == 21 assert_structure(op, ['t,x,y', 't,x,y,i0'], 't,x,y,i0') + @switchconfig(condition=isinstance(configuration['compiler'], + (OneapiCompiler)), safe_math=True) def test_v1(self): grid = Grid(shape=(10, 10, 10)) @@ -275,16 +279,17 @@ def test_v6(self): op.cfunction def test_transpose(self): - shape = (10, 10, 10) - grid = Grid(shape=shape) + shape = (11, 11, 11) + grid = Grid(shape=shape, extent=(10, 10, 10)) x, _, _ = grid.dimensions u = TimeFunction(name='u', grid=grid, space_order=4) u1 = TimeFunction(name='u', grid=grid, space_order=4) # Chessboard-like init - u.data[:] = np.indices(shape).sum(axis=0) % 10 + 1 - u1.data[:] = np.indices(shape).sum(axis=0) % 10 + 1 + hshape = u.data_with_halo.shape[1:] + u.data_with_halo[:] = np.indices(hshape).sum(axis=0) % 10 + 1 + u1.data_with_halo[:] = np.indices(hshape).sum(axis=0) % 10 + 1 eqn = Eq(u.forward, u.dx(x0=x+x.spacing/2).T + 1.) @@ -293,7 +298,6 @@ def test_transpose(self): op0.apply(time_M=10) op1.apply(time_M=10, u=u1) - assert np.allclose(u.data, u1.data, rtol=10e-6)