diff --git a/devito/arch/compiler.py b/devito/arch/compiler.py index 8d9babbb5d..97531bd59c 100644 --- a/devito/arch/compiler.py +++ b/devito/arch/compiler.py @@ -899,4 +899,5 @@ def __lookup_cmds__(self): DEVITO_ARCH. Developers should add new compiler classes here. """ compiler_registry.update({'gcc-%s' % i: partial(GNUCompiler, suffix=i) - for i in ['4.9', '5', '6', '7', '8', '9', '10', '11', '12']}) + for i in ['4.9', '5', '6', '7', '8', '9', '10', + '11', '12', '13']}) diff --git a/devito/core/operator.py b/devito/core/operator.py index 3daa991316..5d0fb2efd8 100644 --- a/devito/core/operator.py +++ b/devito/core/operator.py @@ -6,7 +6,8 @@ from devito.mpi.routines import mpi_registry from devito.parameters import configuration from devito.operator import Operator -from devito.tools import as_tuple, is_integer, timed_pass, UnboundTuple +from devito.tools import (as_tuple, is_integer, timed_pass, + UnboundTuple, UnboundedMultiTuple) from devito.types import NThreads __all__ = ['CoreOperator', 'CustomOperator', @@ -338,11 +339,11 @@ def __new__(cls, items, rule=None, tag=None): return obj -class ParTile(tuple, OptOption): +class ParTile(UnboundedMultiTuple, OptOption): def __new__(cls, items, default=None): if not items: - return tuple() + return UnboundedMultiTuple() elif isinstance(items, bool): if not default: raise ValueError("Expected `default` value, got None") @@ -394,7 +395,7 @@ def __new__(cls, items, default=None): else: raise ValueError("Expected bool or iterable, got %s instead" % type(items)) - obj = super().__new__(cls, items) + obj = super().__new__(cls, *items) obj.default = as_tuple(default) return obj diff --git a/devito/passes/clusters/blocking.py b/devito/passes/clusters/blocking.py index efd3339ce9..6f4ee65fb5 100644 --- a/devito/passes/clusters/blocking.py +++ b/devito/passes/clusters/blocking.py @@ -431,7 +431,7 @@ class BlockSizeGenerator(object): """ def __init__(self, par_tile): - self.umt = UnboundedMultiTuple(*par_tile) + self.umt = 
par_tile self.tip = -1 # This is for Clusters that need a small par-tile to avoid under-utilizing @@ -459,11 +459,11 @@ def next(self, prefix, d, clusters): return self.umt_small.next() if x: - item = self.umt.curitem + item = self.umt.curitem() else: # We can't `self.umt.iter()` because we might still want to # fallback to `self.umt_small` - item = self.umt.nextitem + item = self.umt.nextitem() # Handle user-provided rules # TODO: This is also rudimentary @@ -474,15 +474,16 @@ def next(self, prefix, d, clusters): umt = self.umt else: umt = self.umt_small + if not x: + umt.iter() else: if item.rule in {d.name for d in prefix.itdims}: umt = self.umt else: # This is like "pattern unmatched" -- fallback to `umt_small` umt = self.umt_small - - if not x: - umt.iter() + if not x: + umt.iter() return umt.next() diff --git a/devito/passes/iet/languages/openacc.py b/devito/passes/iet/languages/openacc.py index 875d550417..89cb60252a 100644 --- a/devito/passes/iet/languages/openacc.py +++ b/devito/passes/iet/languages/openacc.py @@ -165,7 +165,7 @@ def _make_partree(self, candidates, nthreads=None): if self._is_offloadable(root) and \ all(i.is_Affine for i in [root] + collapsable) and \ self.par_tile: - tile = self.par_tile.next() + tile = self.par_tile.nextitem() assert isinstance(tile, UnboundTuple) body = self.DeviceIteration(gpu_fit=self.gpu_fit, tile=tile, diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py index 57c7af5cb5..ef1cd38af2 100644 --- a/devito/passes/iet/parpragma.py +++ b/devito/passes/iet/parpragma.py @@ -15,7 +15,7 @@ from devito.passes.iet.langbase import (LangBB, LangTransformer, DeviceAwareMixin, make_sections_from_imask) from devito.symbolics import INT, ccode -from devito.tools import UnboundTuple, as_tuple, flatten, is_integer, prod +from devito.tools import as_tuple, flatten, is_integer, prod from devito.types import Symbol __all__ = ['PragmaSimdTransformer', 'PragmaShmTransformer', @@ -622,7 +622,8 @@ def __init__(self, 
sregistry, options, platform, compiler): super().__init__(sregistry, options, platform, compiler) self.gpu_fit = options['gpu-fit'] - self.par_tile = UnboundTuple(*options['par-tile']) + # Reset the tile in case it was already used and iterated over by blocking + self.par_tile = options['par-tile'].reset() self.par_disabled = options['par-disabled'] def _score_candidate(self, n0, root, collapsable=()): @@ -658,7 +659,7 @@ def _make_partree(self, candidates, nthreads=None, index=None): if self._is_offloadable(root): body = self.DeviceIteration(gpu_fit=self.gpu_fit, ncollapsed=len(collapsable)+1, - tile=self.par_tile.next(), + tile=self.par_tile.nextitem(), **root.args) partree = ParallelTree([], body, nthreads=nthreads) diff --git a/devito/tools/data_structures.py b/devito/tools/data_structures.py index d8f92e0cf2..1ebfc3698a 100644 --- a/devito/tools/data_structures.py +++ b/devito/tools/data_structures.py @@ -664,7 +664,7 @@ def __new__(cls, *items, **kwargs): nitems.append(i) elif isinstance(i, Iterable): nitems.append(UnboundTuple(*i)) - elif i is not None: + else: nitems.append(i) obj = super().__new__(cls, tuple(nitems)) @@ -673,23 +673,23 @@ def __new__(cls, *items, **kwargs): return obj - @property - def default(self): - return self[0] - @property def prod(self): return np.prod(self) + def reset(self): + self.iter() + return self + def iter(self): self.current = 0 def next(self): - if self.last == 0: + if not self: return None item = self[self.current] if self.current == self.last-1 or self.current == -1: - self.current = -1 + self.current = self.last else: self.current += 1 return item @@ -702,6 +702,8 @@ def __repr__(self): return "%s(%s)" % (self.__class__.__name__, ", ".join(sitems)) def __getitem__(self, idx): + if not self: + return None if isinstance(idx, slice): start = idx.start or 0 stop = idx.stop or self.last @@ -753,26 +755,43 @@ class UnboundedMultiTuple(UnboundTuple): def __new__(cls, *items, **kwargs): obj = super().__new__(cls, *items, 
**kwargs) - obj.current = -1 + # A MultiTuple starts out un-initialized + obj.current = None return obj - @property + def reset(self): + self.current = None + return self + def curitem(self): + if self.current is None: + raise StopIteration + if not self: + return None return self[self.current] - @property def nextitem(self): - return self[min(self.current + 1, max(self.last - 1, 0))] + if not self: + return None + self.iter() + return self.curitem() def index(self, item): return self.index(item) def iter(self): - self.current = min(self.current + 1, self.last - 1) - self[self.current].current = 0 + if self.current is None: + self.current = 0 + else: + self.current = min(self.current + 1, self.last - 1) + self[self.current].reset() return def next(self): - if self[self.current].current == -1: + if not self: + return None + if self.current is None: + raise StopIteration + if self[self.current].current >= self[self.current].last: raise StopIteration return self[self.current].next() diff --git a/docker/Dockerfile.nvidia b/docker/Dockerfile.nvidia index c2d6d074a5..0df21b4fe5 100644 --- a/docker/Dockerfile.nvidia +++ b/docker/Dockerfile.nvidia @@ -13,11 +13,8 @@ ENV DEBIAN_FRONTEND noninteractive # Install python RUN apt-get update && \ - apt-get install -y dh-autoreconf python3-venv python3-dev python3-pip - -RUN apt-get update -y && \ - apt-get install -y -q gpg apt-utils curl wget vim libnuma-dev tmux numactl - + apt-get install -y -q gpg apt-utils curl wget vim libnuma-dev tmux numactl \ + dh-autoreconf python3-venv python3-dev python3-pip # nodesource: nvdashboard requires nodejs>=10 RUN curl https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | gpg --yes --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg @@ -40,8 +37,10 @@ RUN if [ "$ver" = "nvhpc" ]; then \ apt-get install --allow-unauthenticated -y -q ./nvhpc_${year}.${minor}_amd64.deb; \ fi; -RUN curl -sL https://deb.nodesource.com/setup_18.x | bash - && \ - apt-get install 
-y -q \ +# Nodejs https://github.com/nodesource/distributions +RUN curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --yes --dearmor -o /etc/apt/keyrings/nodesource.gpg +RUN echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_18.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list +RUN apt-get update && apt-get install -y -q \ liblapack-dev libblas-dev \ libibverbs-dev libmlx4-1 libmlx5-1 ibutils \ # Devito Jupyter Notebooks and Ux experience diff --git a/tests/test_tools.py b/tests/test_tools.py index 24d4a35dbd..840d65b836 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -4,7 +4,7 @@ import time from devito.tools import (UnboundedMultiTuple, ctypes_to_cstr, toposort, - filter_ordered, transitive_closure) + filter_ordered, transitive_closure, UnboundTuple) from devito.types.basic import Symbol @@ -103,8 +103,14 @@ def test_ctypes_to_cstr(dtype, expected): def test_unbounded_multi_tuple(): ub = UnboundedMultiTuple([1, 2], [3, 4]) + with pytest.raises(StopIteration): + ub.next() + + with pytest.raises(StopIteration): + assert ub.curitem() ub.iter() + assert ub.curitem() == (1, 2) assert ub.next() == 1 assert ub.next() == 2 @@ -120,3 +126,21 @@ def test_unbounded_multi_tuple(): ub.iter() assert ub.next() == 3 + + assert ub.nextitem() == (3, 4) + + +def test_unbound_tuple(): + # Make sure we don't drop needed None for 2.5d + ub = UnboundTuple(None, None) + assert len(ub) == 2 + assert ub[10] is None + + ub = UnboundTuple(1, 2, 3) + assert len(ub) == 3 + assert ub[10] == 3 + assert ub[1:4] == (2, 3, 3) + assert ub.next() == 1 + assert ub.next() == 2 + ub.iter() + assert ub.next() == 1 diff --git a/tests/test_unexpansion.py b/tests/test_unexpansion.py index 9b6d30c9b1..0de2d55de7 100644 --- a/tests/test_unexpansion.py +++ b/tests/test_unexpansion.py @@ -4,6 +4,8 @@ from conftest import assert_structure, get_params, get_arrays, check_array from devito import (Buffer, Eq, Function, 
TimeFunction, Grid, Operator, Substitutions, Coefficient, cos, sin) +from devito.arch.compiler import OneapiCompiler +from devito.parameters import switchconfig, configuration from devito.types import Symbol @@ -129,6 +131,8 @@ def test_fusion_after_unexpansion(self): assert op._profiler._sections['section0'].sops == 21 assert_structure(op, ['t,x,y', 't,x,y,i0'], 't,x,y,i0') + @switchconfig(condition=isinstance(configuration['compiler'], + (OneapiCompiler)), safe_math=True) def test_v1(self): grid = Grid(shape=(10, 10, 10)) @@ -275,16 +279,17 @@ def test_v6(self): op.cfunction def test_transpose(self): - shape = (10, 10, 10) - grid = Grid(shape=shape) + shape = (11, 11, 11) + grid = Grid(shape=shape, extent=(10, 10, 10)) x, _, _ = grid.dimensions u = TimeFunction(name='u', grid=grid, space_order=4) u1 = TimeFunction(name='u', grid=grid, space_order=4) # Chessboard-like init - u.data[:] = np.indices(shape).sum(axis=0) % 10 + 1 - u1.data[:] = np.indices(shape).sum(axis=0) % 10 + 1 + hshape = u.data_with_halo.shape[1:] + u.data_with_halo[:] = np.indices(hshape).sum(axis=0) % 10 + 1 + u1.data_with_halo[:] = np.indices(hshape).sum(axis=0) % 10 + 1 eqn = Eq(u.forward, u.dx(x0=x+x.spacing/2).T + 1.) @@ -293,7 +298,6 @@ def test_transpose(self): op0.apply(time_M=10) op1.apply(time_M=10, u=u1) - assert np.allclose(u.data, u1.data, rtol=10e-6)