diff --git a/devito/builtins/initializers.py b/devito/builtins/initializers.py index f338e194e1..83bad735fa 100644 --- a/devito/builtins/initializers.py +++ b/devito/builtins/initializers.py @@ -77,7 +77,12 @@ def assign(f, rhs=0, options=None, name='assign', assign_halo=False, **kwargs): symbolic_max=d.symbolic_max + h.right) eqs = [eq.xreplace(subs) for eq in eqs] - dv.Operator(eqs, name=name, **kwargs)() + op = dv.Operator(eqs, name=name, **kwargs) + try: + op() + except ValueError: + # Corner case such as assign(u, v) with v a Buffered TimeFunction + op(time_M=f._time_size) def smooth(f, g, axis=None): diff --git a/devito/core/autotuning.py b/devito/core/autotuning.py index 603a78efd6..f30c020ef7 100644 --- a/devito/core/autotuning.py +++ b/devito/core/autotuning.py @@ -209,8 +209,9 @@ def init_time_bounds(stepper, at_args, args): else: at_args[dim.max_name] = at_args[dim.min_name] + options['squeezer'] if dim.size_name in args: - # May need to shrink to avoid OOB accesses - at_args[dim.max_name] = min(at_args[dim.max_name], args[dim.max_name]) + if not isinstance(args[dim.size_name], range): + # May need to shrink to avoid OOB accesses + at_args[dim.max_name] = min(at_args[dim.max_name], args[dim.max_name]) if at_args[dim.min_name] > at_args[dim.max_name]: warning("too few time iterations; skipping") return False diff --git a/devito/ir/stree/algorithms.py b/devito/ir/stree/algorithms.py index 58e8e844e6..0f7e46bcfd 100644 --- a/devito/ir/stree/algorithms.py +++ b/devito/ir/stree/algorithms.py @@ -147,6 +147,12 @@ def preprocess(clusters, options=None, **kwargs): found = [] for c1 in list(queue): distributed_aindices = c1.halo_scheme.distributed_aindices + h_indices = set().union(*[d._defines for d in c1.halo_scheme.loc_indices]) + + # Skip if the halo exchange would end up outside + # its iteration space + if h_indices and not h_indices & dims: + continue diff = dims - distributed_aindices intersection = dims & distributed_aindices diff --git a/devito/mpi/halo_scheme.py b/devito/mpi/halo_scheme.py index 0204c171e6..970e84633d 100644 --- a/devito/mpi/halo_scheme.py +++ b/devito/mpi/halo_scheme.py @@ -361,6 +361,10 @@ def distributed(self): def distributed_aindices(self): return set().union(*[i.dims for i in self.fmapper.values()]) + @cached_property + def loc_indices(self): + return set().union(*[i.loc_indices.keys() for i in self.fmapper.values()]) + @cached_property def arguments(self): return self.dimensions | set(flatten(self.honored.values())) diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py index ab758ba982..3d2dcb7466 100644 --- a/devito/operations/interpolators.py +++ b/devito/operations/interpolators.py @@ -155,13 +155,31 @@ def _rdim(self): -self.r+1, self.r, 2*self.r, parent) for d in self._gdims] - return DimensionTuple(*dims, getters=self._gdims) + # Make radius dimension conditional to avoid OOB + rdims = [] + pos = self.sfunction._position_map.values() + + for (d, rd, p) in zip(self._gdims, dims, pos): + # Add conditional to avoid OOB + lb = sympy.And(rd + p >= d.symbolic_min - self.r, evaluate=False) + ub = sympy.And(rd + p <= d.symbolic_max + self.r, evaluate=False) + cond = sympy.And(lb, ub, evaluate=False) + rdims.append(ConditionalDimension(rd.name, rd, condition=cond, + indirect=True)) + + return DimensionTuple(*rdims, getters=self._gdims) + + def _augment_implicit_dims(self, implicit_dims, extras=None): + if extras is not None: + extra = set([i for v in extras for i in v.dimensions]) - set(self._gdims) + extra = tuple(extra) + else: + extra = tuple() - def _augment_implicit_dims(self, implicit_dims): if self.sfunction._sparse_position == -1: - return self.sfunction.dimensions + as_tuple(implicit_dims) + return self.sfunction.dimensions + as_tuple(implicit_dims) + extra else: - return as_tuple(implicit_dims) + self.sfunction.dimensions + return as_tuple(implicit_dims) + self.sfunction.dimensions + extra def _coeff_temps(self, implicit_dims): return [] @@ -177,13 +195,6 @@ def _interp_idx(self, variables, implicit_dims=None): mapper = {} pos = self.sfunction._position_map.values() - for ((di, d), rd, p) in zip(enumerate(self._gdims), self._rdim, pos): - # Add conditional to avoid OOB - lb = sympy.And(rd + p >= d.symbolic_min - self.r, evaluate=False) - ub = sympy.And(rd + p <= d.symbolic_max + self.r, evaluate=False) - cond = sympy.And(lb, ub, evaluate=False) - mapper[d] = ConditionalDimension(rd.name, rd, condition=cond, indirect=True) - # Temporaries for the position temps = self._positions(implicit_dims) @@ -191,10 +202,10 @@ def _interp_idx(self, variables, implicit_dims=None): temps.extend(self._coeff_temps(implicit_dims)) # Substitution mapper for variables + mapper = self._rdim._getters idx_subs = {v: v.subs({k: c + p for ((k, c), p) in zip(mapper.items(), pos)}) for v in variables} - idx_subs.update(dict(zip(self._rdim, mapper.values()))) return idx_subs, temps @@ -247,8 +258,6 @@ def _interpolate(self, expr, increment=False, self_subs={}, implicit_dims=None): interpolation expression, but that should be honored when constructing the operator. """ - implicit_dims = self._augment_implicit_dims(implicit_dims) - # Derivatives must be evaluated before the introduction of indirect accesses try: _expr = expr.evaluate @@ -258,6 +267,9 @@ def _interpolate(self, expr, increment=False, self_subs={}, implicit_dims=None): variables = list(retrieve_function_carriers(_expr)) + # Implicit dimensions + implicit_dims = self._augment_implicit_dims(implicit_dims) + # List of indirection indices for all adjacent grid points idx_subs, temps = self._interp_idx(variables, implicit_dims=implicit_dims) @@ -290,11 +302,10 @@ def _inject(self, field, expr, implicit_dims=None): injection expression, but that should be honored when constructing the operator. """ - implicit_dims = self._augment_implicit_dims(implicit_dims) + self._rdim - # Make iterable to support inject((u, v), expr=expr) # or inject((u, v), expr=(expr1, expr2)) fields, exprs = as_tuple(field), as_tuple(expr) + # Provide either one expr per field or on expr for all fields if len(fields) > 1: if len(exprs) == 1: @@ -310,6 +321,14 @@ def _inject(self, field, expr, implicit_dims=None): _exprs = exprs variables = list(v for e in _exprs for v in retrieve_function_carriers(e)) + + # Implicit dimensions + implicit_dims = self._augment_implicit_dims(implicit_dims, variables) + # Move all temporaries inside inner loop to improve parallelism + # Can only be done for inject as interpolation need a temporary + # summing temp that wouldn't allow collapsing + implicit_dims = implicit_dims + tuple(r.parent for r in self._rdim) + variables = variables + list(fields) # List of indirection indices for all adjacent grid points @@ -380,5 +399,7 @@ def interpolation_coeffs(self): @property def _weights(self): ddim, cdim = self.interpolation_coeffs.dimensions[1:] - return Mul(*[self.interpolation_coeffs.subs({ddim: ri, cdim: rd-rd.symbolic_min}) - for (ri, rd) in enumerate(self._rdim)]) + mappers = [{ddim: ri, cdim: rd-rd.parent.symbolic_min} + for (ri, rd) in enumerate(self._rdim)] + return Mul(*[self.interpolation_coeffs.subs(mapper) + for mapper in mappers]) diff --git a/devito/operator/operator.py b/devito/operator/operator.py index d1bee9fa66..eb8b793f22 100644 --- a/devito/operator/operator.py +++ b/devito/operator/operator.py @@ -24,7 +24,7 @@ from devito.symbolics import estimate_cost from devito.tools import (DAG, OrderedSet, Signer, ReducerMap, as_tuple, flatten, filter_sorted, frozendict, is_integer, split, timed_pass, - timed_region) + timed_region, contains_val) from devito.types import Grid, Evaluable __all__ = ['Operator'] @@ -526,6 +526,7 @@ def _prepare_arguments(self, autotune=None, **kwargs): edges = [(i, i.parent) for i in self.dimensions if i.is_Derived and i.parent in set(nodes)] toposort = DAG(nodes, edges).topological_sort() + futures = {} for d in reversed(toposort): if set(d._arg_names).intersection(kwargs): @@ -560,11 +561,12 @@ def _prepare_arguments(self, autotune=None, **kwargs): # a TimeFunction `usave(t_sub, x, y)`, an override for `fact` is # supplied w/o overriding `usave`; that's legal pass - elif is_integer(args[k]) and args[k] not in as_tuple(v): + elif is_integer(args[k]) and not contains_val(args[k], v): raise ValueError("Default `%s` is incompatible with other args as " "`%s=%s`, while `%s=%s` is expected. Perhaps you " "forgot to override `%s`?" % (p, k, v, k, args[k], p)) + args = kwargs['args'] = args.reduce_all() # DiscreteFunctions may be created from CartesianDiscretizations, which in @@ -572,6 +574,11 @@ def _prepare_arguments(self, autotune=None, **kwargs): discretizations = {getattr(kwargs[p.name], 'grid', None) for p in overrides} discretizations.update({getattr(p, 'grid', None) for p in defaults}) discretizations.discard(None) + # Remove subgrids if multiple grids + if len(discretizations) > 1: + discretizations = {g for g in discretizations + if not any(d.is_Derived for d in g.dimensions)} + for i in discretizations: args.update(i._arg_values(**kwargs)) diff --git a/devito/passes/iet/langbase.py b/devito/passes/iet/langbase.py index 36ce348ac4..2acccba648 100644 --- a/devito/passes/iet/langbase.py +++ b/devito/passes/iet/langbase.py @@ -214,6 +214,9 @@ def DeviceIteration(self): def Prodder(self): return self.lang.Prodder + def _device_pointers(self, *args, **kwargs): + return {} + class DeviceAwareMixin(object): @@ -325,6 +328,11 @@ def _(iet): return _initialize(iet) + def _device_pointers(self, iet): + functions = FindSymbols().visit(iet) + devfuncs = [f for f in functions if f.is_Array and f._mem_local] + return set(devfuncs) + def _is_offloadable(self, iet): """ True if the IET computation is offloadable to device, False otherwise. diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py index 44ee6afd6c..b6476192b2 100644 --- a/devito/passes/iet/parpragma.py +++ b/devito/passes/iet/parpragma.py @@ -295,15 +295,13 @@ def _select_candidates(self, candidates): except TypeError: pass - # At least one inner loop (nested) or - # we do not collapse most inner loop if it is an atomic reduction - if not i.is_ParallelAtomic or nested: - collapsable.append(i) + collapsable.append(i) # Give a score to this candidate, based on the number of fully-parallel # Iterations and their position (i.e. outermost to innermost) in the nest score = ( int(root.is_ParallelNoAtomic), + len(self._device_pointers(root)), # Outermost offloadable int(len([i for i in collapsable if i.is_ParallelNoAtomic]) >= 1), int(len([i for i in collapsable if i.is_ParallelRelaxed]) >= 1), -(n0 + 1) # The outermost, the better @@ -377,9 +375,14 @@ def _make_partree(self, candidates, nthreads=None): ncollapsed=ncollapsed, nthreads=nthreads, **root.args) prefix = [] + elif nthreads is not None: + body = self.HostIteration(schedule='static', + parallel=nthreads is not self.nthreads_nested, + ncollapsed=ncollapsed, nthreads=nthreads, + **root.args) + prefix = [] else: # pragma ... for ... schedule(..., expr) - assert nthreads is None nthreads = self.nthreads_nonaffine chunk_size = Symbol(name='chunk_size') body = self.HostIteration(ncollapsed=ncollapsed, chunk_size=chunk_size, @@ -428,11 +431,6 @@ def _make_nested_partree(self, partree): if self.nhyperthreads <= self.nested: return partree - # Loop nest with atomic reductions are more likely to have less latency - # keep outer loop parallel - if partree.root.is_ParallelAtomic: - return partree - # Note: there might be multiple sub-trees amenable to nested parallelism, # hence we loop over all of them # diff --git a/devito/tools/data_structures.py b/devito/tools/data_structures.py index 539f75d593..01a9a3f4bd 100644 --- a/devito/tools/data_structures.py +++ b/devito/tools/data_structures.py @@ -110,6 +110,7 @@ def unique(self, key): Key for which to retrieve a unique value. """ candidates = self.getall(key) + candidates = [c for c in candidates if c is not None] def compare_to_first(v): first = candidates[0] @@ -122,12 +123,23 @@ def compare_to_first(v): return first in v elif isinstance(first, Set): return v in first + elif isinstance(v, range): + if isinstance(first, range): + return first.stop > v.start or v.stop > first.start + else: + return first >= v.start and first < v.stop + elif isinstance(first, range): + return v >= first.start and v < first.stop else: return first == v if len(candidates) == 1: return candidates[0] elif all(map(compare_to_first, candidates)): + # return first non-range + for c in candidates: + if not isinstance(c, range): + return c return candidates[0] else: raise ValueError("Unable to find unique value for key %s, candidates: %s" diff --git a/devito/tools/utils.py b/devito/tools/utils.py index 16f0987930..d99bf34b25 100644 --- a/devito/tools/utils.py +++ b/devito/tools/utils.py @@ -12,7 +12,7 @@ 'roundm', 'powerset', 'invert', 'flatten', 'single_or', 'filter_ordered', 'as_mapper', 'filter_sorted', 'pprint', 'sweep', 'all_equal', 'as_list', 'indices_to_slices', 'indices_to_sections', 'transitive_closure', - 'humanbytes'] + 'humanbytes', 'contains_val'] def prod(iterable, initial=1): @@ -75,6 +75,13 @@ def is_integer(value): return isinstance(value, (int, np.integer, sympy.Integer)) +def contains_val(val, items): + try: + return val in items + except TypeError: + return val == items + + def generator(): """ Return a function ``f`` that generates integer numbers starting at 0 diff --git a/devito/types/basic.py b/devito/types/basic.py index c36496d195..400232faed 100644 --- a/devito/types/basic.py +++ b/devito/types/basic.py @@ -838,13 +838,15 @@ def __new__(cls, *args, **kwargs): newobj._dimensions = dimensions newobj._shape = cls.__shape_setup__(**kwargs) newobj._dtype = cls.__dtype_setup__(**kwargs) - newobj.__init_finalize__(*args, **kwargs) # All objects created off an existing AbstractFunction `f` (e.g., # via .func, or .subs, such as `f(x + 1)`) keep a reference to `f` # through the `function` field newobj.function = function or newobj + # Initialization + newobj.__init_finalize__(*args, **kwargs) + return newobj def __init__(self, *args, **kwargs): diff --git a/devito/types/dense.py b/devito/types/dense.py index ec371b662c..2a13fa91af 100644 --- a/devito/types/dense.py +++ b/devito/types/dense.py @@ -94,7 +94,7 @@ def __init_finalize__(self, *args, function=None, **kwargs): # a reference to the user-provided buffer self._initializer = None if len(initializer) > 0: - self.data_with_halo[:] = initializer + self.data_with_halo[:] = initializer[:] else: # This is a corner case -- we might get here, for example, when # running with MPI and some processes get 0-size arrays after diff --git a/devito/types/dimension.py b/devito/types/dimension.py index 76d9d9e60a..d7b25e382d 100644 --- a/devito/types/dimension.py +++ b/devito/types/dimension.py @@ -298,13 +298,19 @@ def _arg_values(self, interval, grid=None, args=None, **kwargs): # may represent sets of legal values. If that's the case, here we just # pick one. Note that we sort for determinism try: - loc_minv = sorted(loc_minv).pop(0) - except TypeError: - pass + loc_minv = loc_minv.stop + except AttributeError: + try: + loc_minv = sorted(loc_minv).pop(0) + except TypeError: + pass try: - loc_maxv = sorted(loc_maxv).pop(0) - except TypeError: - pass + loc_maxv = loc_maxv.stop + except AttributeError: + try: + loc_maxv = sorted(loc_maxv).pop(0) + except TypeError: + pass return {self.min_name: loc_minv, self.max_name: loc_maxv} @@ -580,7 +586,7 @@ def left(cls, name, parent, thickness, local=True): return cls(name, parent, left=parent.symbolic_min, right=parent.symbolic_min+lst-1, - thickness=((lst, thickness), (rst, 0)), + thickness=((lst, thickness), (rst, None)), local=local) @classmethod @@ -589,7 +595,7 @@ def right(cls, name, parent, thickness, local=True): return cls(name, parent, left=parent.symbolic_max-rst+1, right=parent.symbolic_max, - thickness=((lst, 0), (rst, thickness)), + thickness=((lst, None), (rst, thickness)), local=local) @classmethod @@ -622,6 +628,18 @@ def local(self): def thickness(self): return self._thickness + @property + def is_left(self): + return self.thickness.right[1] is None + + @property + def is_right(self): + return self.thickness.left[1] is None + + @property + def is_middle(self): + return not self.is_left and not self.is_right + @cached_property def bound_symbols(self): # Add thickness symbols @@ -695,7 +713,7 @@ def _arg_values(self, interval, grid=None, **kwargs): # However, arguments from the user are considered global # So overriding the thickness to a nonzero value should not cause # boundaries to exist between ranks where they did not before - requested_ltkn, requested_rtkn = ( + r_ltkn, r_rtkn = ( kwargs.get(k.name, v) for k, v in self.thickness ) @@ -704,19 +722,24 @@ def _arg_values(self, interval, grid=None, **kwargs): if self.local: # dimension is of type ``left``/right`` - compute the 'offset' # and then add 1 to get the appropriate thickness - ltkn = grid.distributor.glb_to_loc(self.root, requested_ltkn-1, LEFT) - rtkn = grid.distributor.glb_to_loc(self.root, requested_rtkn-1, RIGHT) - ltkn = ltkn+1 if ltkn is not None else 0 - rtkn = rtkn+1 if rtkn is not None else 0 + if r_ltkn is not None: + ltkn = grid.distributor.glb_to_loc(self.root, r_ltkn-1, LEFT) + ltkn = ltkn+1 if ltkn is not None else 0 + else: + ltkn = 0 + + if r_rtkn is not None: + rtkn = grid.distributor.glb_to_loc(self.root, r_rtkn-1, RIGHT) + rtkn = rtkn+1 if rtkn is not None else 0 + else: + rtkn = 0 else: # dimension is of type ``middle`` - ltkn = grid.distributor.glb_to_loc(self.root, requested_ltkn, - LEFT) or 0 - rtkn = grid.distributor.glb_to_loc(self.root, requested_rtkn, - RIGHT) or 0 + ltkn = grid.distributor.glb_to_loc(self.root, r_ltkn, LEFT) or 0 + rtkn = grid.distributor.glb_to_loc(self.root, r_rtkn, RIGHT) or 0 else: - ltkn = requested_ltkn - rtkn = requested_rtkn + ltkn = r_ltkn or 0 + rtkn = r_rtkn or 0 return {i.name: v for i, v in zip(self._thickness_map, (ltkn, rtkn))} @@ -853,8 +876,8 @@ def _arg_defaults(self, _min=None, size=None, alias=None): factor = defaults[dim._factor.name] = dim._factor.data except AttributeError: factor = dim._factor - defaults[dim.parent.max_name] = \ - frozenset(range(factor*(size - 1), factor*(size))) + + defaults[dim.parent.max_name] = range(1, factor*size - 1) return defaults diff --git a/tests/test_dimension.py b/tests/test_dimension.py index 32da3b22e3..a54e160b38 100644 --- a/tests/test_dimension.py +++ b/tests/test_dimension.py @@ -243,6 +243,10 @@ def test_subdim_middle(self, opt): xi = SubDimension.middle(name='xi', parent=x, thickness_left=1, thickness_right=1) + assert xi.is_middle + assert not xi.is_left + assert not xi.is_right + eqs = [Eq(u.forward, u + 1)] eqs = [e.subs(x, xi) for e in eqs] @@ -261,6 +265,8 @@ def test_symbolic_size(self): thickness = 4 xleft = SubDimension.left(name='xleft', parent=x, thickness=thickness) + assert xleft.is_left + assert not xleft.is_middle assert xleft.symbolic_size == xleft.thickness.left[0] xi = SubDimension.middle(name='xi', parent=x, @@ -289,7 +295,8 @@ def test_bcs(self, opt): xi = SubDimension.middle(name='xi', parent=x, thickness_left=thickness, thickness_right=thickness) xright = SubDimension.right(name='xright', parent=x, thickness=thickness) - + assert xright.is_right + assert not xright.is_middle yi = SubDimension.middle(name='yi', parent=y, thickness_left=thickness, thickness_right=thickness) @@ -1515,7 +1522,7 @@ def test_issue_1927(self, factor): op = Operator(Eq(f, 1)) - assert op.arguments()['time_M'] == 4*(save-1) # == min legal endpoint + assert op.arguments()['time_M'] == 4*save-1 # == min legal endpoint # Also no issues when supplying an override assert op.arguments(time_M=10)['time_M'] == 10 @@ -1530,7 +1537,6 @@ def test_issue_1927_v2(self): i = Dimension(name='i') ci = ConditionalDimension(name='ci', parent=i, factor=factor) - g = Function(name='g', shape=(size,), dimensions=(i,)) f = Function(name='f', shape=(int(size/factor),), dimensions=(ci,)) diff --git a/tests/test_dle.py b/tests/test_dle.py index 86a288ac00..8d58827a61 100644 --- a/tests/test_dle.py +++ b/tests/test_dle.py @@ -193,8 +193,6 @@ def test_cache_blocking_structure_optrelax(): assert len(iters) == 5 assert iters[0].dim.is_Block assert iters[1].dim.is_Block - for i in range(2, 5): - assert not iters[i].dim.is_Block def test_cache_blocking_structure_optrelax_customdim(): @@ -286,7 +284,7 @@ def test_cache_blocking_structure_optrelax_prec_inject(): 'openmp': True, 'par-collapse-ncores': 1})) - assert_structure(op, ['t', 't,p_s0_blk0,p_s,rsx,rsy'], + assert_structure(op, ['t,p_s0_blk0,p_s', 't,p_s0_blk0,p_s,rsx,rsy'], 't,p_s0_blk0,p_s,rsx,rsy') @@ -816,12 +814,13 @@ def test_incs_no_atomic(self): 'par-collapse-ncores': 1, 'par-collapse-work': 0})) - assert 'collapse(2)' in str(op0) + assert 'collapse(3)' in str(op0) assert 'atomic' in str(op0) # Now only `x` is parallelized op1 = Operator([Eq(v[t, x, 0, 0], v[t, x, 0, 0] + 1), Inc(uf, 1)], opt=('advanced', {'openmp': True, 'par-collapse-ncores': 1})) + assert 'omp for' in str(op1) assert 'collapse' not in str(op1) assert 'atomic' not in str(op1) @@ -946,11 +945,12 @@ def test_parallel_prec_inject(self): eqns = sf.inject(field=u.forward, expr=sf * dt**2) op0 = Operator(eqns, opt=('advanced', {'openmp': True, - 'par-collapse-ncores': 1})) + 'par-collapse-ncores': 20})) iterations = FindNodes(Iteration).visit(op0) assert not iterations[0].pragmas assert 'omp for' in iterations[1].pragmas[0].value + assert 'collapse' not in iterations[1].pragmas[0].value op0 = Operator(eqns, opt=('advanced', {'openmp': True, 'par-collapse-ncores': 1, @@ -958,7 +958,7 @@ def test_parallel_prec_inject(self): iterations = FindNodes(Iteration).visit(op0) assert not iterations[0].pragmas - assert 'omp for collapse(2)' in iterations[1].pragmas[0].value + assert 'omp for collapse' in iterations[1].pragmas[0].value class TestNestedParallelism(object): diff --git a/tests/test_dse.py b/tests/test_dse.py index 2aefe69ed4..728f8f9357 100644 --- a/tests/test_dse.py +++ b/tests/test_dse.py @@ -2713,6 +2713,7 @@ def test_fullopt(self): assert summary1[('section0', None)].ops == 9 assert summary1[('section1', None)].ops == 31 assert summary1[('section2', None)].ops == 88 + assert summary1[('section3', None)].ops == 22 assert np.isclose(summary1[('section1', None)].oi, 1.767, atol=0.001) assert np.allclose(u0.data, u1.data, atol=10e-5) diff --git a/tests/test_gpu_openacc.py b/tests/test_gpu_openacc.py index 823d11854d..3085ad85c9 100644 --- a/tests/test_gpu_openacc.py +++ b/tests/test_gpu_openacc.py @@ -110,7 +110,7 @@ def test_tile_insteadof_collapse(self, par_tile): 'acc parallel loop tile(32,4) present(u)' # Only the AFFINE Iterations are tiled assert trees[3][1].pragmas[0].value ==\ - 'acc parallel loop collapse(3) present(src,src_coords,u)' + 'acc parallel loop collapse(4) present(src,src_coords,u)' @pytest.mark.parametrize('par_tile', [((32, 4, 4), (8, 8)), ((32, 4), (8, 8)), ((32, 4, 4), (8, 8, 8))]) @@ -136,6 +136,8 @@ def test_multiple_tile_sizes(self, par_tile): 'acc parallel loop tile(32,4,4) present(u)' assert trees[1][1].pragmas[0].value ==\ 'acc parallel loop tile(8,8) present(u)' + assert trees[3][1].pragmas[0].value ==\ + 'acc parallel loop collapse(4) present(src,src_coords,u)' def test_multi_tile_blocking_structure(self): grid = Grid(shape=(8, 8, 8)) diff --git a/tests/test_gpu_openmp.py b/tests/test_gpu_openmp.py index bc2de71708..29866508d8 100644 --- a/tests/test_gpu_openmp.py +++ b/tests/test_gpu_openmp.py @@ -265,7 +265,7 @@ def test_timeparallel_reduction(self): assert not tree.root.pragmas assert len(tree[1].pragmas) == 1 assert tree[1].pragmas[0].value ==\ - ('omp target teams distribute parallel for collapse(2)' + ('omp target teams distribute parallel for collapse(3)' ' reduction(+:f[0])') diff --git a/tests/test_interpolation.py b/tests/test_interpolation.py index 3a22ca1db7..97d86c1759 100644 --- a/tests/test_interpolation.py +++ b/tests/test_interpolation.py @@ -5,7 +5,7 @@ from sympy import Float from devito import (Grid, Operator, Dimension, SparseFunction, SparseTimeFunction, - Function, TimeFunction, DefaultDimension, Eq, + Function, TimeFunction, DefaultDimension, Eq, switchconfig, PrecomputedSparseFunction, PrecomputedSparseTimeFunction, MatrixSparseTimeFunction) from examples.seismic import (demo_model, TimeAxis, RickerSource, Receiver, @@ -734,3 +734,30 @@ class SparseFirst(SparseFunction): op(time_M=10) expected = 10*11/2 # n (n+1)/2 assert np.allclose(s.data, expected) + + +@switchconfig(safe_math=True) +def test_inject_function(): + nt = 11 + + grid = Grid(shape=(5, 5)) + u = TimeFunction(name="u", grid=grid, time_order=2) + src = SparseTimeFunction(name="src", grid=grid, nt=nt, npoint=1, + coordinates=[[0.5, 0.5]]) + + nfreq = 5 + freq_dim = DefaultDimension(name="freq", default_value=nfreq) + omega = Function(name="omega", dimensions=(freq_dim,), shape=(nfreq,), grid=grid) + omega.data.fill(1.) + + inj = src.inject(field=u.forward, expr=omega) + + op = Operator([inj]) + + op(time_M=0) + assert u.data[1, 2, 2] == nfreq + assert np.all(u.data[0] == 0) + assert np.all(u.data[2] == 0) + for i in [0, 1, 3, 4]: + for j in [0, 1, 3, 4]: + assert u.data[1, i, j] == 0 diff --git a/tests/test_mpi.py b/tests/test_mpi.py index 14ddbec249..ab7092ba1a 100644 --- a/tests/test_mpi.py +++ b/tests/test_mpi.py @@ -2499,7 +2499,7 @@ def test_adjoint_codegen(self, shape, kernel, space_order, save): op_adj = solver.op_adj() adj_calls = FindNodes(Call).visit(op_adj) - # one halo, ndim memalign and free (pos temp rec) + # one halo, ndim memalign and free (pos temp rec/src) sf_calls = 2 * len(shape) assert len(fwd_calls) == 1 + sf_calls assert len(adj_calls) == 1 + sf_calls @@ -2558,7 +2558,8 @@ def test_adjoint_F_no_omp(self): # TestDecomposition().test_reshape_left_right() # TestOperatorSimple().test_trivial_eq_2d() # TestFunction().test_halo_exchange_bilateral() - TestSparseFunction().test_sparse_coords() + # TestSparseFunction().test_sparse_coords() # TestSparseFunction().test_precomputed_sparse(2) # TestOperatorAdvanced().test_fission_due_to_antidep() + TestOperatorAdvanced().test_injection_wodup_wtime() # TestIsotropicAcoustic().test_adjoint_F(1) diff --git a/tests/test_operator.py b/tests/test_operator.py index f38ac01942..3064565e3c 100644 --- a/tests/test_operator.py +++ b/tests/test_operator.py @@ -707,6 +707,8 @@ def verify_arguments(self, arguments, expected): if isinstance(v, (Function, SparseFunction)): condition = v._C_as_ndarray(arguments[name])[v._mask_domain] == v.data condition = condition.all() + elif isinstance(arguments[name], range): + condition = arguments[name].start <= v < arguments[name].stop else: condition = arguments[name] == v