From 8bdf54e272a4decc0b9f6508bc8370e935237cec Mon Sep 17 00:00:00 2001 From: mloubout Date: Tue, 9 Jan 2024 13:31:36 -0500 Subject: [PATCH 1/3] example: small cleanup of tti for easier reuse --- devito/types/dimension.py | 2 +- examples/seismic/preset_models.py | 8 +- examples/seismic/tti/operators.py | 198 ++++++++---------------------- tests/test_dse.py | 6 +- 4 files changed, 56 insertions(+), 158 deletions(-) diff --git a/devito/types/dimension.py b/devito/types/dimension.py index 839c60be60..a429277e57 100644 --- a/devito/types/dimension.py +++ b/devito/types/dimension.py @@ -922,7 +922,7 @@ def _arg_defaults(self, _min=None, size=None, alias=None): except AttributeError: factor = dim._factor - defaults[dim.parent.max_name] = range(1, factor*size - 1) + defaults[dim.parent.max_name] = range(0, factor*size - 1) return defaults diff --git a/examples/seismic/preset_models.py b/examples/seismic/preset_models.py index 2f44ffddf8..5748bc383f 100644 --- a/examples/seismic/preset_models.py +++ b/examples/seismic/preset_models.py @@ -222,12 +222,12 @@ def demo_model(preset, **kwargs): for i in range(1, nlayers): v[..., i*int(shape[-1] / nlayers):] = vp_i[i] # Bottom velocity - epsilon = .3*(v - 1.5) - delta = .2*(v - 1.5) - theta = .5*(v - 1.5) + epsilon = .1*(v - vp_top) + delta = .05*(v - vp_top) + theta = .5*(v - vp_top) phi = None if len(shape) > 2 and preset.lower() not in ['layers-tti-noazimuth']: - phi = .25*(v - 1.5) + phi = .25*(v - vp_top) if density: kwargs['b'] = Gardners(v) diff --git a/examples/seismic/tti/operators.py b/examples/seismic/tti/operators.py index 87a00f4cba..b7813fd772 100644 --- a/examples/seismic/tti/operators.py +++ b/examples/seismic/tti/operators.py @@ -1,5 +1,5 @@ from devito import (Eq, Operator, Function, TimeFunction, NODE, Inc, solve, - cos, sin, sqrt) + cos, sin, sqrt, div, grad) from examples.seismic import PointSource, Receiver from examples.seismic.acoustic.operators import freesurface @@ -57,78 +57,70 @@ def trig_func(model): return costheta, sintheta -def Gzz_centered(model, field, costheta, sintheta, cosphi, sinphi, space_order): +def Gzz_centered(model, field): """ 3D rotated second order derivative in the direction z. Parameters ---------- + model : Model + Physical parameters model structure. field : Function Input for which the derivative is computed. - costheta : Function or float - Cosine of the tilt angle. - sintheta : Function or float - Sine of the tilt angle. - cosphi : Function or float - Cosine of the azymuth angle. - sinphi : Function or float - Sine of the azymuth angle. - space_order : int - Space discretization order. Returns ------- Rotated second order derivative w.r.t. z. """ - order1 = space_order // 2 + b = getattr(model, 'b', 1) + costheta, sintheta, cosphi, sinphi = trig_func(model) + order1 = field.space_order // 2 Gz = -(sintheta * cosphi * field.dx(fd_order=order1) + sintheta * sinphi * field.dy(fd_order=order1) + costheta * field.dz(fd_order=order1)) - Gzz = (Gz * costheta).dz(fd_order=order1).T + Gzz = (b * Gz * costheta).dz(fd_order=order1).T # Add rotated derivative if angles are not zero. If angles are # zeros then `0*Gz = 0` and doesn't have any `.dy` .... if sintheta != 0: - Gzz += (Gz * sintheta * cosphi).dx(fd_order=order1).T + Gzz += (b * Gz * sintheta * cosphi).dx(fd_order=order1).T if sinphi != 0: - Gzz += (Gz * sintheta * sinphi).dy(fd_order=order1).T + Gzz += (b * Gz * sintheta * sinphi).dy(fd_order=order1).T return Gzz -def Gzz_centered_2d(model, field, costheta, sintheta, space_order): +def Gzz_centered_2d(model, field): """ 2D rotated second order derivative in the direction z. Parameters ---------- + model : Model + Physical parameters model structure. field : Function Input for which the derivative is computed. - costheta : Function or float - Cosine of the tilt angle. - sintheta : Function or float - Sine of the tilt angle. - space_order : int - Space discretization order. Returns ------- Rotated second order derivative w.r.t. z. """ - order1 = space_order // 2 + costheta, sintheta = trig_func(model) + order1 = field.space_order // 2 + b = getattr(model, 'b', 1) Gz = -(sintheta * field.dx(fd_order=order1) + costheta * field.dy(fd_order=order1)) - Gzz = (Gz * costheta).dy(fd_order=order1).T + Gzz = (b * Gz * costheta).dy(fd_order=order1).T # Add rotated derivative if angles are not zero. If angles are # zeros then `0*Gz = 0` and doesn't have any `.dy` .... if sintheta != 0: - Gzz += (Gz * sintheta).dx(fd_order=order1).T + Gzz += (b * Gz * sintheta).dx(fd_order=order1).T return Gzz # Centered case produces directly Gxx + Gyy -def Gxxyy_centered(model, field, costheta, sintheta, cosphi, sinphi, space_order): +def Gh_centered(model, field): """ Sum of the 3D rotated second order derivative in the direction x and y. As the Laplacian is rotation invariant, it is computed as the conventional @@ -137,57 +129,29 @@ def Gxxyy_centered(model, field, costheta, sintheta, cosphi, sinphi, space_order Parameters ---------- + model : Model + Physical parameters model structure. field : Function Input field. - costheta : Function or float - Cosine of the tilt angle. - sintheta : Function or float - Sine of the tilt angle. - cosphi : Function or float - Cosine of the azymuth angle. - sinphi : Function or float - Sine of the azymuth angle. - space_order : int - Space discretization order. Returns ------- Sum of the 3D rotated second order derivative in the direction x and y. """ - Gzz = Gzz_centered(model, field, costheta, sintheta, cosphi, sinphi, space_order) - return field.laplace - Gzz - - -def Gxx_centered_2d(model, field, costheta, sintheta, space_order): - """ - 2D rotated second order derivative in the direction x. - As the Laplacian is rotation invariant, it is computed as the conventional - Laplacian minus the second order rotated second order derivative in the direction z - Gxx = field.laplace - Gzz - - Parameters - ---------- - field : TimeFunction - Input field. - costheta : Function or float - Cosine of the tilt angle. - sintheta : Function or float - Sine of the tilt angle. - cosphi : Function or float - Cosine of the azymuth angle. - sinphi : Function or float - Sine of the azymuth angle. - space_order : int - Space discretization order. - - Returns - ------- - Sum of the 3D rotated second order derivative in the direction x. - """ - return field.laplace - Gzz_centered_2d(model, field, costheta, sintheta, space_order) + if model.dim == 3: + Gzz = Gzz_centered(model, field) + else: + Gzz = Gzz_centered_2d(model, field) + b = getattr(model, 'b', None) + if b is not None: + so = field.space_order // 2 + lap = div(b * grad(field, shift=.5, order=so), shift=-.5, order=so) + else: + lap = field.laplace + return lap - Gzz -def kernel_centered_2d(model, u, v, space_order, **kwargs): +def kernel_centered(model, u, v, **kwargs): """ TTI finite difference kernel. The equation solved is: @@ -229,9 +193,6 @@ def kernel_centered_2d(model, u, v, space_order, **kwargs): # Forward or backward forward = kwargs.get('forward', True) - # Tilt and azymuth setup - costheta, sintheta = trig_func(model) - delta, epsilon = model.delta, model.epsilon epsilon = 1 + 2*epsilon delta = sqrt(1 + 2*delta) @@ -240,82 +201,17 @@ def kernel_centered_2d(model, u, v, space_order, **kwargs): qu = kwargs.get('qu', 0) qv = kwargs.get('qv', 0) - if forward: - Gxx = Gxx_centered_2d(model, u, costheta, sintheta, space_order) - Gzz = Gzz_centered_2d(model, v, costheta, sintheta, space_order) - H0 = epsilon*Gxx + delta*Gzz - Hz = delta*Gxx + Gzz - return second_order_stencil(model, u, v, H0, Hz, qu, qv) - else: - H0 = Gxx_centered_2d(model, (epsilon*u + delta*v), costheta, - sintheta, space_order) - Hz = Gzz_centered_2d(model, (delta*u + v), costheta, sintheta, space_order) - return second_order_stencil(model, u, v, H0, Hz, qu, qv, forward=forward) - - -def kernel_centered_3d(model, u, v, space_order, **kwargs): - """ - TTI finite difference kernel. The equation solved is: - - u.dt2 = H0 - v.dt2 = Hz - - where H0 and Hz are defined as: - H0 = (1+2 *epsilon) (Gxx(u)+Gyy(u)) + sqrt(1+ 2*delta) Gzz(v) - Hz = sqrt(1+ 2*delta) (Gxx(u)+Gyy(u)) + Gzz(v) - - and - - H0 = (Gxx+Gyy)((1+2 *epsilon)*u + sqrt(1+ 2*delta)*v) - Hz = Gzz(sqrt(1+ 2*delta)*u + v) - - for the forward and adjoint cases, respectively. Epsilon and delta are the Thomsen - parameters. This function computes H0 and Hz. - - References: - * Zhang, Yu, Houzhu Zhang, and Guanquan Zhang. "A stable TTI reverse - time migration and its implementation." Geophysics 76.3 (2011): WA3-WA11. - * Louboutin, Mathias, Philipp Witte, and Felix J. Herrmann. "Effects of - wrong adjoints for RTM in TTI media." SEG Technical Program Expanded - Abstracts 2018. Society of Exploration Geophysicists, 2018. 331-335. - - Parameters - ---------- - u : TimeFunction - First TTI field. - v : TimeFunction - Second TTI field. - space_order : int - Space discretization order. - - Returns - ------- - u and v component of the rotated Laplacian in 3D. - """ - # Forward or backward - forward = kwargs.get('forward', True) - - costheta, sintheta, cosphi, sinphi = trig_func(model) - - delta, epsilon = model.delta, model.epsilon - epsilon = 1 + 2*epsilon - delta = sqrt(1 + 2*delta) - - # Get source - qu = kwargs.get('qu', 0) - qv = kwargs.get('qv', 0) + Gzz = Gzz_centered_2d if model.dim == 2 else Gzz_centered if forward: - Gxx = Gxxyy_centered(model, u, costheta, sintheta, cosphi, sinphi, space_order) - Gzz = Gzz_centered(model, v, costheta, sintheta, cosphi, sinphi, space_order) + Gxx = Gh_centered(model, u) + Gzz = Gzz(model, v) H0 = epsilon*Gxx + delta*Gzz Hz = delta*Gxx + Gzz return second_order_stencil(model, u, v, H0, Hz, qu, qv) else: - H0 = Gxxyy_centered(model, (epsilon*u + delta*v), costheta, sintheta, - cosphi, sinphi, space_order) - Hz = Gzz_centered(model, (delta*u + v), costheta, sintheta, cosphi, - sinphi, space_order) + H0 = Gh_centered(model, (epsilon*u + delta*v)) + Hz = Gzz(model, (delta*u + v)) return second_order_stencil(model, u, v, H0, Hz, qu, qv, forward=forward) @@ -351,7 +247,7 @@ def particle_velocity_fields(model, space_order): return vx, vz, vy -def kernel_staggered_2d(model, u, v, space_order, **kwargs): +def kernel_staggered_2d(model, u, v, **kwargs): """ TTI finite difference. The equation solved is: @@ -360,6 +256,7 @@ def kernel_staggered_2d(model, u, v, space_order, **kwargs): m * v.dt = - sqrt(1 + 2 delta) vx.dx - vz.dz + Fh m * u.dt = - (1 + 2 epsilon) vx.dx - sqrt(1 + 2 delta) vz.dz + Fv """ + space_order = u.space_order # Forward or backward forward = kwargs.get('forward', True) @@ -413,7 +310,7 @@ def kernel_staggered_2d(model, u, v, space_order, **kwargs): return [u_vx, u_vz] + [pv_eq, ph_eq] -def kernel_staggered_3d(model, u, v, space_order, **kwargs): +def kernel_staggered_3d(model, u, v, **kwargs): """ TTI finite difference. The equation solved is: @@ -423,6 +320,7 @@ def kernel_staggered_3d(model, u, v, space_order, **kwargs): m * v.dt = - sqrt(1 + 2 delta) (vx.dx + vy.dy) - vz.dz + Fh m * u.dt = - (1 + 2 epsilon) (vx.dx + vy.dy) - sqrt(1 + 2 delta) vz.dz + Fv """ + space_order = u.space_order # Forward or backward forward = kwargs.get('forward', True) @@ -547,7 +445,7 @@ def ForwardOperator(model, geometry, space_order=4, # FD kernels of the PDE FD_kernel = kernels[(kernel, len(model.shape))] - stencils = FD_kernel(model, u, v, space_order) + stencils = FD_kernel(model, u, v) # Source and receivers expr = src * dt / m if kernel == 'staggered' else src * dt**2 / m @@ -596,7 +494,7 @@ def AdjointOperator(model, geometry, space_order=4, # FD kernels of the PDE FD_kernel = kernels[(kernel, len(model.shape))] - stencils = FD_kernel(model, p, r, space_order, forward=False) + stencils = FD_kernel(model, p, r, forward=False) # Construct expression to inject receiver values expr = rec * dt / m if kernel == 'staggered' else rec * dt**2 / m @@ -650,13 +548,13 @@ def JacobianOperator(model, geometry, space_order=4, # FD kernels of the PDE FD_kernel = kernels[('centered', len(model.shape))] - eqn1 = FD_kernel(model, u0, v0, space_order) + eqn1 = FD_kernel(model, u0, v0) # Linearized source and stencil lin_usrc = -dm * u0.dt2 lin_vsrc = -dm * v0.dt2 - eqn2 = FD_kernel(model, du, dv, space_order, qu=lin_usrc, qv=lin_vsrc) + eqn2 = FD_kernel(model, du, dv, qu=lin_usrc, qv=lin_vsrc) # Construct expression to inject source values, injecting at u0(t+dt)/v0(t+dt) src_term = src.inject(field=(u0.forward, v0.forward), expr=src * dt**2 / m) @@ -708,7 +606,7 @@ def JacobianAdjOperator(model, geometry, space_order=4, # FD kernels of the PDE FD_kernel = kernels[('centered', len(model.shape))] - eqn = FD_kernel(model, du, dv, space_order, forward=False) + eqn = FD_kernel(model, du, dv, forward=False) dm_update = Inc(dm, - (u0 * du.dt2 + v0 * dv.dt2)) @@ -720,5 +618,5 @@ def JacobianAdjOperator(model, geometry, space_order=4, name='GradientTTI', **kwargs) -kernels = {('centered', 3): kernel_centered_3d, ('centered', 2): kernel_centered_2d, +kernels = {('centered', 3): kernel_centered, ('centered', 2): kernel_centered, ('staggered', 3): kernel_staggered_3d, ('staggered', 2): kernel_staggered_2d} diff --git a/tests/test_dse.py b/tests/test_dse.py index 8f4d756a8b..5341bb010c 100644 --- a/tests/test_dse.py +++ b/tests/test_dse.py @@ -2768,7 +2768,7 @@ def model(self): # TTI layered model for the tti test, no need for a smooth interace # bewtween the two layer as the compilation passes are tested, not the # physical prettiness of the result -- which ultimately saves time - return demo_model('layers-tti', nlayers=3, nbl=10, space_order=4, + return demo_model('layers-tti', nlayers=3, nbl=10, space_order=8, shape=(50, 50, 50), spacing=(20., 20., 20.), smooth=False) @cached_property @@ -2816,7 +2816,7 @@ def test_fullopt(self): # Check expected opcount/oi assert summary[('section1', None)].ops == 92 - assert np.isclose(summary[('section1', None)].oi, 2.074, atol=0.001) + assert np.isclose(summary[('section1', None)].oi, 2.072, atol=0.001) # With optimizations enabled, there should be exactly four BlockDimensions op = wavesolver.op_fwd() @@ -2874,7 +2874,7 @@ def test_opcounts(self, space_order, expected): @switchconfig(profiling='advanced') @pytest.mark.parametrize('space_order,exp_ops,exp_arrays', [ - (4, 122, 6), (8, 221, 7) + (4, 122, 6), (8, 235, 7) ]) def test_opcounts_adjoint(self, space_order, exp_ops, exp_arrays): wavesolver = self.tti_operator(space_order=space_order, From 7a647b9339313fc515287f8fed679271f850eea7 Mon Sep 17 00:00:00 2001 From: mloubout Date: Fri, 12 Jan 2024 10:24:32 -0500 Subject: [PATCH 2/3] arch: supports all apple mx (m1, m2, m3....) up to latest m3 so far --- devito/arch/archinfo.py | 15 +++++++++++++-- devito/arch/compiler.py | 12 +++++++----- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/devito/arch/archinfo.py b/devito/arch/archinfo.py index 7563359669..afaa49d86f 100644 --- a/devito/arch/archinfo.py +++ b/devito/arch/archinfo.py @@ -23,7 +23,7 @@ 'INTEL64', 'SNB', 'IVB', 'HSW', 'BDW', 'KNL', 'KNL7210', 'SKX', 'KLX', 'CLX', 'CLK', 'SPR', # ARM CPUs - 'AMD', 'ARM', 'M1', 'GRAVITON', + 'AMD', 'ARM', 'AppleArm', 'M1', 'M2', 'M3', 'GRAVITON', # Other legacy CPUs 'POWER8', 'POWER9', # Generic GPUs @@ -709,6 +709,15 @@ class Arm(Cpu64): known_isas = ('fp', 'asimd', 'asimdrdm') +class AppleArm(Arm): + + @cached_property + def march(self): + sysinfo = run(["sysctl", "-n", "machdep.cpu.brand_string"], + stdout=PIPE, stderr=DEVNULL).stdout.decode("utf-8") + return sysinfo.split(' ')[1].lower() + + class Amd(Cpu64): known_isas = ('cpp', 'sse', 'avx', 'avx2') @@ -839,7 +848,9 @@ def march(cls): ARM = Arm('arm') GRAVITON = Arm('graviton') -M1 = Arm('m1') +M1 = AppleArm('m1') +M2 = AppleArm('m2') +M3 = AppleArm('m3') AMD = Amd('amd') diff --git a/devito/arch/compiler.py b/devito/arch/compiler.py index 3b102f1822..35f522a0b8 100644 --- a/devito/arch/compiler.py +++ b/devito/arch/compiler.py @@ -14,7 +14,7 @@ from codepy.toolchain import (GCCToolchain, call_capture_output as _call_capture_output) -from devito.arch import (AMDGPUX, Cpu64, M1, NVIDIAX, POWER8, POWER9, GRAVITON, +from devito.arch import (AMDGPUX, Cpu64, AppleArm, NVIDIAX, POWER8, POWER9, GRAVITON, IntelDevice, get_nvidia_cc, check_cuda_runtime, get_m1_llvm_path) from devito.exceptions import CompilationError @@ -486,14 +486,16 @@ def __init_finalize__(self, **kwargs): '-fopenmp-targets=amdgcn-amd-amdhsa', '-Xopenmp-target=amdgcn-amd-amdhsa'] self.ldflags += ['-march=%s' % platform.march] - elif platform is M1: + elif isinstance(platform, AppleArm): # NOTE: - # Apple M1 supports OpenMP through Apple's LLVM compiler. + # Apple Mx supports OpenMP through Apple's LLVM compiler. # The compiler can be installed with Homebrew or can be built from scratch. # Check if installed and set compiler flags accordingly llvmm1 = get_m1_llvm_path(language) if llvmm1 and language == 'openmp': - self.ldflags += ['-mcpu=apple-m1', '-fopenmp', '-L%s' % llvmm1['libs']] + mx = platform.march + self.ldflags += ['-mcpu=apple-%s' % mx, + '-fopenmp', '-L%s' % llvmm1['libs']] self.cflags += ['-Xclang', '-I%s' % llvmm1['include']] else: if platform in [POWER8, POWER9]: @@ -895,7 +897,7 @@ def __new__(cls, *args, **kwargs): platform = kwargs.pop('platform', configuration['platform']) language = kwargs.pop('language', configuration['language']) - if platform is M1: + if isinstance(platform, AppleArm): _base = ClangCompiler elif isinstance(platform, IntelDevice): _base = OneapiCompiler From 33ab5b311589ba6120ec57281617f0fce091a721 Mon Sep 17 00:00:00 2001 From: mloubout Date: Fri, 12 Jan 2024 12:37:06 -0500 Subject: [PATCH 3/3] misc: prevent arithmetic reductions on non float --- devito/builtins/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/devito/builtins/utils.py b/devito/builtins/utils.py index 84c336b15c..edc1bfd9ed 100644 --- a/devito/builtins/utils.py +++ b/devito/builtins/utils.py @@ -27,7 +27,7 @@ def __init__(self, *functions, op=dv.mpi.MPI.SUM, dtype=None): else: dtype = {f.dtype for f in functions} if len(dtype) == 1: - self.dtype = dtype.pop() + self.dtype = np.result_type(dtype.pop(), np.float32).type else: raise ValueError("Illegal mixed data types") self.v = None @@ -37,7 +37,7 @@ def __enter__(self): i = dv.Dimension(name='mri',) self.n = dv.Function(name='n', shape=(1,), dimensions=(i,), grid=self.grid, dtype=self.dtype, space='host') - self.n.data[0] = 0 + self.n.data[:] = 0 return self def __exit__(self, exc_type, exc_value, traceback):