From 8bdf54e272a4decc0b9f6508bc8370e935237cec Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Tue, 9 Jan 2024 13:31:36 -0500
Subject: [PATCH 1/3] example: small cleanup of tti for easier reuse

---
 devito/types/dimension.py         |   2 +-
 examples/seismic/preset_models.py |   8 +-
 examples/seismic/tti/operators.py | 198 ++++++++----------------------
 tests/test_dse.py                 |   6 +-
 4 files changed, 56 insertions(+), 158 deletions(-)

diff --git a/devito/types/dimension.py b/devito/types/dimension.py
index 839c60be60..a429277e57 100644
--- a/devito/types/dimension.py
+++ b/devito/types/dimension.py
@@ -922,7 +922,7 @@ def _arg_defaults(self, _min=None, size=None, alias=None):
         except AttributeError:
             factor = dim._factor
 
-        defaults[dim.parent.max_name] = range(1, factor*size - 1)
+        defaults[dim.parent.max_name] = range(0, factor*size - 1)
 
         return defaults
 
diff --git a/examples/seismic/preset_models.py b/examples/seismic/preset_models.py
index 2f44ffddf8..5748bc383f 100644
--- a/examples/seismic/preset_models.py
+++ b/examples/seismic/preset_models.py
@@ -222,12 +222,12 @@ def demo_model(preset, **kwargs):
         for i in range(1, nlayers):
             v[..., i*int(shape[-1] / nlayers):] = vp_i[i]  # Bottom velocity
 
-        epsilon = .3*(v - 1.5)
-        delta = .2*(v - 1.5)
-        theta = .5*(v - 1.5)
+        epsilon = .1*(v - vp_top)
+        delta = .05*(v - vp_top)
+        theta = .5*(v - vp_top)
         phi = None
         if len(shape) > 2 and preset.lower() not in ['layers-tti-noazimuth']:
-            phi = .25*(v - 1.5)
+            phi = .25*(v - vp_top)
 
         if density:
             kwargs['b'] = Gardners(v)
diff --git a/examples/seismic/tti/operators.py b/examples/seismic/tti/operators.py
index 87a00f4cba..b7813fd772 100644
--- a/examples/seismic/tti/operators.py
+++ b/examples/seismic/tti/operators.py
@@ -1,5 +1,5 @@
 from devito import (Eq, Operator, Function, TimeFunction, NODE, Inc, solve,
-                    cos, sin, sqrt)
+                    cos, sin, sqrt, div, grad)
 from examples.seismic import PointSource, Receiver
 from examples.seismic.acoustic.operators import freesurface
 
@@ -57,78 +57,70 @@ def trig_func(model):
     return costheta, sintheta
 
 
-def Gzz_centered(model, field, costheta, sintheta, cosphi, sinphi, space_order):
+def Gzz_centered(model, field):
     """
     3D rotated second order derivative in the direction z.
 
     Parameters
     ----------
+    model : Model
+        Physical parameters model structure.
     field : Function
         Input for which the derivative is computed.
-    costheta : Function or float
-        Cosine of the tilt angle.
-    sintheta : Function or float
-        Sine of the tilt angle.
-    cosphi : Function or float
-        Cosine of the azymuth angle.
-    sinphi : Function or float
-        Sine of the azymuth angle.
-    space_order : int
-        Space discretization order.
 
     Returns
     -------
     Rotated second order derivative w.r.t. z.
     """
-    order1 = space_order // 2
+    b = getattr(model, 'b', 1)
+    costheta, sintheta, cosphi, sinphi = trig_func(model)
+    order1 = field.space_order // 2
     Gz = -(sintheta * cosphi * field.dx(fd_order=order1) +
            sintheta * sinphi * field.dy(fd_order=order1) +
            costheta * field.dz(fd_order=order1))
 
-    Gzz = (Gz * costheta).dz(fd_order=order1).T
+    Gzz = (b * Gz * costheta).dz(fd_order=order1).T
     # Add rotated derivative if angles are not zero. If angles are
     # zeros then `0*Gz = 0` and doesn't have any `.dy` ....
     if sintheta != 0:
-        Gzz += (Gz * sintheta * cosphi).dx(fd_order=order1).T
+        Gzz += (b * Gz * sintheta * cosphi).dx(fd_order=order1).T
     if sinphi != 0:
-        Gzz += (Gz * sintheta * sinphi).dy(fd_order=order1).T
+        Gzz += (b * Gz * sintheta * sinphi).dy(fd_order=order1).T
 
     return Gzz
 
 
-def Gzz_centered_2d(model, field, costheta, sintheta, space_order):
+def Gzz_centered_2d(model, field):
     """
     2D rotated second order derivative in the direction z.
 
     Parameters
     ----------
+    model : Model
+        Physical parameters model structure.
     field : Function
         Input for which the derivative is computed.
-    costheta : Function or float
-        Cosine of the tilt angle.
-    sintheta : Function or float
-        Sine of the tilt angle.
-    space_order : int
-        Space discretization order.
 
     Returns
     -------
     Rotated second order derivative w.r.t. z.
     """
-    order1 = space_order // 2
+    costheta, sintheta = trig_func(model)
+    order1 = field.space_order // 2
+    b = getattr(model, 'b', 1)
     Gz = -(sintheta * field.dx(fd_order=order1) +
            costheta * field.dy(fd_order=order1))
-    Gzz = (Gz * costheta).dy(fd_order=order1).T
+    Gzz = (b * Gz * costheta).dy(fd_order=order1).T
 
     # Add rotated derivative if angles are not zero. If angles are
     # zeros then `0*Gz = 0` and doesn't have any `.dy` ....
     if sintheta != 0:
-        Gzz += (Gz * sintheta).dx(fd_order=order1).T
+        Gzz += (b * Gz * sintheta).dx(fd_order=order1).T
     return Gzz
 
 
 # Centered case produces directly Gxx + Gyy
-def Gxxyy_centered(model, field, costheta, sintheta, cosphi, sinphi, space_order):
+def Gh_centered(model, field):
     """
     Sum of the 3D rotated second order derivative in the direction x and y.
     As the Laplacian is rotation invariant, it is computed as the conventional
@@ -137,57 +129,29 @@ def Gxxyy_centered(model, field, costheta, sintheta, cosphi, sinphi, space_order
 
     Parameters
     ----------
+    model : Model
+        Physical parameters model structure.
     field : Function
         Input field.
-    costheta : Function or float
-        Cosine of the tilt angle.
-    sintheta : Function or float
-        Sine of the tilt angle.
-    cosphi : Function or float
-        Cosine of the azymuth angle.
-    sinphi : Function or float
-        Sine of the azymuth angle.
-    space_order : int
-        Space discretization order.
 
     Returns
     -------
     Sum of the 3D rotated second order derivative in the direction x and y.
     """
-    Gzz = Gzz_centered(model, field, costheta, sintheta, cosphi, sinphi, space_order)
-    return field.laplace - Gzz
-
-
-def Gxx_centered_2d(model, field, costheta, sintheta, space_order):
-    """
-    2D rotated second order derivative in the direction x.
-    As the Laplacian is rotation invariant, it is computed as the conventional
-    Laplacian minus the second order rotated second order derivative in the direction z
-    Gxx = field.laplace - Gzz
-
-    Parameters
-    ----------
-    field : TimeFunction
-        Input field.
-    costheta : Function or float
-        Cosine of the tilt angle.
-    sintheta : Function or float
-        Sine of the tilt angle.
-    cosphi : Function or float
-        Cosine of the azymuth angle.
-    sinphi : Function or float
-        Sine of the azymuth angle.
-    space_order : int
-        Space discretization order.
-
-    Returns
-    -------
-    Sum of the 3D rotated second order derivative in the direction x.
-    """
-    return field.laplace - Gzz_centered_2d(model, field, costheta, sintheta, space_order)
+    if model.dim == 3:
+        Gzz = Gzz_centered(model, field)
+    else:
+        Gzz = Gzz_centered_2d(model, field)
+    b = getattr(model, 'b', None)
+    if b is not None:
+        so = field.space_order // 2
+        lap = div(b * grad(field, shift=.5, order=so), shift=-.5, order=so)
+    else:
+        lap = field.laplace
+    return lap - Gzz
 
 
-def kernel_centered_2d(model, u, v, space_order, **kwargs):
+def kernel_centered(model, u, v, **kwargs):
     """
     TTI finite difference kernel. The equation solved is:
 
@@ -229,9 +193,6 @@ def kernel_centered_2d(model, u, v, space_order, **kwargs):
     # Forward or backward
     forward = kwargs.get('forward', True)
 
-    # Tilt and azymuth setup
-    costheta, sintheta = trig_func(model)
-
     delta, epsilon = model.delta, model.epsilon
     epsilon = 1 + 2*epsilon
     delta = sqrt(1 + 2*delta)
@@ -240,82 +201,17 @@ def kernel_centered_2d(model, u, v, space_order, **kwargs):
     qu = kwargs.get('qu', 0)
     qv = kwargs.get('qv', 0)
 
-    if forward:
-        Gxx = Gxx_centered_2d(model, u, costheta, sintheta, space_order)
-        Gzz = Gzz_centered_2d(model, v, costheta, sintheta, space_order)
-        H0 = epsilon*Gxx + delta*Gzz
-        Hz = delta*Gxx + Gzz
-        return second_order_stencil(model, u, v, H0, Hz, qu, qv)
-    else:
-        H0 = Gxx_centered_2d(model, (epsilon*u + delta*v), costheta,
-                             sintheta, space_order)
-        Hz = Gzz_centered_2d(model, (delta*u + v), costheta, sintheta, space_order)
-        return second_order_stencil(model, u, v, H0, Hz, qu, qv, forward=forward)
-
-
-def kernel_centered_3d(model, u, v, space_order, **kwargs):
-    """
-    TTI finite difference kernel. The equation solved is:
-
-    u.dt2 = H0
-    v.dt2 = Hz
-
-    where H0 and Hz are defined as:
-    H0 = (1+2 *epsilon) (Gxx(u)+Gyy(u)) + sqrt(1+ 2*delta) Gzz(v)
-    Hz = sqrt(1+ 2*delta) (Gxx(u)+Gyy(u)) +  Gzz(v)
-
-    and
-
-    H0 = (Gxx+Gyy)((1+2 *epsilon)*u + sqrt(1+ 2*delta)*v)
-    Hz = Gzz(sqrt(1+ 2*delta)*u + v)
-
-    for the forward and adjoint cases, respectively. Epsilon and delta are the Thomsen
-    parameters. This function computes H0 and Hz.
-
-    References:
-        * Zhang, Yu, Houzhu Zhang, and Guanquan Zhang. "A stable TTI reverse
-          time migration and its implementation." Geophysics 76.3 (2011): WA3-WA11.
-        * Louboutin, Mathias, Philipp Witte, and Felix J. Herrmann. "Effects of
-          wrong adjoints for RTM in TTI media." SEG Technical Program Expanded
-          Abstracts 2018. Society of Exploration Geophysicists, 2018. 331-335.
-
-    Parameters
-    ----------
-    u : TimeFunction
-        First TTI field.
-    v : TimeFunction
-        Second TTI field.
-    space_order : int
-        Space discretization order.
-
-    Returns
-    -------
-    u and v component of the rotated Laplacian in 3D.
-    """
-    # Forward or backward
-    forward = kwargs.get('forward', True)
-
-    costheta, sintheta, cosphi, sinphi = trig_func(model)
-
-    delta, epsilon = model.delta, model.epsilon
-    epsilon = 1 + 2*epsilon
-    delta = sqrt(1 + 2*delta)
-
-    # Get source
-    qu = kwargs.get('qu', 0)
-    qv = kwargs.get('qv', 0)
+    Gzz = Gzz_centered_2d if model.dim == 2 else Gzz_centered
 
     if forward:
-        Gxx = Gxxyy_centered(model, u, costheta, sintheta, cosphi, sinphi, space_order)
-        Gzz = Gzz_centered(model, v, costheta, sintheta, cosphi, sinphi, space_order)
+        Gxx = Gh_centered(model, u)
+        Gzz = Gzz(model, v)
         H0 = epsilon*Gxx + delta*Gzz
         Hz = delta*Gxx + Gzz
         return second_order_stencil(model, u, v, H0, Hz, qu, qv)
     else:
-        H0 = Gxxyy_centered(model, (epsilon*u + delta*v), costheta, sintheta,
-                            cosphi, sinphi, space_order)
-        Hz = Gzz_centered(model, (delta*u + v), costheta, sintheta, cosphi,
-                          sinphi, space_order)
+        H0 = Gh_centered(model, (epsilon*u + delta*v))
+        Hz = Gzz(model, (delta*u + v))
         return second_order_stencil(model, u, v, H0, Hz, qu, qv, forward=forward)
 
 
@@ -351,7 +247,7 @@ def particle_velocity_fields(model, space_order):
     return vx, vz, vy
 
 
-def kernel_staggered_2d(model, u, v, space_order, **kwargs):
+def kernel_staggered_2d(model, u, v, **kwargs):
     """
     TTI finite difference. The equation solved is:
 
@@ -360,6 +256,7 @@ def kernel_staggered_2d(model, u, v, space_order, **kwargs):
     m * v.dt = - sqrt(1 + 2 delta) vx.dx - vz.dz + Fh
     m * u.dt = - (1 + 2 epsilon) vx.dx - sqrt(1 + 2 delta) vz.dz + Fv
     """
+    space_order = u.space_order
     # Forward or backward
     forward = kwargs.get('forward', True)
 
@@ -413,7 +310,7 @@ def kernel_staggered_2d(model, u, v, space_order, **kwargs):
     return [u_vx, u_vz] + [pv_eq, ph_eq]
 
 
-def kernel_staggered_3d(model, u, v, space_order, **kwargs):
+def kernel_staggered_3d(model, u, v, **kwargs):
     """
     TTI finite difference. The equation solved is:
 
@@ -423,6 +320,7 @@ def kernel_staggered_3d(model, u, v, space_order, **kwargs):
     m * v.dt = - sqrt(1 + 2 delta) (vx.dx + vy.dy) - vz.dz + Fh
     m * u.dt = - (1 + 2 epsilon) (vx.dx + vy.dy) - sqrt(1 + 2 delta) vz.dz + Fv
     """
+    space_order = u.space_order
     # Forward or backward
     forward = kwargs.get('forward', True)
 
@@ -547,7 +445,7 @@ def ForwardOperator(model, geometry, space_order=4,
 
     # FD kernels of the PDE
     FD_kernel = kernels[(kernel, len(model.shape))]
-    stencils = FD_kernel(model, u, v, space_order)
+    stencils = FD_kernel(model, u, v)
 
     # Source and receivers
     expr = src * dt / m if kernel == 'staggered' else src * dt**2 / m
@@ -596,7 +494,7 @@ def AdjointOperator(model, geometry, space_order=4,
 
     # FD kernels of the PDE
     FD_kernel = kernels[(kernel, len(model.shape))]
-    stencils = FD_kernel(model, p, r, space_order, forward=False)
+    stencils = FD_kernel(model, p, r, forward=False)
 
     # Construct expression to inject receiver values
     expr = rec * dt / m if kernel == 'staggered' else rec * dt**2 / m
@@ -650,13 +548,13 @@ def JacobianOperator(model, geometry, space_order=4,
 
     # FD kernels of the PDE
     FD_kernel = kernels[('centered', len(model.shape))]
-    eqn1 = FD_kernel(model, u0, v0, space_order)
+    eqn1 = FD_kernel(model, u0, v0)
 
     # Linearized source and stencil
     lin_usrc = -dm * u0.dt2
     lin_vsrc = -dm * v0.dt2
 
-    eqn2 = FD_kernel(model, du, dv, space_order, qu=lin_usrc, qv=lin_vsrc)
+    eqn2 = FD_kernel(model, du, dv, qu=lin_usrc, qv=lin_vsrc)
 
     # Construct expression to inject source values, injecting at u0(t+dt)/v0(t+dt)
     src_term = src.inject(field=(u0.forward, v0.forward), expr=src * dt**2 / m)
@@ -708,7 +606,7 @@ def JacobianAdjOperator(model, geometry, space_order=4,
 
     # FD kernels of the PDE
     FD_kernel = kernels[('centered', len(model.shape))]
-    eqn = FD_kernel(model, du, dv, space_order, forward=False)
+    eqn = FD_kernel(model, du, dv, forward=False)
 
     dm_update = Inc(dm, - (u0 * du.dt2 + v0 * dv.dt2))
 
@@ -720,5 +618,5 @@ def JacobianAdjOperator(model, geometry, space_order=4,
                     name='GradientTTI', **kwargs)
 
 
-kernels = {('centered', 3): kernel_centered_3d, ('centered', 2): kernel_centered_2d,
+kernels = {('centered', 3): kernel_centered, ('centered', 2): kernel_centered,
            ('staggered', 3): kernel_staggered_3d, ('staggered', 2): kernel_staggered_2d}
diff --git a/tests/test_dse.py b/tests/test_dse.py
index 8f4d756a8b..5341bb010c 100644
--- a/tests/test_dse.py
+++ b/tests/test_dse.py
@@ -2768,7 +2768,7 @@ def model(self):
         # TTI layered model for the tti test, no need for a smooth interace
         # bewtween the two layer as the compilation passes are tested, not the
         # physical prettiness of the result -- which ultimately saves time
-        return demo_model('layers-tti', nlayers=3, nbl=10, space_order=4,
+        return demo_model('layers-tti', nlayers=3, nbl=10, space_order=8,
                           shape=(50, 50, 50), spacing=(20., 20., 20.), smooth=False)
 
     @cached_property
@@ -2816,7 +2816,7 @@ def test_fullopt(self):
 
         # Check expected opcount/oi
         assert summary[('section1', None)].ops == 92
-        assert np.isclose(summary[('section1', None)].oi, 2.074, atol=0.001)
+        assert np.isclose(summary[('section1', None)].oi, 2.072, atol=0.001)
 
         # With optimizations enabled, there should be exactly four BlockDimensions
         op = wavesolver.op_fwd()
@@ -2874,7 +2874,7 @@ def test_opcounts(self, space_order, expected):
 
     @switchconfig(profiling='advanced')
     @pytest.mark.parametrize('space_order,exp_ops,exp_arrays', [
-        (4, 122, 6), (8, 221, 7)
+        (4, 122, 6), (8, 235, 7)
     ])
     def test_opcounts_adjoint(self, space_order, exp_ops, exp_arrays):
         wavesolver = self.tti_operator(space_order=space_order,

From 7a647b9339313fc515287f8fed679271f850eea7 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Fri, 12 Jan 2024 10:24:32 -0500
Subject: [PATCH 2/3] arch: supports all apple mx (m1, m2, m3....) up to latest
 m3 so far

---
 devito/arch/archinfo.py | 15 +++++++++++++--
 devito/arch/compiler.py | 12 +++++++-----
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/devito/arch/archinfo.py b/devito/arch/archinfo.py
index 7563359669..afaa49d86f 100644
--- a/devito/arch/archinfo.py
+++ b/devito/arch/archinfo.py
@@ -23,7 +23,7 @@
            'INTEL64', 'SNB', 'IVB', 'HSW', 'BDW', 'KNL', 'KNL7210',
            'SKX', 'KLX', 'CLX', 'CLK', 'SPR',
            # ARM CPUs
-           'AMD', 'ARM', 'M1', 'GRAVITON',
+           'AMD', 'ARM', 'AppleArm', 'M1', 'M2', 'M3', 'GRAVITON',
            # Other legacy CPUs
            'POWER8', 'POWER9',
            # Generic GPUs
@@ -709,6 +709,15 @@ class Arm(Cpu64):
     known_isas = ('fp', 'asimd', 'asimdrdm')
 
 
+class AppleArm(Arm):
+
+    @cached_property
+    def march(self):
+        sysinfo = run(["sysctl", "-n", "machdep.cpu.brand_string"],
+                      stdout=PIPE, stderr=DEVNULL).stdout.decode("utf-8")
+        return sysinfo.split(' ')[1].lower()
+
+
 class Amd(Cpu64):
 
     known_isas = ('cpp', 'sse', 'avx', 'avx2')
@@ -839,7 +848,9 @@ def march(cls):
 
 ARM = Arm('arm')
 GRAVITON = Arm('graviton')
-M1 = Arm('m1')
+M1 = AppleArm('m1')
+M2 = AppleArm('m2')
+M3 = AppleArm('m3')
 
 AMD = Amd('amd')
 
diff --git a/devito/arch/compiler.py b/devito/arch/compiler.py
index 3b102f1822..35f522a0b8 100644
--- a/devito/arch/compiler.py
+++ b/devito/arch/compiler.py
@@ -14,7 +14,7 @@
 from codepy.toolchain import (GCCToolchain,
                               call_capture_output as _call_capture_output)
 
-from devito.arch import (AMDGPUX, Cpu64, M1, NVIDIAX, POWER8, POWER9, GRAVITON,
+from devito.arch import (AMDGPUX, Cpu64, AppleArm, NVIDIAX, POWER8, POWER9, GRAVITON,
                          IntelDevice, get_nvidia_cc, check_cuda_runtime,
                          get_m1_llvm_path)
 from devito.exceptions import CompilationError
@@ -486,14 +486,16 @@ def __init_finalize__(self, **kwargs):
                                  '-fopenmp-targets=amdgcn-amd-amdhsa',
                                  '-Xopenmp-target=amdgcn-amd-amdhsa']
                 self.ldflags += ['-march=%s' % platform.march]
-        elif platform is M1:
+        elif isinstance(platform, AppleArm):
             # NOTE:
-            # Apple M1 supports OpenMP through Apple's LLVM compiler.
+            # Apple Mx supports OpenMP through Apple's LLVM compiler.
             # The compiler can be installed with Homebrew or can be built from scratch.
             # Check if installed and set compiler flags accordingly
             llvmm1 = get_m1_llvm_path(language)
             if llvmm1 and language == 'openmp':
-                self.ldflags += ['-mcpu=apple-m1', '-fopenmp', '-L%s' % llvmm1['libs']]
+                mx = platform.march
+                self.ldflags += ['-mcpu=apple-%s' % mx,
+                                 '-fopenmp', '-L%s' % llvmm1['libs']]
                 self.cflags += ['-Xclang', '-I%s' % llvmm1['include']]
         else:
             if platform in [POWER8, POWER9]:
@@ -895,7 +897,7 @@ def __new__(cls, *args, **kwargs):
         platform = kwargs.pop('platform', configuration['platform'])
         language = kwargs.pop('language', configuration['language'])
 
-        if platform is M1:
+        if isinstance(platform, AppleArm):
             _base = ClangCompiler
         elif isinstance(platform, IntelDevice):
             _base = OneapiCompiler

From 33ab5b311589ba6120ec57281617f0fce091a721 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Fri, 12 Jan 2024 12:37:06 -0500
Subject: [PATCH 3/3] misc: prevent arithmetic reductions on non float

---
 devito/builtins/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/devito/builtins/utils.py b/devito/builtins/utils.py
index 84c336b15c..edc1bfd9ed 100644
--- a/devito/builtins/utils.py
+++ b/devito/builtins/utils.py
@@ -27,7 +27,7 @@ def __init__(self, *functions, op=dv.mpi.MPI.SUM, dtype=None):
         else:
             dtype = {f.dtype for f in functions}
             if len(dtype) == 1:
-                self.dtype = dtype.pop()
+                self.dtype = np.result_type(dtype.pop(), np.float32).type
             else:
                 raise ValueError("Illegal mixed data types")
         self.v = None
@@ -37,7 +37,7 @@ def __enter__(self):
         i = dv.Dimension(name='mri',)
         self.n = dv.Function(name='n', shape=(1,), dimensions=(i,),
                              grid=self.grid, dtype=self.dtype, space='host')
-        self.n.data[0] = 0
+        self.n.data[:] = 0
         return self
 
     def __exit__(self, exc_type, exc_value, traceback):