From 0f36c91ffe366858f8477350ccf99de27070643d Mon Sep 17 00:00:00 2001 From: Will Benfold Date: Fri, 5 Nov 2021 13:23:38 +0000 Subject: [PATCH 1/7] Refactor regrid.py's regrid_area_weighted_rectilinear to move weight array construction and src_cube index construction into prepare step --- lib/iris/analysis/_area_weighted.py | 2 + lib/iris/experimental/regrid.py | 431 ++++++++++++++++------------ 2 files changed, 254 insertions(+), 179 deletions(-) diff --git a/lib/iris/analysis/_area_weighted.py b/lib/iris/analysis/_area_weighted.py index 93459c5cc8..ae162f6c53 100644 --- a/lib/iris/analysis/_area_weighted.py +++ b/lib/iris/analysis/_area_weighted.py @@ -71,6 +71,7 @@ def __init__(self, src_grid_cube, target_grid_cube, mdtol=1): self.meshgrid_x, self.meshgrid_y, self.weights_info, + self.index_info, ) = _regrid_info def __call__(self, cube): @@ -124,6 +125,7 @@ def __call__(self, cube): self.meshgrid_x, self.meshgrid_y, self.weights_info, + self.index_info, ) return _regrid_area_weighted_rectilinear_src_and_grid__perform( cube, _regrid_info, mdtol=self._mdtol diff --git a/lib/iris/experimental/regrid.py b/lib/iris/experimental/regrid.py index b484b9eb78..38501e957a 100644 --- a/lib/iris/experimental/regrid.py +++ b/lib/iris/experimental/regrid.py @@ -403,7 +403,9 @@ def _weighted_mean_with_mdtol(data, weights, axis=None, mdtol=0): return res -def _regrid_area_weighted_array(src_data, x_dim, y_dim, weights_info, mdtol=0): +def _regrid_area_weighted_array( + src_data, x_dim, y_dim, weights_info, index_info, mdtol=0 +): """ Regrid the given data from its source grid to a new grid using an area weighted mean to determine the resulting data values. @@ -444,13 +446,19 @@ def _regrid_area_weighted_array(src_data, x_dim, y_dim, weights_info, mdtol=0): """ ( - cached_x_indices, - cached_y_indices, - max_x_indices, - max_y_indices, - cached_weights, + blank_weights, + src_area_weights, + new_data_mask_basis, ) = weights_info + ( + result_x_extent, + result_y_extent, + square_data_indices_y, + square_data_indices_x, + src_area_datas_required, + ) = index_info + # Ensure we have x_dim and y_dim. x_dim_orig = x_dim y_dim_orig = y_dim @@ -479,60 +487,47 @@ def _regrid_area_weighted_array(src_data, x_dim, y_dim, weights_info, mdtol=0): # Note that dtype is not preserved and that the array mask # allows for regions that do not overlap. new_shape = list(src_data.shape) - new_shape[x_dim] = len(cached_x_indices) - new_shape[y_dim] = len(cached_y_indices) - num_target_pts = len(cached_y_indices) * len(cached_x_indices) - src_areas_shape = list(src_data.shape) - src_areas_shape[y_dim] = max_y_indices - src_areas_shape[x_dim] = max_x_indices - src_areas_shape += [num_target_pts] + new_shape[x_dim] = result_x_extent + new_shape[y_dim] = result_y_extent + # Use input cube dtype or convert values to the smallest possible float # dtype when necessary. dtype = np.promote_types(src_data.dtype, np.float16) - # Create empty arrays to hold src_data per target point, and weights - src_area_datas = np.zeros(src_areas_shape, dtype=np.float64) - src_area_weights = np.zeros( - list((max_y_indices, max_x_indices, num_target_pts)) + + # Axes of data over which the weighted mean is calculated. 
+ axis = (y_dim, x_dim) + + # Use previously established indices + + src_area_datas_square = src_data[ + ..., square_data_indices_y, square_data_indices_x + ] + + _, src_area_datas_required = np.broadcast_arrays( + src_area_datas_square, src_area_datas_required + ) + + src_area_datas = np.where( + src_area_datas_required, src_area_datas_square, 0 ) # Flag to indicate whether the original data was a masked array. src_masked = src_data.mask.any() if ma.isMaskedArray(src_data) else False if src_masked: - src_area_masks = np.full(src_areas_shape, True, dtype=np.bool_) - else: - new_data_mask = np.full(new_shape, False, dtype=np.bool_) + src_area_masks_square = src_data.mask[ + ..., square_data_indices_y, square_data_indices_x + ] + src_area_masks = np.where( + src_area_datas_required, src_area_masks_square, True + ) - # Axes of data over which the weighted mean is calculated. - axis = (y_dim, x_dim) + else: + # If the weights were originally blank, set the weights to all 1 to + # avoid divide by 0 error and set the new data mask for making the + # values 0 + src_area_weights = np.where(blank_weights, 1, src_area_weights) - # Stack the src_area data and weights for each target point - target_pt_ji = -1 - for j, y_indices in enumerate(cached_y_indices): - for i, x_indices in enumerate(cached_x_indices): - target_pt_ji += 1 - # Determine whether to mask element i, j based on whether - # there are valid weights. - weights = cached_weights[j][i] - if isinstance(weights, bool) and not weights: - if not src_masked: - # Cheat! Fill the data with zeros and weights as one. - # The weighted average result will be the same, but - # we avoid dividing by zero. - src_area_weights[..., target_pt_ji] = 1 - new_data_mask[..., j, i] = True - else: - # Calculate weighted mean of data points. - # Slice out relevant data (this may or may not be a view() - # depending on x_indices being a slice or not). - data = src_data[..., y_indices, x_indices] - len_x = data.shape[-1] - len_y = data.shape[-2] - src_area_datas[..., 0:len_y, 0:len_x, target_pt_ji] = data - src_area_weights[0:len_y, 0:len_x, target_pt_ji] = weights - if src_masked: - src_area_masks[ - ..., 0:len_y, 0:len_x, target_pt_ji - ] = data.mask + new_data_mask = np.broadcast_to(new_data_mask_basis, new_shape) # Broadcast the weights array to allow numpy's ma.average # to be called. @@ -758,145 +753,220 @@ def _regrid_area_weighted_rectilinear_src_and_grid__prepare( else: area_func = _cartesian_area - def _calculate_regrid_area_weighted_weights( - src_x_bounds, - src_y_bounds, - grid_x_bounds, - grid_y_bounds, - grid_x_decreasing, - grid_y_decreasing, - area_func, - circular=False, - ): - """ - Compute the area weights used for area-weighted regridding. + # Recap + # * src_x_bounds: + # A NumPy array of bounds along the X axis defining the source grid. + # * src_y_bounds: + # A NumPy array of bounds along the Y axis defining the source grid. + # * grid_x_bounds: + # A NumPy array of bounds along the X axis defining the new grid. + # * grid_y_bounds: + # A NumPy array of bounds along the Y axis defining the new grid. + # * grid_x_decreasing: + # Boolean indicating whether the X coordinate of the new grid is + # in descending order. + # * grid_y_decreasing: + # Boolean indicating whether the Y coordinate of the new grid is + # in descending order. + # * area_func: + # A function that returns an (p, q) array of weights given an (p, 2) + # shaped array of Y bounds and an (q, 2) shaped array of X bounds. 
+ # * circular: + # A boolean indicating whether the `src_x_bounds` are periodic. + # Default is False. + + # Determine which grid bounds are within src extent. + y_within_bounds = _within_bounds( + src_y_bounds, grid_y_bounds, grid_y_decreasing + ) + x_within_bounds = _within_bounds( + src_x_bounds, grid_x_bounds, grid_x_decreasing + ) - Args: + # Cache which src_bounds are within grid bounds + cached_x_bounds = [] + cached_x_indices = [] + max_x_indices = 0 + for (x_0, x_1) in grid_x_bounds: + if grid_x_decreasing: + x_0, x_1 = x_1, x_0 + x_bounds, x_indices = _cropped_bounds(src_x_bounds, x_0, x_1) + cached_x_bounds.append(x_bounds) + cached_x_indices.append(x_indices) + # Keep record of the largest slice + if isinstance(x_indices, slice): + x_indices_size = np.sum(x_indices.stop - x_indices.start) + else: # is tuple of indices + x_indices_size = len(x_indices) + if x_indices_size > max_x_indices: + max_x_indices = x_indices_size + + # Cache which y src_bounds areas and weights are within grid bounds + cached_y_indices = [] + cached_weights = [] + max_y_indices = 0 + for j, (y_0, y_1) in enumerate(grid_y_bounds): + # Reverse lower and upper if dest grid is decreasing. + if grid_y_decreasing: + y_0, y_1 = y_1, y_0 + y_bounds, y_indices = _cropped_bounds(src_y_bounds, y_0, y_1) + cached_y_indices.append(y_indices) + # Keep record of the largest slice + if isinstance(y_indices, slice): + y_indices_size = np.sum(y_indices.stop - y_indices.start) + else: # is tuple of indices + y_indices_size = len(y_indices) + if y_indices_size > max_y_indices: + max_y_indices = y_indices_size + + weights_i = [] + for i, (x_0, x_1) in enumerate(grid_x_bounds): + # Reverse lower and upper if dest grid is decreasing. + if grid_x_decreasing: + x_0, x_1 = x_1, x_0 + x_bounds = cached_x_bounds[i] + x_indices = cached_x_indices[i] + + # Determine whether element i, j overlaps with src and hence + # an area weight should be computed. + # If x_0 > x_1 then we want [0]->x_1 and x_0->[0] + mod in the case + # of wrapped longitudes. However if the src grid is not global + # (i.e. circular) this new cell would include a region outside of + # the extent of the src grid and thus the weight is therefore + # invalid. + outside_extent = x_0 > x_1 and not circular + if ( + outside_extent + or not y_within_bounds[j] + or not x_within_bounds[i] + ): + weights = False + else: + # Calculate weights based on areas of cropped bounds. + if isinstance(x_indices, tuple) and isinstance( + y_indices, tuple + ): + raise RuntimeError( + "Cannot handle split bounds " "in both x and y." + ) + weights = area_func(y_bounds, x_bounds) + weights_i.append(weights) + cached_weights.append(weights_i) - * src_x_bounds: - A NumPy array of bounds along the X axis defining the source grid. - * src_y_bounds: - A NumPy array of bounds along the Y axis defining the source grid. - * grid_x_bounds: - A NumPy array of bounds along the X axis defining the new grid. - * grid_y_bounds: - A NumPy array of bounds along the Y axis defining the new grid. - * grid_x_decreasing: - Boolean indicating whether the X coordinate of the new grid is - in descending order. - * grid_y_decreasing: - Boolean indicating whether the Y coordinate of the new grid is - in descending order. - * area_func: - A function that returns an (p, q) array of weights given an (p, 2) - shaped array of Y bounds and an (q, 2) shaped array of X bounds. - - Kwargs: - - * circular: - A boolean indicating whether the `src_x_bounds` are periodic. - Default is False. 
+ # Go further, calculating the full weights array that we'll need in the + # perform step and the indices we'll need to extract from the cube we're + # regridding (src_data) - Returns: - The area weights to be used for area-weighted regridding. + result_y_extent = len(cached_y_indices) + result_x_extent = len(cached_x_indices) - """ - # Determine which grid bounds are within src extent. - y_within_bounds = _within_bounds( - src_y_bounds, grid_y_bounds, grid_y_decreasing - ) - x_within_bounds = _within_bounds( - src_x_bounds, grid_x_bounds, grid_x_decreasing - ) + # Total number of points + num_target_pts = result_y_extent * result_x_extent - # Cache which src_bounds are within grid bounds - cached_x_bounds = [] - cached_x_indices = [] - max_x_indices = 0 - for (x_0, x_1) in grid_x_bounds: - if grid_x_decreasing: - x_0, x_1 = x_1, x_0 - x_bounds, x_indices = _cropped_bounds(src_x_bounds, x_0, x_1) - cached_x_bounds.append(x_bounds) - cached_x_indices.append(x_indices) - # Keep record of the largest slice - if isinstance(x_indices, slice): - x_indices_size = np.sum(x_indices.stop - x_indices.start) - else: # is tuple of indices - x_indices_size = len(x_indices) - if x_indices_size > max_x_indices: - max_x_indices = x_indices_size - - # Cache which y src_bounds areas and weights are within grid bounds - cached_y_indices = [] - cached_weights = [] - max_y_indices = 0 - for j, (y_0, y_1) in enumerate(grid_y_bounds): - # Reverse lower and upper if dest grid is decreasing. - if grid_y_decreasing: - y_0, y_1 = y_1, y_0 - y_bounds, y_indices = _cropped_bounds(src_y_bounds, y_0, y_1) - cached_y_indices.append(y_indices) - # Keep record of the largest slice - if isinstance(y_indices, slice): - y_indices_size = np.sum(y_indices.stop - y_indices.start) - else: # is tuple of indices - y_indices_size = len(y_indices) - if y_indices_size > max_y_indices: - max_y_indices = y_indices_size - - weights_i = [] - for i, (x_0, x_1) in enumerate(grid_x_bounds): - # Reverse lower and upper if dest grid is decreasing. - if grid_x_decreasing: - x_0, x_1 = x_1, x_0 - x_bounds = cached_x_bounds[i] - x_indices = cached_x_indices[i] - - # Determine whether element i, j overlaps with src and hence - # an area weight should be computed. - # If x_0 > x_1 then we want [0]->x_1 and x_0->[0] + mod in the case - # of wrapped longitudes. However if the src grid is not global - # (i.e. circular) this new cell would include a region outside of - # the extent of the src grid and thus the weight is therefore - # invalid. - outside_extent = x_0 > x_1 and not circular - if ( - outside_extent - or not y_within_bounds[j] - or not x_within_bounds[i] - ): - weights = False + # Create empty array to hold weights + src_area_weights = np.zeros( + list((max_y_indices, max_x_indices, num_target_pts)) + ) + + # Built for the case where the source cube isn't masked + blank_weights = np.zeros((num_target_pts,)) + new_data_mask_basis = np.full( + (len(cached_y_indices), len(cached_x_indices)), False, dtype=np.bool_ + ) + + # To permit fancy indexing, we're going to store our data in an array of + # the size of the biggest. 
This means we need to track whether the data in + # that array is actually required and build those squared-off arrays + # TODO: Consider if a proper mask would be better + src_area_datas_required = np.full( + (max_y_indices, max_x_indices, num_target_pts), False + ) + square_data_indices_y = np.zeros( + (max_y_indices, max_x_indices, num_target_pts), dtype=int + ) + square_data_indices_x = np.zeros( + (max_y_indices, max_x_indices, num_target_pts), dtype=int + ) + + # Stack the weights for each target point and build the indices we'll need + # to extract the src_area_data + target_pt_ji = -1 + for j, y_indices in enumerate(cached_y_indices): + for i, x_indices in enumerate(cached_x_indices): + target_pt_ji += 1 + # Determine whether to mask element i, j based on whether + # there are valid weights. + weights = cached_weights[j][i] + if isinstance(weights, bool) and not weights: + # Prepare for the src_data not being masked by storing the + # information that will let us fill the data with zeros and + # weights as one. The weighted average result will be the same, + # but we avoid dividing by zero. + blank_weights[target_pt_ji] = True + new_data_mask_basis[j, i] = True + else: + # Establish which indices are actually in y_indices and x_indices + if isinstance(y_indices, slice): + y_indices = list( + range( + y_indices.start, + y_indices.stop, + y_indices.step or 1, + ) + ) else: - # Calculate weights based on areas of cropped bounds. - if isinstance(x_indices, tuple) and isinstance( - y_indices, tuple - ): - raise RuntimeError( - "Cannot handle split bounds " "in both x and y." + y_indices = list(y_indices) + + if isinstance(x_indices, slice): + x_indices = list( + range( + x_indices.start, + x_indices.stop, + x_indices.step or 1, ) - weights = area_func(y_bounds, x_bounds) - weights_i.append(weights) - cached_weights.append(weights_i) - return ( - tuple(cached_x_indices), - tuple(cached_y_indices), - max_x_indices, - max_y_indices, - tuple(cached_weights), - ) + ) + else: + x_indices = list(x_indices) + + # For the weights, we just need the lengths of these as we'd + # dropping them into a pre-made array - weights_info = _calculate_regrid_area_weighted_weights( - src_x_bounds, - src_y_bounds, - grid_x_bounds, - grid_y_bounds, - grid_x_decreasing, - grid_y_decreasing, - area_func, - circular, + len_y = len(y_indices) + len_x = len(x_indices) + + src_area_weights[0:len_y, 0:len_x, target_pt_ji] = weights + + # To build the indices for the source cube, we need equal + # shaped array so we pad with 0s and record the need to mask + # them in src_area_datas_required + padded_y_indices = y_indices + [0] * (max_y_indices - len_y) + padded_x_indices = x_indices + [0] * (max_x_indices - len_x) + + square_data_indices_y[..., target_pt_ji] = np.array( + padded_y_indices + )[:, np.newaxis] + square_data_indices_x[..., target_pt_ji] = padded_x_indices + + src_area_datas_required[0:len_y, 0:len_x, target_pt_ji] = True + + # Package up the return data + + weights_info = ( + blank_weights, + src_area_weights, + new_data_mask_basis, ) + index_info = ( + result_x_extent, + result_y_extent, + square_data_indices_y, + square_data_indices_x, + src_area_datas_required, + ) + + # Now return it + return ( src_x, src_y, @@ -907,6 +977,7 @@ def _calculate_regrid_area_weighted_weights( meshgrid_x, meshgrid_y, weights_info, + index_info, ) @@ -929,6 +1000,7 @@ def _regrid_area_weighted_rectilinear_src_and_grid__perform( meshgrid_x, meshgrid_y, weights_info, + index_info, ) = regrid_info # Calculate new data array for 
regridded cube. @@ -937,6 +1009,7 @@ def _regrid_area_weighted_rectilinear_src_and_grid__perform( x_dim=src_x_dim, y_dim=src_y_dim, weights_info=weights_info, + index_info=index_info, mdtol=mdtol, ) From 68a7a5cf12e1aa4da22d018b4dfb19a1c3c67471 Mon Sep 17 00:00:00 2001 From: Will Benfold Date: Fri, 5 Nov 2021 13:56:55 +0000 Subject: [PATCH 2/7] Update test to expect the additional output value from prepare step --- .../unit/analysis/area_weighted/test_AreaWeightedRegridder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py b/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py index 2fbfbacfd2..7fada79be6 100644 --- a/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py +++ b/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py @@ -50,7 +50,7 @@ def check_mdtol(self, mdtol=None): src_grid, target_grid ) ) - self.assertEqual(len(_regrid_info), 9) + self.assertEqual(len(_regrid_info), 10) with mock.patch( "iris.experimental.regrid." "_regrid_area_weighted_rectilinear_src_and_grid__prepare", From d058b2ef9c88818e104cfe6697ebe63610219ef2 Mon Sep 17 00:00:00 2001 From: Will Benfold Date: Tue, 9 Nov 2021 16:24:22 +0000 Subject: [PATCH 3/7] Integration tests of area weighted regrid workflow (open, regrid, save) --- .../analysis/test_area_weighted.py | 73 +++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 lib/iris/tests/integration/analysis/test_area_weighted.py diff --git a/lib/iris/tests/integration/analysis/test_area_weighted.py b/lib/iris/tests/integration/analysis/test_area_weighted.py new file mode 100644 index 0000000000..304419fa92 --- /dev/null +++ b/lib/iris/tests/integration/analysis/test_area_weighted.py @@ -0,0 +1,73 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the LGPL license. +# See COPYING and COPYING.LESSER in the root of the repository for full +# licensing details. +"""Integration tests for area weighted regridding.""" + +# Import iris.tests first so that some things can be initialised before +# importing anything else. +import iris.tests as tests # isort:skip + +import tempfile + +import iris +from iris.analysis import AreaWeighted + + +@tests.skip_data +class AreaWeightedTests(tests.IrisTest): + def setup(self): + # Prepare a cube and a template + + cube_file_path = tests.get_data_path( + ["NetCDF", "regrid", "regrid_xyt.nc"] + ) + self.cube = iris.load_cube(cube_file_path) + + template_file_path = tests.get_data_path( + ["NetCDF", "regrid", "regrid_template_global_latlon.nc"] + ) + self.template_cube = iris.load_cube(template_file_path) + + def test_regrid_area_w_lazy(self): + # Regrid the cube onto the template. + out = self.cube.regrid(self.template_cube, AreaWeighted()) + # Check data is still lazy + self.assertTrue(self.cube.has_lazy_data()) + self.assertTrue(out.has_lazy_data()) + # Save the data + with tempfile.TemporaryFile(suffix=".nc") as fp: + iris.save(out, fp) + + def test_regrid_area_w_lazy_chunked(self): + # Chunked data makes the regridder run repeatedly + self.cube.data = self.cube.lazy_data().rechunk((1, -1, -1)) + # Regrid the cube onto the template. 
+ out = self.cube.regrid(self.template_cube, AreaWeighted()) + # Check data is still lazy + self.assertTrue(self.cube.has_lazy_data()) + self.assertTrue(out.has_lazy_data()) + # Save the data + with tempfile.TemporaryFile(suffix=".nc") as fp: + iris.save(out, fp) + + def test_regrid_area_w_real_save(self): + real_cube = self.cube.copy() + real_cube.data + # Regrid the cube onto the template. + out = real_cube.regrid(self.template_cube, AreaWeighted()) + # Realise the data + out.data + # Save the data + with tempfile.TemporaryFile(suffix=".nc") as fp: + iris.save(out, fp) + + def test_regrid_area_w_real_start(self): + real_cube = self.cube.copy() + real_cube.data + # Regrid the cube onto the template. + out = real_cube.regrid(self.template_cube, AreaWeighted()) + # Save the data + with tempfile.TemporaryFile(suffix=".nc") as fp: + iris.save(out, fp) From 7d1c2eabc1175464d74e01c5ff524ff1194d923b Mon Sep 17 00:00:00 2001 From: Will Benfold Date: Wed, 10 Nov 2021 12:19:23 +0000 Subject: [PATCH 4/7] Unit tests to check source cube and output cube are lazy if not explicitly realised --- .../test_AreaWeightedRegridder.py | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py b/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py index 7fada79be6..4b1dba87e8 100644 --- a/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py +++ b/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py @@ -15,6 +15,7 @@ import numpy as np +from iris import load_cube from iris.analysis._area_weighted import AreaWeightedRegridder from iris.coord_systems import GeogCS from iris.coords import DimCoord @@ -247,5 +248,41 @@ def test_src_data_different_dims(self): ) +@tests.skip_data +class TestLazy(tests.IrisTest): + # Setup + def setup(self) -> None: + # Prepare a cube and a template + + cube_file_path = tests.get_data_path( + ["NetCDF", "regrid", "regrid_xyt.nc"] + ) + self.cube = load_cube(cube_file_path) + + template_file_path = tests.get_data_path( + ["NetCDF", "regrid", "regrid_template_global_latlon.nc"] + ) + self.template_cube = load_cube(template_file_path) + + # Chunked data makes the regridder run repeatedly + self.cube.data = self.cube.lazy_data().rechunk((1, -1, -1)) + + def test_src_stays_lazy(self) -> None: + cube = self.cube.copy() + # Regrid the cube onto the template. + regridder = AreaWeightedRegridder(cube, self.template_cube) + regridder.regrid(cube) + # Base cube stays lazy + self.assertTrue(cube.has_lazy_data()) + + def test_output_lazy(self) -> None: + cube = self.cube.copy() + # Regrid the cube onto the template. 
+ regridder = AreaWeightedRegridder(cube, self.template_cube) + out = regridder.regrid(cube) + # Lazy base cube means lazy output + self.assertTrue(out.has_lazy_data()) + + if __name__ == "__main__": tests.main() From 4fddc7fb96328236a0e69026710c5ef1803df5df Mon Sep 17 00:00:00 2001 From: Will Benfold Date: Thu, 11 Nov 2021 12:13:55 +0000 Subject: [PATCH 5/7] Restore the function defined within a function to improve diff --- lib/iris/experimental/regrid.py | 229 +++++++++++++++++++------------- 1 file changed, 134 insertions(+), 95 deletions(-) diff --git a/lib/iris/experimental/regrid.py b/lib/iris/experimental/regrid.py index 38501e957a..ca8e2dd2ae 100644 --- a/lib/iris/experimental/regrid.py +++ b/lib/iris/experimental/regrid.py @@ -753,105 +753,144 @@ def _regrid_area_weighted_rectilinear_src_and_grid__prepare( else: area_func = _cartesian_area - # Recap - # * src_x_bounds: - # A NumPy array of bounds along the X axis defining the source grid. - # * src_y_bounds: - # A NumPy array of bounds along the Y axis defining the source grid. - # * grid_x_bounds: - # A NumPy array of bounds along the X axis defining the new grid. - # * grid_y_bounds: - # A NumPy array of bounds along the Y axis defining the new grid. - # * grid_x_decreasing: - # Boolean indicating whether the X coordinate of the new grid is - # in descending order. - # * grid_y_decreasing: - # Boolean indicating whether the Y coordinate of the new grid is - # in descending order. - # * area_func: - # A function that returns an (p, q) array of weights given an (p, 2) - # shaped array of Y bounds and an (q, 2) shaped array of X bounds. - # * circular: - # A boolean indicating whether the `src_x_bounds` are periodic. - # Default is False. - - # Determine which grid bounds are within src extent. - y_within_bounds = _within_bounds( - src_y_bounds, grid_y_bounds, grid_y_decreasing - ) - x_within_bounds = _within_bounds( - src_x_bounds, grid_x_bounds, grid_x_decreasing - ) + def _calculate_regrid_area_weighted_weights( + src_x_bounds, + src_y_bounds, + grid_x_bounds, + grid_y_bounds, + grid_x_decreasing, + grid_y_decreasing, + area_func, + circular=False, + ): + """ + Compute the area weights used for area-weighted regridding. + Args: + * src_x_bounds: + A NumPy array of bounds along the X axis defining the source grid. + * src_y_bounds: + A NumPy array of bounds along the Y axis defining the source grid. + * grid_x_bounds: + A NumPy array of bounds along the X axis defining the new grid. + * grid_y_bounds: + A NumPy array of bounds along the Y axis defining the new grid. + * grid_x_decreasing: + Boolean indicating whether the X coordinate of the new grid is + in descending order. + * grid_y_decreasing: + Boolean indicating whether the Y coordinate of the new grid is + in descending order. + * area_func: + A function that returns an (p, q) array of weights given an (p, 2) + shaped array of Y bounds and an (q, 2) shaped array of X bounds. + Kwargs: + * circular: + A boolean indicating whether the `src_x_bounds` are periodic. + Default is False. + Returns: + The area weights to be used for area-weighted regridding. + """ + # Determine which grid bounds are within src extent. 
+ y_within_bounds = _within_bounds( + src_y_bounds, grid_y_bounds, grid_y_decreasing + ) + x_within_bounds = _within_bounds( + src_x_bounds, grid_x_bounds, grid_x_decreasing + ) - # Cache which src_bounds are within grid bounds - cached_x_bounds = [] - cached_x_indices = [] - max_x_indices = 0 - for (x_0, x_1) in grid_x_bounds: - if grid_x_decreasing: - x_0, x_1 = x_1, x_0 - x_bounds, x_indices = _cropped_bounds(src_x_bounds, x_0, x_1) - cached_x_bounds.append(x_bounds) - cached_x_indices.append(x_indices) - # Keep record of the largest slice - if isinstance(x_indices, slice): - x_indices_size = np.sum(x_indices.stop - x_indices.start) - else: # is tuple of indices - x_indices_size = len(x_indices) - if x_indices_size > max_x_indices: - max_x_indices = x_indices_size - - # Cache which y src_bounds areas and weights are within grid bounds - cached_y_indices = [] - cached_weights = [] - max_y_indices = 0 - for j, (y_0, y_1) in enumerate(grid_y_bounds): - # Reverse lower and upper if dest grid is decreasing. - if grid_y_decreasing: - y_0, y_1 = y_1, y_0 - y_bounds, y_indices = _cropped_bounds(src_y_bounds, y_0, y_1) - cached_y_indices.append(y_indices) - # Keep record of the largest slice - if isinstance(y_indices, slice): - y_indices_size = np.sum(y_indices.stop - y_indices.start) - else: # is tuple of indices - y_indices_size = len(y_indices) - if y_indices_size > max_y_indices: - max_y_indices = y_indices_size - - weights_i = [] - for i, (x_0, x_1) in enumerate(grid_x_bounds): - # Reverse lower and upper if dest grid is decreasing. + # Cache which src_bounds are within grid bounds + cached_x_bounds = [] + cached_x_indices = [] + max_x_indices = 0 + for (x_0, x_1) in grid_x_bounds: if grid_x_decreasing: x_0, x_1 = x_1, x_0 - x_bounds = cached_x_bounds[i] - x_indices = cached_x_indices[i] - - # Determine whether element i, j overlaps with src and hence - # an area weight should be computed. - # If x_0 > x_1 then we want [0]->x_1 and x_0->[0] + mod in the case - # of wrapped longitudes. However if the src grid is not global - # (i.e. circular) this new cell would include a region outside of - # the extent of the src grid and thus the weight is therefore - # invalid. - outside_extent = x_0 > x_1 and not circular - if ( - outside_extent - or not y_within_bounds[j] - or not x_within_bounds[i] - ): - weights = False - else: - # Calculate weights based on areas of cropped bounds. - if isinstance(x_indices, tuple) and isinstance( - y_indices, tuple + x_bounds, x_indices = _cropped_bounds(src_x_bounds, x_0, x_1) + cached_x_bounds.append(x_bounds) + cached_x_indices.append(x_indices) + # Keep record of the largest slice + if isinstance(x_indices, slice): + x_indices_size = np.sum(x_indices.stop - x_indices.start) + else: # is tuple of indices + x_indices_size = len(x_indices) + if x_indices_size > max_x_indices: + max_x_indices = x_indices_size + + # Cache which y src_bounds areas and weights are within grid bounds + cached_y_indices = [] + cached_weights = [] + max_y_indices = 0 + for j, (y_0, y_1) in enumerate(grid_y_bounds): + # Reverse lower and upper if dest grid is decreasing. 
+ if grid_y_decreasing: + y_0, y_1 = y_1, y_0 + y_bounds, y_indices = _cropped_bounds(src_y_bounds, y_0, y_1) + cached_y_indices.append(y_indices) + # Keep record of the largest slice + if isinstance(y_indices, slice): + y_indices_size = np.sum(y_indices.stop - y_indices.start) + else: # is tuple of indices + y_indices_size = len(y_indices) + if y_indices_size > max_y_indices: + max_y_indices = y_indices_size + + weights_i = [] + for i, (x_0, x_1) in enumerate(grid_x_bounds): + # Reverse lower and upper if dest grid is decreasing. + if grid_x_decreasing: + x_0, x_1 = x_1, x_0 + x_bounds = cached_x_bounds[i] + x_indices = cached_x_indices[i] + + # Determine whether element i, j overlaps with src and hence + # an area weight should be computed. + # If x_0 > x_1 then we want [0]->x_1 and x_0->[0] + mod in the case + # of wrapped longitudes. However if the src grid is not global + # (i.e. circular) this new cell would include a region outside of + # the extent of the src grid and thus the weight is therefore + # invalid. + outside_extent = x_0 > x_1 and not circular + if ( + outside_extent + or not y_within_bounds[j] + or not x_within_bounds[i] ): - raise RuntimeError( - "Cannot handle split bounds " "in both x and y." - ) - weights = area_func(y_bounds, x_bounds) - weights_i.append(weights) - cached_weights.append(weights_i) + weights = False + else: + # Calculate weights based on areas of cropped bounds. + if isinstance(x_indices, tuple) and isinstance( + y_indices, tuple + ): + raise RuntimeError( + "Cannot handle split bounds " "in both x and y." + ) + weights = area_func(y_bounds, x_bounds) + weights_i.append(weights) + cached_weights.append(weights_i) + return ( + tuple(cached_x_indices), + tuple(cached_y_indices), + max_x_indices, + max_y_indices, + tuple(cached_weights), + ) + + ( + cached_x_indices, + cached_y_indices, + max_x_indices, + max_y_indices, + cached_weights, + ) = _calculate_regrid_area_weighted_weights( + src_x_bounds, + src_y_bounds, + grid_x_bounds, + grid_y_bounds, + grid_x_decreasing, + grid_y_decreasing, + area_func, + circular, + ) # Go further, calculating the full weights array that we'll need in the # perform step and the indices we'll need to extract from the cube we're From 5ec6ae9c6f34bd806dd8167c61a3b7cdd2baf21e Mon Sep 17 00:00:00 2001 From: Will Benfold Date: Thu, 11 Nov 2021 17:09:59 +0000 Subject: [PATCH 6/7] Review changes pt1 --- lib/iris/experimental/regrid.py | 15 +++++++++------ .../integration/analysis/test_area_weighted.py | 6 +++++- .../area_weighted/test_AreaWeightedRegridder.py | 2 +- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/lib/iris/experimental/regrid.py b/lib/iris/experimental/regrid.py index ca8e2dd2ae..9a02fbd3b1 100644 --- a/lib/iris/experimental/regrid.py +++ b/lib/iris/experimental/regrid.py @@ -896,8 +896,8 @@ def _calculate_regrid_area_weighted_weights( # perform step and the indices we'll need to extract from the cube we're # regridding (src_data) - result_y_extent = len(cached_y_indices) - result_x_extent = len(cached_x_indices) + result_y_extent = len(grid_y_bounds) + result_x_extent = len(grid_x_bounds) # Total number of points num_target_pts = result_y_extent * result_x_extent @@ -913,8 +913,11 @@ def _calculate_regrid_area_weighted_weights( (len(cached_y_indices), len(cached_x_indices)), False, dtype=np.bool_ ) - # To permit fancy indexing, we're going to store our data in an array of - # the size of the biggest. 
This means we need to track whether the data in + # To permit fancy indexing, we need to store our data in an array whose + # first two dimensions represent the indices needed for the target cell. + # Since target cells can require a different number of indices, the size of + # these dimensions should be the maximum of this number. + # This means we need to track whether the data in # that array is actually required and build those squared-off arrays # TODO: Consider if a proper mask would be better src_area_datas_required = np.full( @@ -936,7 +939,7 @@ def _calculate_regrid_area_weighted_weights( # Determine whether to mask element i, j based on whether # there are valid weights. weights = cached_weights[j][i] - if isinstance(weights, bool) and not weights: + if weights is False: # Prepare for the src_data not being masked by storing the # information that will let us fill the data with zeros and # weights as one. The weighted average result will be the same, @@ -967,7 +970,7 @@ def _calculate_regrid_area_weighted_weights( else: x_indices = list(x_indices) - # For the weights, we just need the lengths of these as we'd + # For the weights, we just need the lengths of these as we're # dropping them into a pre-made array len_y = len(y_indices) diff --git a/lib/iris/tests/integration/analysis/test_area_weighted.py b/lib/iris/tests/integration/analysis/test_area_weighted.py index 304419fa92..d2f1a4ad44 100644 --- a/lib/iris/tests/integration/analysis/test_area_weighted.py +++ b/lib/iris/tests/integration/analysis/test_area_weighted.py @@ -17,7 +17,7 @@ @tests.skip_data class AreaWeightedTests(tests.IrisTest): - def setup(self): + def setUp(self): # Prepare a cube and a template cube_file_path = tests.get_data_path( @@ -71,3 +71,7 @@ def test_regrid_area_w_real_start(self): # Save the data with tempfile.TemporaryFile(suffix=".nc") as fp: iris.save(out, fp) + + +if __name__ == "__main__": + tests.main() diff --git a/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py b/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py index 4b1dba87e8..a747e11d7a 100644 --- a/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py +++ b/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py @@ -251,7 +251,7 @@ def test_src_data_different_dims(self): @tests.skip_data class TestLazy(tests.IrisTest): # Setup - def setup(self) -> None: + def setUp(self) -> None: # Prepare a cube and a template cube_file_path = tests.get_data_path( From b60b6ad551330513bb1630ebd0f06d72191da2b4 Mon Sep 17 00:00:00 2001 From: Will Benfold Date: Mon, 15 Nov 2021 12:19:21 +0000 Subject: [PATCH 7/7] Fix how we call the regridder --- .../unit/analysis/area_weighted/test_AreaWeightedRegridder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py b/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py index a747e11d7a..a44ccb32bd 100644 --- a/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py +++ b/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py @@ -271,7 +271,7 @@ def test_src_stays_lazy(self) -> None: cube = self.cube.copy() # Regrid the cube onto the template. 
regridder = AreaWeightedRegridder(cube, self.template_cube) - regridder.regrid(cube) + regridder(cube) # Base cube stays lazy self.assertTrue(cube.has_lazy_data()) @@ -279,7 +279,7 @@ def test_output_lazy(self) -> None: cube = self.cube.copy() # Regrid the cube onto the template. regridder = AreaWeightedRegridder(cube, self.template_cube) - out = regridder.regrid(cube) + out = regridder(cube) # Lazy base cube means lazy output self.assertTrue(out.has_lazy_data())
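
A note on the core technique, since it is spread across several hunks above: the refactor removes the per-target-cell Python loop by padding each cell's ragged source indices out to a common shape (square_data_indices_y/x), tracking which padded slots are real (src_area_datas_required), and pre-building the stacked weights (src_area_weights). The perform step then reduces to one fancy-indexing gather plus a single weighted mean. A minimal self-contained sketch of that idea, using hypothetical names rather than the Iris internals:

import numpy as np

src = np.arange(25, dtype=float).reshape(5, 5)

# Ragged per-target-cell source indices and weights, as the prepare step
# would derive from the overlap of source and target cell bounds.
y_indices = [[0, 1], [2, 3, 4]]
x_indices = [[0, 1, 2], [3, 4]]
weights = [np.ones((2, 3)), np.ones((3, 2))]

max_y = max(len(ys) for ys in y_indices)
max_x = max(len(xs) for xs in x_indices)
n_targets = len(y_indices)

# "Prepare" step: pad every cell's indices to a common (max_y, max_x)
# shape and record which padded slots are real in a boolean array.
idx_y = np.zeros((max_y, max_x, n_targets), dtype=int)
idx_x = np.zeros((max_y, max_x, n_targets), dtype=int)
required = np.zeros((max_y, max_x, n_targets), dtype=bool)
w = np.zeros((max_y, max_x, n_targets))
for t, (ys, xs, wt) in enumerate(zip(y_indices, x_indices, weights)):
    ny, nx = len(ys), len(xs)
    idx_y[:ny, :nx, t] = np.asarray(ys)[:, np.newaxis]
    idx_x[:ny, :nx, t] = xs
    required[:ny, :nx, t] = True
    w[:ny, :nx, t] = wt

# "Perform" step: one gather over the source data, with padded slots
# zeroed, then an area-weighted mean per target cell in one reduction.
gathered = np.where(required, src[idx_y, idx_x], 0.0)
means = (gathered * w).sum(axis=(0, 1)) / w.sum(axis=(0, 1))
print(means)  # [ 3.5 18.5], one weighted mean per target cell

Because padded slots carry zero weight they cannot bias the mean. Target cells with no valid overlap at all are the separate blank_weights / new_data_mask_basis case in the patch: their weights are set to one to avoid division by zero and the result is masked instead.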
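
The lazy behaviour the new integration and unit tests exercise follows from the same change: once the perform step is pure array indexing and reduction, it can be mapped over a chunked (dask) source array instead of forcing realisation. Rechunking to (1, -1, -1) keeps each horizontal field whole while splitting time, so the regrid runs once per time chunk at compute or save time. A rough, generic illustration using dask directly (the block-averaging stand-in below is not the area-weighted algorithm, and Iris's internal wiring differs):

import dask.array as da

def regrid_block(block):
    # Stand-in for a perform step: collapse each (1, y, x) block onto a
    # 2 x 2 target grid by block-averaging.
    t, y, x = block.shape
    return block.reshape(t, 2, y // 2, 2, x // 2).mean(axis=(2, 4))

# One chunk per time step, horizontal dims whole, cf. rechunk((1, -1, -1)).
src = da.random.random((4, 10, 10), chunks=(1, 10, 10))
out = da.map_blocks(regrid_block, src, dtype=src.dtype, chunks=(1, 2, 2))
print(out.shape)        # (4, 2, 2), still lazy at this point
result = out.compute()  # regrid_block runs once per time chunk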