diff --git a/lib/iris/analysis/_area_weighted.py b/lib/iris/analysis/_area_weighted.py
index 93459c5cc8..ae162f6c53 100644
--- a/lib/iris/analysis/_area_weighted.py
+++ b/lib/iris/analysis/_area_weighted.py
@@ -71,6 +71,7 @@ def __init__(self, src_grid_cube, target_grid_cube, mdtol=1):
             self.meshgrid_x,
             self.meshgrid_y,
             self.weights_info,
+            self.index_info,
         ) = _regrid_info
 
     def __call__(self, cube):
@@ -124,6 +125,7 @@ def __call__(self, cube):
             self.meshgrid_x,
             self.meshgrid_y,
             self.weights_info,
+            self.index_info,
         )
         return _regrid_area_weighted_rectilinear_src_and_grid__perform(
             cube, _regrid_info, mdtol=self._mdtol
diff --git a/lib/iris/experimental/regrid.py b/lib/iris/experimental/regrid.py
index b484b9eb78..9a02fbd3b1 100644
--- a/lib/iris/experimental/regrid.py
+++ b/lib/iris/experimental/regrid.py
@@ -403,7 +403,9 @@ def _weighted_mean_with_mdtol(data, weights, axis=None, mdtol=0):
     return res
 
 
-def _regrid_area_weighted_array(src_data, x_dim, y_dim, weights_info, mdtol=0):
+def _regrid_area_weighted_array(
+    src_data, x_dim, y_dim, weights_info, index_info, mdtol=0
+):
     """
     Regrid the given data from its source grid to a new grid using
     an area weighted mean to determine the resulting data values.
@@ -444,13 +446,19 @@ def _regrid_area_weighted_array(src_data, x_dim, y_dim, weights_info, mdtol=0):
 
     """
     (
-        cached_x_indices,
-        cached_y_indices,
-        max_x_indices,
-        max_y_indices,
-        cached_weights,
+        blank_weights,
+        src_area_weights,
+        new_data_mask_basis,
     ) = weights_info
 
+    (
+        result_x_extent,
+        result_y_extent,
+        square_data_indices_y,
+        square_data_indices_x,
+        src_area_datas_required,
+    ) = index_info
+
     # Ensure we have x_dim and y_dim.
     x_dim_orig = x_dim
     y_dim_orig = y_dim
@@ -479,60 +487,47 @@ def _regrid_area_weighted_array(src_data, x_dim, y_dim, weights_info, mdtol=0):
     # Note that dtype is not preserved and that the array mask
     # allows for regions that do not overlap.
     new_shape = list(src_data.shape)
-    new_shape[x_dim] = len(cached_x_indices)
-    new_shape[y_dim] = len(cached_y_indices)
-    num_target_pts = len(cached_y_indices) * len(cached_x_indices)
-    src_areas_shape = list(src_data.shape)
-    src_areas_shape[y_dim] = max_y_indices
-    src_areas_shape[x_dim] = max_x_indices
-    src_areas_shape += [num_target_pts]
+    new_shape[x_dim] = result_x_extent
+    new_shape[y_dim] = result_y_extent
+
     # Use input cube dtype or convert values to the smallest possible float
     # dtype when necessary.
     dtype = np.promote_types(src_data.dtype, np.float16)
-    # Create empty arrays to hold src_data per target point, and weights
-    src_area_datas = np.zeros(src_areas_shape, dtype=np.float64)
-    src_area_weights = np.zeros(
-        list((max_y_indices, max_x_indices, num_target_pts))
+
+    # Axes of data over which the weighted mean is calculated.
+    axis = (y_dim, x_dim)
+
+    # Use previously established indices
+
+    src_area_datas_square = src_data[
+        ..., square_data_indices_y, square_data_indices_x
+    ]
+
+    _, src_area_datas_required = np.broadcast_arrays(
+        src_area_datas_square, src_area_datas_required
+    )
+
+    src_area_datas = np.where(
+        src_area_datas_required, src_area_datas_square, 0
     )
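+    # The gather above pulls out, for every target cell at once, a padded
+    # (max_y_indices, max_x_indices) block of source values; the padding
+    # positions, flagged False in src_area_datas_required, are zeroed so
+    # they cannot contribute to the weighted mean computed below.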
 
     # Flag to indicate whether the original data was a masked array.
     src_masked = src_data.mask.any() if ma.isMaskedArray(src_data) else False
     if src_masked:
-        src_area_masks = np.full(src_areas_shape, True, dtype=np.bool_)
-    else:
-        new_data_mask = np.full(new_shape, False, dtype=np.bool_)
+        src_area_masks_square = src_data.mask[
+            ..., square_data_indices_y, square_data_indices_x
+        ]
+        src_area_masks = np.where(
+            src_area_datas_required, src_area_masks_square, True
+        )
 
-    # Axes of data over which the weighted mean is calculated.
-    axis = (y_dim, x_dim)
+    else:
+        # If the weights were originally blank, set the weights to all 1 to
+        # avoid a divide-by-zero error, and set the new data mask so that
+        # the cells with no valid weights are masked in the result
+        src_area_weights = np.where(blank_weights, 1, src_area_weights)
 
-    # Stack the src_area data and weights for each target point
-    target_pt_ji = -1
-    for j, y_indices in enumerate(cached_y_indices):
-        for i, x_indices in enumerate(cached_x_indices):
-            target_pt_ji += 1
-            # Determine whether to mask element i, j based on whether
-            # there are valid weights.
-            weights = cached_weights[j][i]
-            if isinstance(weights, bool) and not weights:
-                if not src_masked:
-                    # Cheat! Fill the data with zeros and weights as one.
-                    # The weighted average result will be the same, but
-                    # we avoid dividing by zero.
-                    src_area_weights[..., target_pt_ji] = 1
-                    new_data_mask[..., j, i] = True
-            else:
-                # Calculate weighted mean of data points.
-                # Slice out relevant data (this may or may not be a view()
-                # depending on x_indices being a slice or not).
-                data = src_data[..., y_indices, x_indices]
-                len_x = data.shape[-1]
-                len_y = data.shape[-2]
-                src_area_datas[..., 0:len_y, 0:len_x, target_pt_ji] = data
-                src_area_weights[0:len_y, 0:len_x, target_pt_ji] = weights
-                if src_masked:
-                    src_area_masks[
-                        ..., 0:len_y, 0:len_x, target_pt_ji
-                    ] = data.mask
+        new_data_mask = np.broadcast_to(new_data_mask_basis, new_shape)
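+        # new_data_mask_basis was computed once in the prepare step; it
+        # marks the target cells that overlap no source cells, and the
+        # broadcast extends that 2d mask across every leading dimension
+        # of the output.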
 
     # Broadcast the weights array to allow numpy's ma.average
     # to be called.
@@ -770,9 +765,7 @@ def _calculate_regrid_area_weighted_weights(
     ):
         """
         Compute the area weights used for area-weighted regridding.
-
         Args:
-
         * src_x_bounds:
             A NumPy array of bounds along the X axis defining the source grid.
         * src_y_bounds:
@@ -790,16 +783,12 @@ def _calculate_regrid_area_weighted_weights(
         * area_func:
            A function that returns an (p, q) array of weights given an (p, 2)
            shaped array of Y bounds and an (q, 2) shaped array of X bounds.
-
        Kwargs:
-
        * circular:
            A boolean indicating whether the `src_x_bounds` are periodic.
            Default is False.
-
        Returns:
            The area weights to be used for area-weighted regridding.
-
        """
        # Determine which grid bounds are within src extent.
        y_within_bounds = _within_bounds(
@@ -886,7 +875,13 @@ def _calculate_regrid_area_weighted_weights(
             tuple(cached_weights),
         )
 
-    weights_info = _calculate_regrid_area_weighted_weights(
+    (
+        cached_x_indices,
+        cached_y_indices,
+        max_x_indices,
+        max_y_indices,
+        cached_weights,
+    ) = _calculate_regrid_area_weighted_weights(
         src_x_bounds,
         src_y_bounds,
         grid_x_bounds,
@@ -897,6 +892,123 @@
         circular,
     )
 
+    # Go further, calculating the full weights array that we'll need in the
+    # perform step and the indices we'll need to extract from the cube we're
+    # regridding (src_data)
+
+    result_y_extent = len(grid_y_bounds)
+    result_x_extent = len(grid_x_bounds)
+
+    # Total number of points
+    num_target_pts = result_y_extent * result_x_extent
+
+    # Create empty array to hold weights
+    src_area_weights = np.zeros(
+        list((max_y_indices, max_x_indices, num_target_pts))
+    )
+
+    # Needed for the case where the source cube isn't masked
+    blank_weights = np.zeros((num_target_pts,))
+    new_data_mask_basis = np.full(
+        (len(cached_y_indices), len(cached_x_indices)), False, dtype=np.bool_
+    )
+
+    # To permit fancy indexing, we need to store our data in an array whose
+    # first two dimensions represent the indices needed for the target cell.
+    # Since target cells can each require a different number of indices,
+    # these dimensions are sized to the maximum needed, so we must also
+    # track which entries of the squared-off arrays are actually required.
+    # TODO: Consider if a proper mask would be better
+    src_area_datas_required = np.full(
+        (max_y_indices, max_x_indices, num_target_pts), False
+    )
+    square_data_indices_y = np.zeros(
+        (max_y_indices, max_x_indices, num_target_pts), dtype=int
+    )
+    square_data_indices_x = np.zeros(
+        (max_y_indices, max_x_indices, num_target_pts), dtype=int
+    )
+
+    # Stack the weights for each target point and build the indices we'll
+    # need to extract the src_area_data
+    target_pt_ji = -1
+    for j, y_indices in enumerate(cached_y_indices):
+        for i, x_indices in enumerate(cached_x_indices):
+            target_pt_ji += 1
+            # Determine whether to mask element i, j based on whether
+            # there are valid weights.
+            weights = cached_weights[j][i]
+            if weights is False:
+                # There are no valid weights for this target cell. Record
+                # the information that (when src_data is unmasked) lets us
+                # fill the data with zeros and the weights with ones. The
+                # weighted average result will be the same, but we avoid
+                # dividing by zero.
+                blank_weights[target_pt_ji] = True
+                new_data_mask_basis[j, i] = True
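+                # For these cells the index arrays keep their zero padding
+                # and src_area_datas_required stays False, so whatever the
+                # gather picks up at perform time is ignored.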
+            else:
+                # Expand y_indices and x_indices into explicit lists of
+                # indices.
+                if isinstance(y_indices, slice):
+                    y_indices = list(
+                        range(
+                            y_indices.start,
+                            y_indices.stop,
+                            y_indices.step or 1,
+                        )
+                    )
+                else:
+                    y_indices = list(y_indices)
+
+                if isinstance(x_indices, slice):
+                    x_indices = list(
+                        range(
+                            x_indices.start,
+                            x_indices.stop,
+                            x_indices.step or 1,
+                        )
+                    )
+                else:
+                    x_indices = list(x_indices)
+
+                # For the weights, we just need the lengths of these as
+                # we're dropping them into a pre-made array
+                len_y = len(y_indices)
+                len_x = len(x_indices)
+
+                src_area_weights[0:len_y, 0:len_x, target_pt_ji] = weights
+
+                # To build the indices for the source cube, we need equally
+                # shaped arrays, so we pad with 0s and record which entries
+                # are padding in src_area_datas_required
+                padded_y_indices = y_indices + [0] * (max_y_indices - len_y)
+                padded_x_indices = x_indices + [0] * (max_x_indices - len_x)
+
+                square_data_indices_y[..., target_pt_ji] = np.array(
+                    padded_y_indices
+                )[:, np.newaxis]
+                square_data_indices_x[..., target_pt_ji] = padded_x_indices
+
+                src_area_datas_required[0:len_y, 0:len_x, target_pt_ji] = True
+
+    # Package up the return data
+
+    weights_info = (
+        blank_weights,
+        src_area_weights,
+        new_data_mask_basis,
+    )
+
+    index_info = (
+        result_x_extent,
+        result_y_extent,
+        square_data_indices_y,
+        square_data_indices_x,
+        src_area_datas_required,
+    )
+
+    # Now return it
+
     return (
         src_x,
         src_y,
@@ -907,6 +1019,7 @@
         meshgrid_x,
         meshgrid_y,
         weights_info,
+        index_info,
     )
@@ -929,6 +1042,7 @@ def _regrid_area_weighted_rectilinear_src_and_grid__perform(
         meshgrid_x,
         meshgrid_y,
         weights_info,
+        index_info,
     ) = regrid_info
 
     # Calculate new data array for regridded cube.
@@ -937,6 +1051,7 @@ def _regrid_area_weighted_rectilinear_src_and_grid__perform(
         x_dim=src_x_dim,
         y_dim=src_y_dim,
         weights_info=weights_info,
+        index_info=index_info,
         mdtol=mdtol,
     )
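The heart of the change above is replacing the per-target-cell Python loop in the perform step with a single fancy-indexed gather followed by one vectorised reduction. The sketch below is not part of the patch: all names are invented for illustration, and a plain mean stands in for Iris's mdtol-aware, area-weighted masked average, but it demonstrates the squared-off padding and gather technique on a toy grid.

import numpy as np

# Toy 4x4 source grid and two target cells covering 2x2 and 1x3 source cells.
src = np.arange(16.0).reshape(4, 4)
cells_y = [[0, 1], [2]]  # source row indices per target cell
cells_x = [[0, 1], [1, 2, 3]]  # source column indices per target cell

max_y = max(len(ys) for ys in cells_y)
max_x = max(len(xs) for xs in cells_x)
n_cells = len(cells_y)

# Squared-off, zero-padded index arrays plus a "required" flag, mirroring
# square_data_indices_y/x and src_area_datas_required in the prepare step.
idx_y = np.zeros((max_y, max_x, n_cells), dtype=int)
idx_x = np.zeros((max_y, max_x, n_cells), dtype=int)
required = np.zeros((max_y, max_x, n_cells), dtype=bool)
for k, (ys, xs) in enumerate(zip(cells_y, cells_x)):
    idx_y[..., k] = np.array(ys + [0] * (max_y - len(ys)))[:, np.newaxis]
    idx_x[..., k] = xs + [0] * (max_x - len(xs))
    required[: len(ys), : len(xs), k] = True

# One vectorised gather replaces the per-cell Python loop; padded slots
# are zeroed so they cannot contribute to the sums.
gathered = np.where(required, src[idx_y, idx_x], 0)

# Unweighted means per target cell (the real code applies area weights and
# a masked, mdtol-aware average instead).
means = gathered.sum(axis=(0, 1)) / required.sum(axis=(0, 1))

expected = [src[np.ix_(ys, xs)].mean() for ys, xs in zip(cells_y, cells_x)]
assert np.allclose(means, expected)  # [2.5, 10.0]

Padding every target cell out to the same (max_y, max_x) window is what makes the single src[idx_y, idx_x] gather possible; the required flags are the price paid for that regular shape.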
diff --git a/lib/iris/tests/integration/analysis/test_area_weighted.py b/lib/iris/tests/integration/analysis/test_area_weighted.py
new file mode 100644
index 0000000000..d2f1a4ad44
--- /dev/null
+++ b/lib/iris/tests/integration/analysis/test_area_weighted.py
@@ -0,0 +1,77 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the LGPL license.
+# See COPYING and COPYING.LESSER in the root of the repository for full
+# licensing details.
+"""Integration tests for area weighted regridding."""
+
+# Import iris.tests first so that some things can be initialised before
+# importing anything else.
+import iris.tests as tests  # isort:skip
+
+import tempfile
+
+import iris
+from iris.analysis import AreaWeighted
+
+
+@tests.skip_data
+class AreaWeightedTests(tests.IrisTest):
+    def setUp(self):
+        # Prepare a cube and a template
+
+        cube_file_path = tests.get_data_path(
+            ["NetCDF", "regrid", "regrid_xyt.nc"]
+        )
+        self.cube = iris.load_cube(cube_file_path)
+
+        template_file_path = tests.get_data_path(
+            ["NetCDF", "regrid", "regrid_template_global_latlon.nc"]
+        )
+        self.template_cube = iris.load_cube(template_file_path)
+
+    def test_regrid_area_w_lazy(self):
+        # Regrid the cube onto the template.
+        out = self.cube.regrid(self.template_cube, AreaWeighted())
+        # Check data is still lazy
+        self.assertTrue(self.cube.has_lazy_data())
+        self.assertTrue(out.has_lazy_data())
+        # Save the data
+        with tempfile.TemporaryFile(suffix=".nc") as fp:
+            iris.save(out, fp)
+
+    def test_regrid_area_w_lazy_chunked(self):
+        # Chunked data makes the regridder run repeatedly
+        self.cube.data = self.cube.lazy_data().rechunk((1, -1, -1))
+        # Regrid the cube onto the template.
+        out = self.cube.regrid(self.template_cube, AreaWeighted())
+        # Check data is still lazy
+        self.assertTrue(self.cube.has_lazy_data())
+        self.assertTrue(out.has_lazy_data())
+        # Save the data
+        with tempfile.TemporaryFile(suffix=".nc") as fp:
+            iris.save(out, fp)
+
+    def test_regrid_area_w_real_save(self):
+        real_cube = self.cube.copy()
+        real_cube.data
+        # Regrid the cube onto the template.
+        out = real_cube.regrid(self.template_cube, AreaWeighted())
+        # Realise the data
+        out.data
+        # Save the data
+        with tempfile.TemporaryFile(suffix=".nc") as fp:
+            iris.save(out, fp)
+
+    def test_regrid_area_w_real_start(self):
+        real_cube = self.cube.copy()
+        real_cube.data
+        # Regrid the cube onto the template.
+        out = real_cube.regrid(self.template_cube, AreaWeighted())
+        # Save the data
+        with tempfile.TemporaryFile(suffix=".nc") as fp:
+            iris.save(out, fp)
+
+
+if __name__ == "__main__":
+    tests.main()
diff --git a/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py b/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py
index 2fbfbacfd2..a44ccb32bd 100644
--- a/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py
+++ b/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py
@@ -15,6 +15,7 @@
 
 import numpy as np
 
+from iris import load_cube
 from iris.analysis._area_weighted import AreaWeightedRegridder
 from iris.coord_systems import GeogCS
 from iris.coords import DimCoord
@@ -50,7 +51,7 @@ def check_mdtol(self, mdtol=None):
                 src_grid, target_grid
             )
         )
-        self.assertEqual(len(_regrid_info), 9)
+        self.assertEqual(len(_regrid_info), 10)
         with mock.patch(
             "iris.experimental.regrid."
             "_regrid_area_weighted_rectilinear_src_and_grid__prepare",
@@ -247,5 +248,41 @@ def test_src_data_different_dims(self):
         )
 
 
+@tests.skip_data
+class TestLazy(tests.IrisTest):
+    # Setup
+    def setUp(self) -> None:
+        # Prepare a cube and a template
+
+        cube_file_path = tests.get_data_path(
+            ["NetCDF", "regrid", "regrid_xyt.nc"]
+        )
+        self.cube = load_cube(cube_file_path)
+
+        template_file_path = tests.get_data_path(
+            ["NetCDF", "regrid", "regrid_template_global_latlon.nc"]
+        )
+        self.template_cube = load_cube(template_file_path)
+
+        # Chunked data makes the regridder run repeatedly
+        self.cube.data = self.cube.lazy_data().rechunk((1, -1, -1))
+
+    def test_src_stays_lazy(self) -> None:
+        cube = self.cube.copy()
+        # Regrid the cube onto the template.
+        regridder = AreaWeightedRegridder(cube, self.template_cube)
+        regridder(cube)
+        # Base cube stays lazy
+        self.assertTrue(cube.has_lazy_data())
+
+    def test_output_lazy(self) -> None:
+        cube = self.cube.copy()
+        # Regrid the cube onto the template.
+        regridder = AreaWeightedRegridder(cube, self.template_cube)
+        out = regridder(cube)
+        # Lazy base cube means lazy output
+        self.assertTrue(out.has_lazy_data())
+
+
 if __name__ == "__main__":
     tests.main()
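A note on why the tests above can expect laziness to survive: once the prepare step has precomputed all the weight and index arrays, the perform step is a pure NumPy function of the source data block, which is exactly the kind of computation dask can defer and apply per chunk. A minimal sketch of that idea follows, with invented names; Iris wires this up through its own lazy-data helpers rather than a bare map_blocks call.

import dask.array as da
import numpy as np

def perform(block, weights=None):
    # Stand-in for the perform step: any pure NumPy function of a data
    # block and precomputed weights can be applied chunk by chunk.
    return block * weights.sum()

weights = np.ones((3, 4))  # computed once, as in the prepare step
lazy = da.ones((10, 3, 4), chunks=(1, 3, 4))
out = da.map_blocks(perform, lazy, weights=weights, dtype=lazy.dtype)
assert isinstance(out, da.Array)  # still lazy; nothing computed yet
out.compute()

Rechunking the time dimension to single slices, as the chunked test does, simply makes the per-chunk function run once per time step.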