From 0f36c91ffe366858f8477350ccf99de27070643d Mon Sep 17 00:00:00 2001 From: Will Benfold Date: Fri, 5 Nov 2021 13:23:38 +0000 Subject: [PATCH 1/7] Refactor regrid.py's regrid_area_weighted_rectilinear to move weight array construction and src_cube index construction into prepare step --- lib/iris/analysis/_area_weighted.py | 2 + lib/iris/experimental/regrid.py | 431 ++++++++++++++++------------ 2 files changed, 254 insertions(+), 179 deletions(-) diff --git a/lib/iris/analysis/_area_weighted.py b/lib/iris/analysis/_area_weighted.py index 93459c5cc8..ae162f6c53 100644 --- a/lib/iris/analysis/_area_weighted.py +++ b/lib/iris/analysis/_area_weighted.py @@ -71,6 +71,7 @@ def __init__(self, src_grid_cube, target_grid_cube, mdtol=1): self.meshgrid_x, self.meshgrid_y, self.weights_info, + self.index_info, ) = _regrid_info def __call__(self, cube): @@ -124,6 +125,7 @@ def __call__(self, cube): self.meshgrid_x, self.meshgrid_y, self.weights_info, + self.index_info, ) return _regrid_area_weighted_rectilinear_src_and_grid__perform( cube, _regrid_info, mdtol=self._mdtol diff --git a/lib/iris/experimental/regrid.py b/lib/iris/experimental/regrid.py index b484b9eb78..38501e957a 100644 --- a/lib/iris/experimental/regrid.py +++ b/lib/iris/experimental/regrid.py @@ -403,7 +403,9 @@ def _weighted_mean_with_mdtol(data, weights, axis=None, mdtol=0): return res -def _regrid_area_weighted_array(src_data, x_dim, y_dim, weights_info, mdtol=0): +def _regrid_area_weighted_array( + src_data, x_dim, y_dim, weights_info, index_info, mdtol=0 +): """ Regrid the given data from its source grid to a new grid using an area weighted mean to determine the resulting data values. @@ -444,13 +446,19 @@ def _regrid_area_weighted_array(src_data, x_dim, y_dim, weights_info, mdtol=0): """ ( - cached_x_indices, - cached_y_indices, - max_x_indices, - max_y_indices, - cached_weights, + blank_weights, + src_area_weights, + new_data_mask_basis, ) = weights_info + ( + result_x_extent, + result_y_extent, + square_data_indices_y, + square_data_indices_x, + src_area_datas_required, + ) = index_info + # Ensure we have x_dim and y_dim. x_dim_orig = x_dim y_dim_orig = y_dim @@ -479,60 +487,47 @@ def _regrid_area_weighted_array(src_data, x_dim, y_dim, weights_info, mdtol=0): # Note that dtype is not preserved and that the array mask # allows for regions that do not overlap. new_shape = list(src_data.shape) - new_shape[x_dim] = len(cached_x_indices) - new_shape[y_dim] = len(cached_y_indices) - num_target_pts = len(cached_y_indices) * len(cached_x_indices) - src_areas_shape = list(src_data.shape) - src_areas_shape[y_dim] = max_y_indices - src_areas_shape[x_dim] = max_x_indices - src_areas_shape += [num_target_pts] + new_shape[x_dim] = result_x_extent + new_shape[y_dim] = result_y_extent + # Use input cube dtype or convert values to the smallest possible float # dtype when necessary. dtype = np.promote_types(src_data.dtype, np.float16) - # Create empty arrays to hold src_data per target point, and weights - src_area_datas = np.zeros(src_areas_shape, dtype=np.float64) - src_area_weights = np.zeros( - list((max_y_indices, max_x_indices, num_target_pts)) + + # Axes of data over which the weighted mean is calculated. 
+ axis = (y_dim, x_dim) + + # Use previously established indices + + src_area_datas_square = src_data[ + ..., square_data_indices_y, square_data_indices_x + ] + + _, src_area_datas_required = np.broadcast_arrays( + src_area_datas_square, src_area_datas_required + ) + + src_area_datas = np.where( + src_area_datas_required, src_area_datas_square, 0 ) # Flag to indicate whether the original data was a masked array. src_masked = src_data.mask.any() if ma.isMaskedArray(src_data) else False if src_masked: - src_area_masks = np.full(src_areas_shape, True, dtype=np.bool_) - else: - new_data_mask = np.full(new_shape, False, dtype=np.bool_) + src_area_masks_square = src_data.mask[ + ..., square_data_indices_y, square_data_indices_x + ] + src_area_masks = np.where( + src_area_datas_required, src_area_masks_square, True + ) - # Axes of data over which the weighted mean is calculated. - axis = (y_dim, x_dim) + else: + # If the weights were originally blank, set the weights to all 1 to + # avoid divide by 0 error and set the new data mask for making the + # values 0 + src_area_weights = np.where(blank_weights, 1, src_area_weights) - # Stack the src_area data and weights for each target point - target_pt_ji = -1 - for j, y_indices in enumerate(cached_y_indices): - for i, x_indices in enumerate(cached_x_indices): - target_pt_ji += 1 - # Determine whether to mask element i, j based on whether - # there are valid weights. - weights = cached_weights[j][i] - if isinstance(weights, bool) and not weights: - if not src_masked: - # Cheat! Fill the data with zeros and weights as one. - # The weighted average result will be the same, but - # we avoid dividing by zero. - src_area_weights[..., target_pt_ji] = 1 - new_data_mask[..., j, i] = True - else: - # Calculate weighted mean of data points. - # Slice out relevant data (this may or may not be a view() - # depending on x_indices being a slice or not). - data = src_data[..., y_indices, x_indices] - len_x = data.shape[-1] - len_y = data.shape[-2] - src_area_datas[..., 0:len_y, 0:len_x, target_pt_ji] = data - src_area_weights[0:len_y, 0:len_x, target_pt_ji] = weights - if src_masked: - src_area_masks[ - ..., 0:len_y, 0:len_x, target_pt_ji - ] = data.mask + new_data_mask = np.broadcast_to(new_data_mask_basis, new_shape) # Broadcast the weights array to allow numpy's ma.average # to be called. @@ -758,145 +753,220 @@ def _regrid_area_weighted_rectilinear_src_and_grid__prepare( else: area_func = _cartesian_area - def _calculate_regrid_area_weighted_weights( - src_x_bounds, - src_y_bounds, - grid_x_bounds, - grid_y_bounds, - grid_x_decreasing, - grid_y_decreasing, - area_func, - circular=False, - ): - """ - Compute the area weights used for area-weighted regridding. + # Recap + # * src_x_bounds: + # A NumPy array of bounds along the X axis defining the source grid. + # * src_y_bounds: + # A NumPy array of bounds along the Y axis defining the source grid. + # * grid_x_bounds: + # A NumPy array of bounds along the X axis defining the new grid. + # * grid_y_bounds: + # A NumPy array of bounds along the Y axis defining the new grid. + # * grid_x_decreasing: + # Boolean indicating whether the X coordinate of the new grid is + # in descending order. + # * grid_y_decreasing: + # Boolean indicating whether the Y coordinate of the new grid is + # in descending order. + # * area_func: + # A function that returns an (p, q) array of weights given an (p, 2) + # shaped array of Y bounds and an (q, 2) shaped array of X bounds. 
+ # * circular: + # A boolean indicating whether the `src_x_bounds` are periodic. + # Default is False. + + # Determine which grid bounds are within src extent. + y_within_bounds = _within_bounds( + src_y_bounds, grid_y_bounds, grid_y_decreasing + ) + x_within_bounds = _within_bounds( + src_x_bounds, grid_x_bounds, grid_x_decreasing + ) - Args: + # Cache which src_bounds are within grid bounds + cached_x_bounds = [] + cached_x_indices = [] + max_x_indices = 0 + for (x_0, x_1) in grid_x_bounds: + if grid_x_decreasing: + x_0, x_1 = x_1, x_0 + x_bounds, x_indices = _cropped_bounds(src_x_bounds, x_0, x_1) + cached_x_bounds.append(x_bounds) + cached_x_indices.append(x_indices) + # Keep record of the largest slice + if isinstance(x_indices, slice): + x_indices_size = np.sum(x_indices.stop - x_indices.start) + else: # is tuple of indices + x_indices_size = len(x_indices) + if x_indices_size > max_x_indices: + max_x_indices = x_indices_size + + # Cache which y src_bounds areas and weights are within grid bounds + cached_y_indices = [] + cached_weights = [] + max_y_indices = 0 + for j, (y_0, y_1) in enumerate(grid_y_bounds): + # Reverse lower and upper if dest grid is decreasing. + if grid_y_decreasing: + y_0, y_1 = y_1, y_0 + y_bounds, y_indices = _cropped_bounds(src_y_bounds, y_0, y_1) + cached_y_indices.append(y_indices) + # Keep record of the largest slice + if isinstance(y_indices, slice): + y_indices_size = np.sum(y_indices.stop - y_indices.start) + else: # is tuple of indices + y_indices_size = len(y_indices) + if y_indices_size > max_y_indices: + max_y_indices = y_indices_size + + weights_i = [] + for i, (x_0, x_1) in enumerate(grid_x_bounds): + # Reverse lower and upper if dest grid is decreasing. + if grid_x_decreasing: + x_0, x_1 = x_1, x_0 + x_bounds = cached_x_bounds[i] + x_indices = cached_x_indices[i] + + # Determine whether element i, j overlaps with src and hence + # an area weight should be computed. + # If x_0 > x_1 then we want [0]->x_1 and x_0->[0] + mod in the case + # of wrapped longitudes. However if the src grid is not global + # (i.e. circular) this new cell would include a region outside of + # the extent of the src grid and thus the weight is therefore + # invalid. + outside_extent = x_0 > x_1 and not circular + if ( + outside_extent + or not y_within_bounds[j] + or not x_within_bounds[i] + ): + weights = False + else: + # Calculate weights based on areas of cropped bounds. + if isinstance(x_indices, tuple) and isinstance( + y_indices, tuple + ): + raise RuntimeError( + "Cannot handle split bounds " "in both x and y." + ) + weights = area_func(y_bounds, x_bounds) + weights_i.append(weights) + cached_weights.append(weights_i) - * src_x_bounds: - A NumPy array of bounds along the X axis defining the source grid. - * src_y_bounds: - A NumPy array of bounds along the Y axis defining the source grid. - * grid_x_bounds: - A NumPy array of bounds along the X axis defining the new grid. - * grid_y_bounds: - A NumPy array of bounds along the Y axis defining the new grid. - * grid_x_decreasing: - Boolean indicating whether the X coordinate of the new grid is - in descending order. - * grid_y_decreasing: - Boolean indicating whether the Y coordinate of the new grid is - in descending order. - * area_func: - A function that returns an (p, q) array of weights given an (p, 2) - shaped array of Y bounds and an (q, 2) shaped array of X bounds. - - Kwargs: - - * circular: - A boolean indicating whether the `src_x_bounds` are periodic. - Default is False. 
+ # Go further, calculating the full weights array that we'll need in the + # perform step and the indices we'll need to extract from the cube we're + # regridding (src_data) - Returns: - The area weights to be used for area-weighted regridding. + result_y_extent = len(cached_y_indices) + result_x_extent = len(cached_x_indices) - """ - # Determine which grid bounds are within src extent. - y_within_bounds = _within_bounds( - src_y_bounds, grid_y_bounds, grid_y_decreasing - ) - x_within_bounds = _within_bounds( - src_x_bounds, grid_x_bounds, grid_x_decreasing - ) + # Total number of points + num_target_pts = result_y_extent * result_x_extent - # Cache which src_bounds are within grid bounds - cached_x_bounds = [] - cached_x_indices = [] - max_x_indices = 0 - for (x_0, x_1) in grid_x_bounds: - if grid_x_decreasing: - x_0, x_1 = x_1, x_0 - x_bounds, x_indices = _cropped_bounds(src_x_bounds, x_0, x_1) - cached_x_bounds.append(x_bounds) - cached_x_indices.append(x_indices) - # Keep record of the largest slice - if isinstance(x_indices, slice): - x_indices_size = np.sum(x_indices.stop - x_indices.start) - else: # is tuple of indices - x_indices_size = len(x_indices) - if x_indices_size > max_x_indices: - max_x_indices = x_indices_size - - # Cache which y src_bounds areas and weights are within grid bounds - cached_y_indices = [] - cached_weights = [] - max_y_indices = 0 - for j, (y_0, y_1) in enumerate(grid_y_bounds): - # Reverse lower and upper if dest grid is decreasing. - if grid_y_decreasing: - y_0, y_1 = y_1, y_0 - y_bounds, y_indices = _cropped_bounds(src_y_bounds, y_0, y_1) - cached_y_indices.append(y_indices) - # Keep record of the largest slice - if isinstance(y_indices, slice): - y_indices_size = np.sum(y_indices.stop - y_indices.start) - else: # is tuple of indices - y_indices_size = len(y_indices) - if y_indices_size > max_y_indices: - max_y_indices = y_indices_size - - weights_i = [] - for i, (x_0, x_1) in enumerate(grid_x_bounds): - # Reverse lower and upper if dest grid is decreasing. - if grid_x_decreasing: - x_0, x_1 = x_1, x_0 - x_bounds = cached_x_bounds[i] - x_indices = cached_x_indices[i] - - # Determine whether element i, j overlaps with src and hence - # an area weight should be computed. - # If x_0 > x_1 then we want [0]->x_1 and x_0->[0] + mod in the case - # of wrapped longitudes. However if the src grid is not global - # (i.e. circular) this new cell would include a region outside of - # the extent of the src grid and thus the weight is therefore - # invalid. - outside_extent = x_0 > x_1 and not circular - if ( - outside_extent - or not y_within_bounds[j] - or not x_within_bounds[i] - ): - weights = False + # Create empty array to hold weights + src_area_weights = np.zeros( + list((max_y_indices, max_x_indices, num_target_pts)) + ) + + # Built for the case where the source cube isn't masked + blank_weights = np.zeros((num_target_pts,)) + new_data_mask_basis = np.full( + (len(cached_y_indices), len(cached_x_indices)), False, dtype=np.bool_ + ) + + # To permit fancy indexing, we're going to store our data in an array of + # the size of the biggest. 
This means we need to track whether the data in + # that array is actually required and build those squared-off arrays + # TODO: Consider if a proper mask would be better + src_area_datas_required = np.full( + (max_y_indices, max_x_indices, num_target_pts), False + ) + square_data_indices_y = np.zeros( + (max_y_indices, max_x_indices, num_target_pts), dtype=int + ) + square_data_indices_x = np.zeros( + (max_y_indices, max_x_indices, num_target_pts), dtype=int + ) + + # Stack the weights for each target point and build the indices we'll need + # to extract the src_area_data + target_pt_ji = -1 + for j, y_indices in enumerate(cached_y_indices): + for i, x_indices in enumerate(cached_x_indices): + target_pt_ji += 1 + # Determine whether to mask element i, j based on whether + # there are valid weights. + weights = cached_weights[j][i] + if isinstance(weights, bool) and not weights: + # Prepare for the src_data not being masked by storing the + # information that will let us fill the data with zeros and + # weights as one. The weighted average result will be the same, + # but we avoid dividing by zero. + blank_weights[target_pt_ji] = True + new_data_mask_basis[j, i] = True + else: + # Establish which indices are actually in y_indices and x_indices + if isinstance(y_indices, slice): + y_indices = list( + range( + y_indices.start, + y_indices.stop, + y_indices.step or 1, + ) + ) else: - # Calculate weights based on areas of cropped bounds. - if isinstance(x_indices, tuple) and isinstance( - y_indices, tuple - ): - raise RuntimeError( - "Cannot handle split bounds " "in both x and y." + y_indices = list(y_indices) + + if isinstance(x_indices, slice): + x_indices = list( + range( + x_indices.start, + x_indices.stop, + x_indices.step or 1, ) - weights = area_func(y_bounds, x_bounds) - weights_i.append(weights) - cached_weights.append(weights_i) - return ( - tuple(cached_x_indices), - tuple(cached_y_indices), - max_x_indices, - max_y_indices, - tuple(cached_weights), - ) + ) + else: + x_indices = list(x_indices) + + # For the weights, we just need the lengths of these as we'd + # dropping them into a pre-made array - weights_info = _calculate_regrid_area_weighted_weights( - src_x_bounds, - src_y_bounds, - grid_x_bounds, - grid_y_bounds, - grid_x_decreasing, - grid_y_decreasing, - area_func, - circular, + len_y = len(y_indices) + len_x = len(x_indices) + + src_area_weights[0:len_y, 0:len_x, target_pt_ji] = weights + + # To build the indices for the source cube, we need equal + # shaped array so we pad with 0s and record the need to mask + # them in src_area_datas_required + padded_y_indices = y_indices + [0] * (max_y_indices - len_y) + padded_x_indices = x_indices + [0] * (max_x_indices - len_x) + + square_data_indices_y[..., target_pt_ji] = np.array( + padded_y_indices + )[:, np.newaxis] + square_data_indices_x[..., target_pt_ji] = padded_x_indices + + src_area_datas_required[0:len_y, 0:len_x, target_pt_ji] = True + + # Package up the return data + + weights_info = ( + blank_weights, + src_area_weights, + new_data_mask_basis, ) + index_info = ( + result_x_extent, + result_y_extent, + square_data_indices_y, + square_data_indices_x, + src_area_datas_required, + ) + + # Now return it + return ( src_x, src_y, @@ -907,6 +977,7 @@ def _calculate_regrid_area_weighted_weights( meshgrid_x, meshgrid_y, weights_info, + index_info, ) @@ -929,6 +1000,7 @@ def _regrid_area_weighted_rectilinear_src_and_grid__perform( meshgrid_x, meshgrid_y, weights_info, + index_info, ) = regrid_info # Calculate new data array for 
regridded cube. @@ -937,6 +1009,7 @@ def _regrid_area_weighted_rectilinear_src_and_grid__perform( x_dim=src_x_dim, y_dim=src_y_dim, weights_info=weights_info, + index_info=index_info, mdtol=mdtol, ) From 68a7a5cf12e1aa4da22d018b4dfb19a1c3c67471 Mon Sep 17 00:00:00 2001 From: Will Benfold Date: Fri, 5 Nov 2021 13:56:55 +0000 Subject: [PATCH 2/7] Update test to expect the additional output value from prepare step --- .../unit/analysis/area_weighted/test_AreaWeightedRegridder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py b/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py index 2fbfbacfd2..7fada79be6 100644 --- a/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py +++ b/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py @@ -50,7 +50,7 @@ def check_mdtol(self, mdtol=None): src_grid, target_grid ) ) - self.assertEqual(len(_regrid_info), 9) + self.assertEqual(len(_regrid_info), 10) with mock.patch( "iris.experimental.regrid." "_regrid_area_weighted_rectilinear_src_and_grid__prepare", From d058b2ef9c88818e104cfe6697ebe63610219ef2 Mon Sep 17 00:00:00 2001 From: Will Benfold Date: Tue, 9 Nov 2021 16:24:22 +0000 Subject: [PATCH 3/7] Integration tests of area weighted regrid workflow (open, regrid, save) --- .../analysis/test_area_weighted.py | 73 +++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 lib/iris/tests/integration/analysis/test_area_weighted.py diff --git a/lib/iris/tests/integration/analysis/test_area_weighted.py b/lib/iris/tests/integration/analysis/test_area_weighted.py new file mode 100644 index 0000000000..304419fa92 --- /dev/null +++ b/lib/iris/tests/integration/analysis/test_area_weighted.py @@ -0,0 +1,73 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the LGPL license. +# See COPYING and COPYING.LESSER in the root of the repository for full +# licensing details. +"""Integration tests for area weighted regridding.""" + +# Import iris.tests first so that some things can be initialised before +# importing anything else. +import iris.tests as tests # isort:skip + +import tempfile + +import iris +from iris.analysis import AreaWeighted + + +@tests.skip_data +class AreaWeightedTests(tests.IrisTest): + def setup(self): + # Prepare a cube and a template + + cube_file_path = tests.get_data_path( + ["NetCDF", "regrid", "regrid_xyt.nc"] + ) + self.cube = iris.load_cube(cube_file_path) + + template_file_path = tests.get_data_path( + ["NetCDF", "regrid", "regrid_template_global_latlon.nc"] + ) + self.template_cube = iris.load_cube(template_file_path) + + def test_regrid_area_w_lazy(self): + # Regrid the cube onto the template. + out = self.cube.regrid(self.template_cube, AreaWeighted()) + # Check data is still lazy + self.assertTrue(self.cube.has_lazy_data()) + self.assertTrue(out.has_lazy_data()) + # Save the data + with tempfile.TemporaryFile(suffix=".nc") as fp: + iris.save(out, fp) + + def test_regrid_area_w_lazy_chunked(self): + # Chunked data makes the regridder run repeatedly + self.cube.data = self.cube.lazy_data().rechunk((1, -1, -1)) + # Regrid the cube onto the template. 
+ out = self.cube.regrid(self.template_cube, AreaWeighted()) + # Check data is still lazy + self.assertTrue(self.cube.has_lazy_data()) + self.assertTrue(out.has_lazy_data()) + # Save the data + with tempfile.TemporaryFile(suffix=".nc") as fp: + iris.save(out, fp) + + def test_regrid_area_w_real_save(self): + real_cube = self.cube.copy() + real_cube.data + # Regrid the cube onto the template. + out = real_cube.regrid(self.template_cube, AreaWeighted()) + # Realise the data + out.data + # Save the data + with tempfile.TemporaryFile(suffix=".nc") as fp: + iris.save(out, fp) + + def test_regrid_area_w_real_start(self): + real_cube = self.cube.copy() + real_cube.data + # Regrid the cube onto the template. + out = real_cube.regrid(self.template_cube, AreaWeighted()) + # Save the data + with tempfile.TemporaryFile(suffix=".nc") as fp: + iris.save(out, fp) From 7d1c2eabc1175464d74e01c5ff524ff1194d923b Mon Sep 17 00:00:00 2001 From: Will Benfold Date: Wed, 10 Nov 2021 12:19:23 +0000 Subject: [PATCH 4/7] Unit tests to check source cube and output cube are lazy if not explicitly realised --- .../test_AreaWeightedRegridder.py | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py b/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py index 7fada79be6..4b1dba87e8 100644 --- a/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py +++ b/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py @@ -15,6 +15,7 @@ import numpy as np +from iris import load_cube from iris.analysis._area_weighted import AreaWeightedRegridder from iris.coord_systems import GeogCS from iris.coords import DimCoord @@ -247,5 +248,41 @@ def test_src_data_different_dims(self): ) +@tests.skip_data +class TestLazy(tests.IrisTest): + # Setup + def setup(self) -> None: + # Prepare a cube and a template + + cube_file_path = tests.get_data_path( + ["NetCDF", "regrid", "regrid_xyt.nc"] + ) + self.cube = load_cube(cube_file_path) + + template_file_path = tests.get_data_path( + ["NetCDF", "regrid", "regrid_template_global_latlon.nc"] + ) + self.template_cube = load_cube(template_file_path) + + # Chunked data makes the regridder run repeatedly + self.cube.data = self.cube.lazy_data().rechunk((1, -1, -1)) + + def test_src_stays_lazy(self) -> None: + cube = self.cube.copy() + # Regrid the cube onto the template. + regridder = AreaWeightedRegridder(cube, self.template_cube) + regridder.regrid(cube) + # Base cube stays lazy + self.assertTrue(cube.has_lazy_data()) + + def test_output_lazy(self) -> None: + cube = self.cube.copy() + # Regrid the cube onto the template. 
+ regridder = AreaWeightedRegridder(cube, self.template_cube) + out = regridder.regrid(cube) + # Lazy base cube means lazy output + self.assertTrue(out.has_lazy_data()) + + if __name__ == "__main__": tests.main() From 4fddc7fb96328236a0e69026710c5ef1803df5df Mon Sep 17 00:00:00 2001 From: Will Benfold Date: Thu, 11 Nov 2021 12:13:55 +0000 Subject: [PATCH 5/7] Restore the function defined within a function to improve diff --- lib/iris/experimental/regrid.py | 229 +++++++++++++++++++------------- 1 file changed, 134 insertions(+), 95 deletions(-) diff --git a/lib/iris/experimental/regrid.py b/lib/iris/experimental/regrid.py index 38501e957a..ca8e2dd2ae 100644 --- a/lib/iris/experimental/regrid.py +++ b/lib/iris/experimental/regrid.py @@ -753,105 +753,144 @@ def _regrid_area_weighted_rectilinear_src_and_grid__prepare( else: area_func = _cartesian_area - # Recap - # * src_x_bounds: - # A NumPy array of bounds along the X axis defining the source grid. - # * src_y_bounds: - # A NumPy array of bounds along the Y axis defining the source grid. - # * grid_x_bounds: - # A NumPy array of bounds along the X axis defining the new grid. - # * grid_y_bounds: - # A NumPy array of bounds along the Y axis defining the new grid. - # * grid_x_decreasing: - # Boolean indicating whether the X coordinate of the new grid is - # in descending order. - # * grid_y_decreasing: - # Boolean indicating whether the Y coordinate of the new grid is - # in descending order. - # * area_func: - # A function that returns an (p, q) array of weights given an (p, 2) - # shaped array of Y bounds and an (q, 2) shaped array of X bounds. - # * circular: - # A boolean indicating whether the `src_x_bounds` are periodic. - # Default is False. - - # Determine which grid bounds are within src extent. - y_within_bounds = _within_bounds( - src_y_bounds, grid_y_bounds, grid_y_decreasing - ) - x_within_bounds = _within_bounds( - src_x_bounds, grid_x_bounds, grid_x_decreasing - ) + def _calculate_regrid_area_weighted_weights( + src_x_bounds, + src_y_bounds, + grid_x_bounds, + grid_y_bounds, + grid_x_decreasing, + grid_y_decreasing, + area_func, + circular=False, + ): + """ + Compute the area weights used for area-weighted regridding. + Args: + * src_x_bounds: + A NumPy array of bounds along the X axis defining the source grid. + * src_y_bounds: + A NumPy array of bounds along the Y axis defining the source grid. + * grid_x_bounds: + A NumPy array of bounds along the X axis defining the new grid. + * grid_y_bounds: + A NumPy array of bounds along the Y axis defining the new grid. + * grid_x_decreasing: + Boolean indicating whether the X coordinate of the new grid is + in descending order. + * grid_y_decreasing: + Boolean indicating whether the Y coordinate of the new grid is + in descending order. + * area_func: + A function that returns an (p, q) array of weights given an (p, 2) + shaped array of Y bounds and an (q, 2) shaped array of X bounds. + Kwargs: + * circular: + A boolean indicating whether the `src_x_bounds` are periodic. + Default is False. + Returns: + The area weights to be used for area-weighted regridding. + """ + # Determine which grid bounds are within src extent. 
+ y_within_bounds = _within_bounds( + src_y_bounds, grid_y_bounds, grid_y_decreasing + ) + x_within_bounds = _within_bounds( + src_x_bounds, grid_x_bounds, grid_x_decreasing + ) - # Cache which src_bounds are within grid bounds - cached_x_bounds = [] - cached_x_indices = [] - max_x_indices = 0 - for (x_0, x_1) in grid_x_bounds: - if grid_x_decreasing: - x_0, x_1 = x_1, x_0 - x_bounds, x_indices = _cropped_bounds(src_x_bounds, x_0, x_1) - cached_x_bounds.append(x_bounds) - cached_x_indices.append(x_indices) - # Keep record of the largest slice - if isinstance(x_indices, slice): - x_indices_size = np.sum(x_indices.stop - x_indices.start) - else: # is tuple of indices - x_indices_size = len(x_indices) - if x_indices_size > max_x_indices: - max_x_indices = x_indices_size - - # Cache which y src_bounds areas and weights are within grid bounds - cached_y_indices = [] - cached_weights = [] - max_y_indices = 0 - for j, (y_0, y_1) in enumerate(grid_y_bounds): - # Reverse lower and upper if dest grid is decreasing. - if grid_y_decreasing: - y_0, y_1 = y_1, y_0 - y_bounds, y_indices = _cropped_bounds(src_y_bounds, y_0, y_1) - cached_y_indices.append(y_indices) - # Keep record of the largest slice - if isinstance(y_indices, slice): - y_indices_size = np.sum(y_indices.stop - y_indices.start) - else: # is tuple of indices - y_indices_size = len(y_indices) - if y_indices_size > max_y_indices: - max_y_indices = y_indices_size - - weights_i = [] - for i, (x_0, x_1) in enumerate(grid_x_bounds): - # Reverse lower and upper if dest grid is decreasing. + # Cache which src_bounds are within grid bounds + cached_x_bounds = [] + cached_x_indices = [] + max_x_indices = 0 + for (x_0, x_1) in grid_x_bounds: if grid_x_decreasing: x_0, x_1 = x_1, x_0 - x_bounds = cached_x_bounds[i] - x_indices = cached_x_indices[i] - - # Determine whether element i, j overlaps with src and hence - # an area weight should be computed. - # If x_0 > x_1 then we want [0]->x_1 and x_0->[0] + mod in the case - # of wrapped longitudes. However if the src grid is not global - # (i.e. circular) this new cell would include a region outside of - # the extent of the src grid and thus the weight is therefore - # invalid. - outside_extent = x_0 > x_1 and not circular - if ( - outside_extent - or not y_within_bounds[j] - or not x_within_bounds[i] - ): - weights = False - else: - # Calculate weights based on areas of cropped bounds. - if isinstance(x_indices, tuple) and isinstance( - y_indices, tuple + x_bounds, x_indices = _cropped_bounds(src_x_bounds, x_0, x_1) + cached_x_bounds.append(x_bounds) + cached_x_indices.append(x_indices) + # Keep record of the largest slice + if isinstance(x_indices, slice): + x_indices_size = np.sum(x_indices.stop - x_indices.start) + else: # is tuple of indices + x_indices_size = len(x_indices) + if x_indices_size > max_x_indices: + max_x_indices = x_indices_size + + # Cache which y src_bounds areas and weights are within grid bounds + cached_y_indices = [] + cached_weights = [] + max_y_indices = 0 + for j, (y_0, y_1) in enumerate(grid_y_bounds): + # Reverse lower and upper if dest grid is decreasing. 
+ if grid_y_decreasing: + y_0, y_1 = y_1, y_0 + y_bounds, y_indices = _cropped_bounds(src_y_bounds, y_0, y_1) + cached_y_indices.append(y_indices) + # Keep record of the largest slice + if isinstance(y_indices, slice): + y_indices_size = np.sum(y_indices.stop - y_indices.start) + else: # is tuple of indices + y_indices_size = len(y_indices) + if y_indices_size > max_y_indices: + max_y_indices = y_indices_size + + weights_i = [] + for i, (x_0, x_1) in enumerate(grid_x_bounds): + # Reverse lower and upper if dest grid is decreasing. + if grid_x_decreasing: + x_0, x_1 = x_1, x_0 + x_bounds = cached_x_bounds[i] + x_indices = cached_x_indices[i] + + # Determine whether element i, j overlaps with src and hence + # an area weight should be computed. + # If x_0 > x_1 then we want [0]->x_1 and x_0->[0] + mod in the case + # of wrapped longitudes. However if the src grid is not global + # (i.e. circular) this new cell would include a region outside of + # the extent of the src grid and thus the weight is therefore + # invalid. + outside_extent = x_0 > x_1 and not circular + if ( + outside_extent + or not y_within_bounds[j] + or not x_within_bounds[i] ): - raise RuntimeError( - "Cannot handle split bounds " "in both x and y." - ) - weights = area_func(y_bounds, x_bounds) - weights_i.append(weights) - cached_weights.append(weights_i) + weights = False + else: + # Calculate weights based on areas of cropped bounds. + if isinstance(x_indices, tuple) and isinstance( + y_indices, tuple + ): + raise RuntimeError( + "Cannot handle split bounds " "in both x and y." + ) + weights = area_func(y_bounds, x_bounds) + weights_i.append(weights) + cached_weights.append(weights_i) + return ( + tuple(cached_x_indices), + tuple(cached_y_indices), + max_x_indices, + max_y_indices, + tuple(cached_weights), + ) + + ( + cached_x_indices, + cached_y_indices, + max_x_indices, + max_y_indices, + cached_weights, + ) = _calculate_regrid_area_weighted_weights( + src_x_bounds, + src_y_bounds, + grid_x_bounds, + grid_y_bounds, + grid_x_decreasing, + grid_y_decreasing, + area_func, + circular, + ) # Go further, calculating the full weights array that we'll need in the # perform step and the indices we'll need to extract from the cube we're From 5ec6ae9c6f34bd806dd8167c61a3b7cdd2baf21e Mon Sep 17 00:00:00 2001 From: Will Benfold Date: Thu, 11 Nov 2021 17:09:59 +0000 Subject: [PATCH 6/7] Review changes pt1 --- lib/iris/experimental/regrid.py | 15 +++++++++------ .../integration/analysis/test_area_weighted.py | 6 +++++- .../area_weighted/test_AreaWeightedRegridder.py | 2 +- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/lib/iris/experimental/regrid.py b/lib/iris/experimental/regrid.py index ca8e2dd2ae..9a02fbd3b1 100644 --- a/lib/iris/experimental/regrid.py +++ b/lib/iris/experimental/regrid.py @@ -896,8 +896,8 @@ def _calculate_regrid_area_weighted_weights( # perform step and the indices we'll need to extract from the cube we're # regridding (src_data) - result_y_extent = len(cached_y_indices) - result_x_extent = len(cached_x_indices) + result_y_extent = len(grid_y_bounds) + result_x_extent = len(grid_x_bounds) # Total number of points num_target_pts = result_y_extent * result_x_extent @@ -913,8 +913,11 @@ def _calculate_regrid_area_weighted_weights( (len(cached_y_indices), len(cached_x_indices)), False, dtype=np.bool_ ) - # To permit fancy indexing, we're going to store our data in an array of - # the size of the biggest. 
This means we need to track whether the data in + # To permit fancy indexing, we need to store our data in an array whose + # first two dimensions represent the indices needed for the target cell. + # Since target cells can require a different number of indices, the size of + # these dimensions should be the maximum of this number. + # This means we need to track whether the data in # that array is actually required and build those squared-off arrays # TODO: Consider if a proper mask would be better src_area_datas_required = np.full( @@ -936,7 +939,7 @@ def _calculate_regrid_area_weighted_weights( # Determine whether to mask element i, j based on whether # there are valid weights. weights = cached_weights[j][i] - if isinstance(weights, bool) and not weights: + if weights is False: # Prepare for the src_data not being masked by storing the # information that will let us fill the data with zeros and # weights as one. The weighted average result will be the same, @@ -967,7 +970,7 @@ def _calculate_regrid_area_weighted_weights( else: x_indices = list(x_indices) - # For the weights, we just need the lengths of these as we'd + # For the weights, we just need the lengths of these as we're # dropping them into a pre-made array len_y = len(y_indices) diff --git a/lib/iris/tests/integration/analysis/test_area_weighted.py b/lib/iris/tests/integration/analysis/test_area_weighted.py index 304419fa92..d2f1a4ad44 100644 --- a/lib/iris/tests/integration/analysis/test_area_weighted.py +++ b/lib/iris/tests/integration/analysis/test_area_weighted.py @@ -17,7 +17,7 @@ @tests.skip_data class AreaWeightedTests(tests.IrisTest): - def setup(self): + def setUp(self): # Prepare a cube and a template cube_file_path = tests.get_data_path( @@ -71,3 +71,7 @@ def test_regrid_area_w_real_start(self): # Save the data with tempfile.TemporaryFile(suffix=".nc") as fp: iris.save(out, fp) + + +if __name__ == "__main__": + tests.main() diff --git a/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py b/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py index 4b1dba87e8..a747e11d7a 100644 --- a/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py +++ b/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py @@ -251,7 +251,7 @@ def test_src_data_different_dims(self): @tests.skip_data class TestLazy(tests.IrisTest): # Setup - def setup(self) -> None: + def setUp(self) -> None: # Prepare a cube and a template cube_file_path = tests.get_data_path( From b60b6ad551330513bb1630ebd0f06d72191da2b4 Mon Sep 17 00:00:00 2001 From: Will Benfold Date: Mon, 15 Nov 2021 12:19:21 +0000 Subject: [PATCH 7/7] Fix how we call the regridder --- .../unit/analysis/area_weighted/test_AreaWeightedRegridder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py b/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py index a747e11d7a..a44ccb32bd 100644 --- a/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py +++ b/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py @@ -271,7 +271,7 @@ def test_src_stays_lazy(self) -> None: cube = self.cube.copy() # Regrid the cube onto the template. 
regridder = AreaWeightedRegridder(cube, self.template_cube) - regridder.regrid(cube) + regridder(cube) # Base cube stays lazy self.assertTrue(cube.has_lazy_data()) @@ -279,7 +279,7 @@ def test_output_lazy(self) -> None: cube = self.cube.copy() # Regrid the cube onto the template. regridder = AreaWeightedRegridder(cube, self.template_cube) - out = regridder.regrid(cube) + out = regridder(cube) # Lazy base cube means lazy output self.assertTrue(out.has_lazy_data())
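
A note on the core technique, since it is spread across several hunks above: the refactor removes the per-target-cell Python loop by padding each cell's ragged source indices out to a common shape (square_data_indices_y/x), tracking which padded slots are real (src_area_datas_required), and pre-building the stacked weights (src_area_weights). The perform step then reduces to one fancy-indexing gather plus a single weighted mean. A minimal self-contained sketch of that idea, using hypothetical names rather than the Iris internals:

import numpy as np

src = np.arange(25, dtype=float).reshape(5, 5)

# Ragged per-target-cell source indices and weights, as the prepare step
# would derive from the overlap of source and target cell bounds.
y_indices = [[0, 1], [2, 3, 4]]
x_indices = [[0, 1, 2], [3, 4]]
weights = [np.ones((2, 3)), np.ones((3, 2))]

max_y = max(len(ys) for ys in y_indices)
max_x = max(len(xs) for xs in x_indices)
n_targets = len(y_indices)

# "Prepare" step: pad every cell's indices to a common (max_y, max_x)
# shape and record which padded slots are real in a boolean array.
idx_y = np.zeros((max_y, max_x, n_targets), dtype=int)
idx_x = np.zeros((max_y, max_x, n_targets), dtype=int)
required = np.zeros((max_y, max_x, n_targets), dtype=bool)
w = np.zeros((max_y, max_x, n_targets))
for t, (ys, xs, wt) in enumerate(zip(y_indices, x_indices, weights)):
    ny, nx = len(ys), len(xs)
    idx_y[:ny, :nx, t] = np.asarray(ys)[:, np.newaxis]
    idx_x[:ny, :nx, t] = xs
    required[:ny, :nx, t] = True
    w[:ny, :nx, t] = wt

# "Perform" step: one gather over the source data, with padded slots
# zeroed, then an area-weighted mean per target cell in one reduction.
gathered = np.where(required, src[idx_y, idx_x], 0.0)
means = (gathered * w).sum(axis=(0, 1)) / w.sum(axis=(0, 1))
print(means)  # [ 3.5 18.5], one weighted mean per target cell

Because padded slots carry zero weight they cannot bias the mean. Target cells with no valid overlap at all are the separate blank_weights / new_data_mask_basis case in the patch: their weights are set to one to avoid division by zero and the result is masked instead.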
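
The lazy behaviour the new integration and unit tests exercise follows from the same change: once the perform step is pure array indexing and reduction, it can be mapped over a chunked (dask) source array instead of forcing realisation. Rechunking to (1, -1, -1) keeps each horizontal field whole while splitting time, so the regrid runs once per time chunk at compute or save time. A rough, generic illustration using dask directly (the block-averaging stand-in below is not the area-weighted algorithm, and Iris's internal wiring differs):

import dask.array as da

def regrid_block(block):
    # Stand-in for a perform step: collapse each (1, y, x) block onto a
    # 2 x 2 target grid by block-averaging.
    t, y, x = block.shape
    return block.reshape(t, 2, y // 2, 2, x // 2).mean(axis=(2, 4))

# One chunk per time step, horizontal dims whole, cf. rechunk((1, -1, -1)).
src = da.random.random((4, 10, 10), chunks=(1, 10, 10))
out = da.map_blocks(regrid_block, src, dtype=src.dtype, chunks=(1, 2, 2))
print(out.shape)        # (4, 2, 2), still lazy at this point
result = out.compute()  # regrid_block runs once per time chunk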