From f8408fb85664074b6507a3bbd64218d5ba22cee5 Mon Sep 17 00:00:00 2001 From: Ruth Comer Date: Fri, 5 May 2023 14:40:28 +0100 Subject: [PATCH 1/7] working for all except masked lazy --- .../unit/util/test_broadcast_to_shape.py | 27 ++++++++++ lib/iris/util.py | 50 +++++++++---------- 2 files changed, 50 insertions(+), 27 deletions(-) diff --git a/lib/iris/tests/unit/util/test_broadcast_to_shape.py b/lib/iris/tests/unit/util/test_broadcast_to_shape.py index 36f00fa53f..370dc7a836 100644 --- a/lib/iris/tests/unit/util/test_broadcast_to_shape.py +++ b/lib/iris/tests/unit/util/test_broadcast_to_shape.py @@ -9,6 +9,10 @@ # importing anything else import iris.tests as tests # isort:skip +from unittest import mock + +import dask +import dask.array as da import numpy as np import numpy.ma as ma @@ -40,6 +44,16 @@ def test_added_dimensions_transpose(self): for j in range(4): self.assertArrayEqual(b[i, :, j, :].T, a) + @mock.patch.object(dask.base, "compute", wraps=dask.base.compute) + def test_lazy_added_dimensions_transpose(self, mocked_compute): + # adding dimensions and having the dimensions of the input + # transposed + a = da.random.random([2, 3]) + b = broadcast_to_shape(a, (5, 3, 4, 2), (3, 1)) + for i in range(5): + for j in range(4): + self.assertArrayEqual(b[i, :, j, :].T.compute(), a.compute()) + def test_masked(self): # masked arrays are also accepted a = np.random.random([2, 3]) @@ -49,6 +63,19 @@ def test_masked(self): for j in range(4): self.assertMaskedArrayEqual(b[i, :, j, :].T, m) + @mock.patch.object(dask.base, "compute", wraps=dask.base.compute) + def test_lazy_masked(self, mocked_compute): + # masked arrays are also accepted + a = np.random.random([2, 3]) + m = da.ma.masked_array(a, mask=[[0, 1, 0], [0, 1, 1]]) + b = broadcast_to_shape(m, (5, 3, 4, 2), (3, 1)) + mocked_compute.assert_not_called() + for i in range(5): + for j in range(4): + self.assertMaskedArrayEqual( + b[i, :, j, :].compute().T, m.compute() + ) + def test_masked_degenerate(self): # masked arrays can have degenerate masks too a = np.random.random([2, 3]) diff --git a/lib/iris/util.py b/lib/iris/util.py index d96e0ee359..3c45c2df5e 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -76,35 +76,31 @@ def broadcast_to_shape(array, shape, dim_map): See more at :doc:`/userguide/real_and_lazy_data`. """ - if len(dim_map) != array.ndim: - # We must check for this condition here because we cannot rely on - # getting an error from numpy if the dim_map argument is not the - # correct length, we might just get a segfault. - raise ValueError( - "dim_map must have an entry for every " - "dimension of the input array" - ) + n_new_dims = len(shape) - len(array.shape) + array = array.reshape(array.shape + (1,) * n_new_dims) - def _broadcast_helper(a): - strides = [0] * len(shape) - for idim, dim in enumerate(dim_map): - if shape[dim] != a.shape[idim]: - # We'll get garbage values if the dimensions of array are not - # those indicated by shape. - raise ValueError("shape and array are not compatible") - strides[dim] = a.strides[idim] - return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides) - - array_view = _broadcast_helper(array) - if ma.isMaskedArray(array): - if array.mask is ma.nomask: - # Degenerate masks can be applied as-is. - mask_view = array.mask + # Map current dims to their target dims. + map_dims = tuple(dim_map) + tuple( + n for n in range(len(shape)) if n not in dim_map + ) + # transpose needs the inverse mapping. Keep this as a tuple as dask's transpose + # breaks if this is an array. + inverse_map_dims = tuple(np.argsort(map_dims)) + + # Get dims in required order. + array = np.transpose(array, inverse_map_dims) + new_array = np.broadcast_to(array, shape) + + if ma.isMA(array): + # broadcast_to strips masks so we need to handle them explicitly. + mask = ma.getmask(array) + if mask is ma.nomask: + new_mask = ma.nomask else: - # Mask arrays need to be handled in the same way as the data array. - mask_view = _broadcast_helper(array.mask) - array_view = ma.array(array_view, mask=mask_view) - return array_view + new_mask = np.broadcast_to(mask, shape) + new_array = ma.array(new_array, mask=new_mask) + + return new_array def delta(ndarray, dimension, circular=False): From 446c88e694ffab7fac861c2aec76f29ccdfb96c7 Mon Sep 17 00:00:00 2001 From: Ruth Comer Date: Tue, 9 May 2023 14:46:29 +0100 Subject: [PATCH 2/7] use moveaxis --- lib/iris/util.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/lib/iris/util.py b/lib/iris/util.py index 3c45c2df5e..1c5b0adc9a 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -76,19 +76,12 @@ def broadcast_to_shape(array, shape, dim_map): See more at :doc:`/userguide/real_and_lazy_data`. """ - n_new_dims = len(shape) - len(array.shape) + n_orig_dims = len(array.shape) + n_new_dims = len(shape) - n_orig_dims array = array.reshape(array.shape + (1,) * n_new_dims) - # Map current dims to their target dims. - map_dims = tuple(dim_map) + tuple( - n for n in range(len(shape)) if n not in dim_map - ) - # transpose needs the inverse mapping. Keep this as a tuple as dask's transpose - # breaks if this is an array. - inverse_map_dims = tuple(np.argsort(map_dims)) - # Get dims in required order. - array = np.transpose(array, inverse_map_dims) + array = np.moveaxis(array, range(n_orig_dims), dim_map) new_array = np.broadcast_to(array, shape) if ma.isMA(array): From c2dabb3567b4018a0217b32129f6a8897af8b70b Mon Sep 17 00:00:00 2001 From: Ruth Comer Date: Mon, 22 May 2023 11:57:34 +0100 Subject: [PATCH 3/7] handle lazy masked case --- lib/iris/_lazy_data.py | 9 +++++++++ lib/iris/util.py | 8 +++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/lib/iris/_lazy_data.py b/lib/iris/_lazy_data.py index e0566fc8f2..4c294a7d2f 100644 --- a/lib/iris/_lazy_data.py +++ b/lib/iris/_lazy_data.py @@ -47,6 +47,15 @@ def is_lazy_data(data): return result +def is_lazy_masked_data(data): + """ + Return True if the argument is both an Iris 'lazy' data array and the + underlying array is of masked type. Otherwise return False. + + """ + return is_lazy_data(data) and ma.isMA(da.utils.meta_from_array(data)) + + @lru_cache def _optimum_chunksize_internals( chunks, diff --git a/lib/iris/util.py b/lib/iris/util.py index 1c5b0adc9a..9283c3a985 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -23,7 +23,7 @@ import numpy.ma as ma from iris._deprecation import warn_deprecated -from iris._lazy_data import as_concrete_data, is_lazy_data +from iris._lazy_data import as_concrete_data, is_lazy_data, is_lazy_masked_data from iris.common import SERVICES from iris.common.lenient import _lenient_client import iris.exceptions @@ -93,6 +93,12 @@ def broadcast_to_shape(array, shape, dim_map): new_mask = np.broadcast_to(mask, shape) new_array = ma.array(new_array, mask=new_mask) + elif is_lazy_masked_data(array): + # broadcast_to strips masks so we need to handle them explicitly. + mask = da.ma.getmaskarray(array) + new_mask = da.broadcast_to(mask, shape) + new_array = da.ma.masked_array(new_array, new_mask) + return new_array From a3461936a7f00718adf3e5cf6c6bd04d5bbb2daa Mon Sep 17 00:00:00 2001 From: Ruth Comer Date: Mon, 22 May 2023 12:09:21 +0100 Subject: [PATCH 4/7] add tests for is_lazy_masked_data --- .../lazy_data/test_is_lazy_masked_data.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 lib/iris/tests/unit/lazy_data/test_is_lazy_masked_data.py diff --git a/lib/iris/tests/unit/lazy_data/test_is_lazy_masked_data.py b/lib/iris/tests/unit/lazy_data/test_is_lazy_masked_data.py new file mode 100644 index 0000000000..4d627a706b --- /dev/null +++ b/lib/iris/tests/unit/lazy_data/test_is_lazy_masked_data.py @@ -0,0 +1,27 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the LGPL license. +# See COPYING and COPYING.LESSER in the root of the repository for full +# licensing details. +"""Test function :func:`iris._lazy data.is_lazy_masked_data`.""" + +import dask.array as da +import numpy as np +import pytest + +from iris._lazy_data import is_lazy_masked_data + +real_arrays = [ + np.arange(3), + np.ma.array(range(3)), + np.ma.array(range(3), mask=[0, 1, 1]), +] +lazy_arrays = [da.from_array(arr) for arr in real_arrays] + + +@pytest.mark.parametrize( + "arr, expected", zip(real_arrays + lazy_arrays, [False] * 4 + [True] * 2) +) +def test_is_lazy_masked_data(arr, expected): + result = is_lazy_masked_data(arr) + assert result is expected From 79d8241854482cd5ea8fc5a9331cd7185ca82ecb Mon Sep 17 00:00:00 2001 From: Ruth Comer Date: Mon, 22 May 2023 12:15:03 +0100 Subject: [PATCH 5/7] whatsnew --- docs/src/whatsnew/latest.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index b78087b8fb..04b5fa6465 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -30,7 +30,8 @@ This document explains the changes made to Iris for this release ✨ Features =========== -#. N/A +#. `@rcomer`_ rewrote :func:`~iris.util.broadcast_to_shape` so it now handles + lazy data. (:pull:`5307`) 🐛 Bugs Fixed From e7e8a4121a3cc645cd95c4f1cf83606fede96c38 Mon Sep 17 00:00:00 2001 From: Ruth Comer Date: Mon, 22 May 2023 12:19:32 +0100 Subject: [PATCH 6/7] check compute isn't called --- lib/iris/tests/unit/util/test_broadcast_to_shape.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/iris/tests/unit/util/test_broadcast_to_shape.py b/lib/iris/tests/unit/util/test_broadcast_to_shape.py index 370dc7a836..3df1634ba5 100644 --- a/lib/iris/tests/unit/util/test_broadcast_to_shape.py +++ b/lib/iris/tests/unit/util/test_broadcast_to_shape.py @@ -50,6 +50,7 @@ def test_lazy_added_dimensions_transpose(self, mocked_compute): # transposed a = da.random.random([2, 3]) b = broadcast_to_shape(a, (5, 3, 4, 2), (3, 1)) + mocked_compute.assert_not_called() for i in range(5): for j in range(4): self.assertArrayEqual(b[i, :, j, :].T.compute(), a.compute()) From 4b0d6b5e360421a02ec0cf2616b4452a9522b907 Mon Sep 17 00:00:00 2001 From: Ruth Comer Date: Mon, 22 May 2023 13:45:59 +0100 Subject: [PATCH 7/7] update docstring --- lib/iris/util.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/iris/util.py b/lib/iris/util.py index 9283c3a985..0b31ebdafc 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -34,8 +34,7 @@ def broadcast_to_shape(array, shape, dim_map): Broadcast an array to a given shape. Each dimension of the array must correspond to a dimension in the - given shape. Striding is used to repeat the array until it matches - the desired shape, returning repeated views on the original array. + given shape. The result is a read-only view (see :func:`numpy.broadcast_to`). If you need to write to the resulting array, make a copy first. Args: