diff --git a/benchmarks/benchmarks/generate_data/__init__.py b/benchmarks/benchmarks/generate_data/__init__.py index bb53e26b2f..3366bec6e0 100644 --- a/benchmarks/benchmarks/generate_data/__init__.py +++ b/benchmarks/benchmarks/generate_data/__init__.py @@ -106,11 +106,14 @@ def load_realised(): file loading, but some benchmarks are only meaningful if starting with real arrays. """ + from iris.fileformats._nc_load_rules import helpers from iris.fileformats.netcdf.loader import _get_cf_var_data as pre_patched def patched(cf_var, filename): return as_concrete_data(pre_patched(cf_var, filename)) - netcdf._get_cf_var_data = patched - yield netcdf - netcdf._get_cf_var_data = pre_patched + netcdf.loader._get_cf_var_data = patched + helpers._get_cf_var_data = patched + yield + netcdf.loader._get_cf_var_data = pre_patched + helpers._get_cf_var_data = pre_patched diff --git a/benchmarks/benchmarks/generate_data/stock.py b/benchmarks/benchmarks/generate_data/stock.py index 04698e8ff5..47014078e7 100644 --- a/benchmarks/benchmarks/generate_data/stock.py +++ b/benchmarks/benchmarks/generate_data/stock.py @@ -162,7 +162,8 @@ def realistic_4d_w_everything(w_mesh=False, lazy=False) -> iris.cube.Cube: lazy : bool If True, the Cube will be returned with all arrays as they would normally be loaded from file (i.e. most will still be lazy Dask - arrays). If False, all arrays will be realised NumPy arrays. + arrays). If False, all arrays (except derived coordinates) will be + realised NumPy arrays. """ diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index 32c97b9cac..881c3ab64e 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -48,11 +48,11 @@ This document explains the changes made to Iris for this release However, :meth:`~iris.cube.Cube.transpose` will work, as will :meth:`~iris.cube.Cube.copy`. Note that, ``cube.copy(data=iris.DATALESS)`` will provide a dataless copy of a cube. (:issue:`4447`, :pull:`6253`) - + #. 
`@ESadek-MO`_ added the :mod:`iris.quickplot` ``footer`` kwarg to render text in the bottom right of the plot figure. (:issue:`6247`, :pull:`6332`) - + 🐛 Bugs Fixed ============= @@ -65,6 +65,9 @@ This document explains the changes made to Iris for this release older NetCDF formats e.g. ``NETCDF4_CLASSIC`` support a maximum precision of 32-bit. (:issue:`6178`, :pull:`6343`) +#. `@bouweandela`_ fixed handling of masked Dask arrays in + :func:`~iris.util.array_equal`. + 💣 Incompatible Changes ======================= @@ -145,6 +148,8 @@ This document explains the changes made to Iris for this release #. `@trexfeathers`_ temporarily pinned Sphinx to `<8.2`. (:pull:`6344`, :issue:`6345`) +#. `@bouweandela`_ fixed a bug in the benchmarking code that caused all benchmarks + to be run with lazy data. (:pull:`6339`) .. comment Whatsnew author names (@github name) in alphabetical order. Note that, diff --git a/lib/iris/coords.py b/lib/iris/coords.py index bc0991565c..ca73dcb729 100644 --- a/lib/iris/coords.py +++ b/lib/iris/coords.py @@ -589,21 +589,22 @@ def __eq__(self, other): if hasattr(other, "metadata"): # metadata comparison eq = self.metadata == other.metadata + + # Also consider bounds, if we have them. + # (N.B. though only Coords can ever actually *have* bounds). + if eq and eq is not NotImplemented: + eq = self.has_bounds() is other.has_bounds() + # data values comparison if eq and eq is not NotImplemented: eq = iris.util.array_equal( self._core_values(), other._core_values(), withnans=True ) - - # Also consider bounds, if we have them. - # (N.B. though only Coords can ever actually *have* bounds). 
@pytest.mark.parametrize(
    "a,b",
    [
        case[:2]
        for case in TEST_CASES
        if all(isinstance(arr, (np.ndarray, da.Array)) for arr in case[:2])
    ],
)
def test_compute_hashes_vs_array_equal(a, b):
    """Check that hash comparison agrees with `array_equal(withnans=True)`.

    Only the shared test cases in which both operands are NumPy or Dask
    arrays are used; the remaining fields of each case are ignored.
    """
    hashes = _concatenate._compute_hashes({"a": a, "b": b})
    hashes_match = hashes["a"] == hashes["b"]
    assert array_equal(a, b, withnans=True) == hashes_match
"""Test function :func:`iris.util.array_equal`.""" +import dask.array as da import numpy as np import numpy.ma as ma +import pytest from iris.util import array_equal - -class Test: - def test_0d(self): - array_a = np.array(23) - array_b = np.array(23) - array_c = np.array(7) - assert array_equal(array_a, array_b) - assert not array_equal(array_a, array_c) - - def test_0d_and_scalar(self): - array_a = np.array(23) - assert array_equal(array_a, 23) - assert not array_equal(array_a, 45) - - def test_1d_and_sequences(self): - for sequence_type in (list, tuple): - seq_a = sequence_type([1, 2, 3]) - array_a = np.array(seq_a) - assert array_equal(array_a, seq_a) - assert not array_equal(array_a, seq_a[:-1]) - array_a[1] = 45 - assert not array_equal(array_a, seq_a) - - def test_nd(self): - array_a = np.array(np.arange(24).reshape(2, 3, 4)) - array_b = np.array(np.arange(24).reshape(2, 3, 4)) - array_c = np.array(np.arange(24).reshape(2, 3, 4)) - array_c[0, 1, 2] = 100 - assert array_equal(array_a, array_b) - assert not array_equal(array_a, array_c) - - def test_masked_is_not_ignored(self): - array_a = ma.masked_array([1, 2, 3], mask=[1, 0, 1]) - array_b = ma.masked_array([2, 2, 2], mask=[1, 0, 1]) - assert array_equal(array_a, array_b) - - def test_masked_is_different(self): - array_a = ma.masked_array([1, 2, 3], mask=[1, 0, 1]) - array_b = ma.masked_array([1, 2, 3], mask=[0, 0, 1]) - assert not array_equal(array_a, array_b) - - def test_masked_isnt_unmasked(self): - array_a = np.array([1, 2, 2]) - array_b = ma.masked_array([1, 2, 2], mask=[0, 0, 1]) - assert not array_equal(array_a, array_b) - - def test_masked_unmasked_equivelance(self): - array_a = np.array([1, 2, 2]) - array_b = ma.masked_array([1, 2, 2]) - array_c = ma.masked_array([1, 2, 2], mask=[0, 0, 0]) - assert array_equal(array_a, array_b) - assert array_equal(array_a, array_c) - - def test_fully_masked_arrays(self): - array_a = ma.masked_array(np.arange(24).reshape(2, 3, 4), mask=True) - array_b = 
ma.masked_array(np.arange(24).reshape(2, 3, 4), mask=True) - assert array_equal(array_a, array_b) - - def test_fully_masked_0d_arrays(self): - array_a = ma.masked_array(3, mask=True) - array_b = ma.masked_array(3, mask=True) - assert array_equal(array_a, array_b) - - def test_fully_masked_string_arrays(self): - array_a = ma.masked_array(["a", "b", "c"], mask=True) - array_b = ma.masked_array(["a", "b", "c"], mask=[1, 1, 1]) - assert array_equal(array_a, array_b) - - def test_partially_masked_string_arrays(self): - array_a = ma.masked_array(["a", "b", "c"], mask=[1, 0, 1]) - array_b = ma.masked_array(["a", "b", "c"], mask=[1, 0, 1]) - assert array_equal(array_a, array_b) - - def test_string_arrays_equal(self): - array_a = np.array(["abc", "def", "efg"]) - array_b = np.array(["abc", "def", "efg"]) - assert array_equal(array_a, array_b) - - def test_string_arrays_different_contents(self): - array_a = np.array(["abc", "def", "efg"]) - array_b = np.array(["abc", "de", "efg"]) - assert not array_equal(array_a, array_b) - - def test_string_arrays_subset(self): - array_a = np.array(["abc", "def", "efg"]) - array_b = np.array(["abc", "def"]) - assert not array_equal(array_a, array_b) - assert not array_equal(array_b, array_a) - - def test_string_arrays_unequal_dimensionality(self): - array_a = np.array("abc") - array_b = np.array(["abc"]) - array_c = np.array([["abc"]]) - assert not array_equal(array_a, array_b) - assert not array_equal(array_b, array_a) - assert not array_equal(array_a, array_c) - assert not array_equal(array_b, array_c) - - def test_string_arrays_0d_and_scalar(self): - array_a = np.array("foobar") - assert array_equal(array_a, "foobar") - assert not array_equal(array_a, "foo") - assert not array_equal(array_a, "foobar.") - - def test_nan_equality_nan_ne_nan(self): - array_a = np.array([1.0, np.nan, 2.0, np.nan, 3.0]) - array_b = array_a.copy() - assert not array_equal(array_a, array_a) - assert not array_equal(array_a, array_b) - - def 
test_nan_equality_nan_naneq_nan(self): - array_a = np.array([1.0, np.nan, 2.0, np.nan, 3.0]) - array_b = np.array([1.0, np.nan, 2.0, np.nan, 3.0]) - assert array_equal(array_a, array_a, withnans=True) - assert array_equal(array_a, array_b, withnans=True) - - def test_nan_equality_nan_nanne_a(self): - array_a = np.array([1.0, np.nan, 2.0, np.nan, 3.0]) - array_b = np.array([1.0, np.nan, 2.0, 0.0, 3.0]) - assert not array_equal(array_a, array_b, withnans=True) - - def test_nan_equality_a_nanne_b(self): - array_a = np.array([1.0, np.nan, 2.0, np.nan, 3.0]) - array_b = np.array([1.0, np.nan, 2.0, np.nan, 4.0]) - assert not array_equal(array_a, array_b, withnans=True) +ARRAY1 = np.array(np.arange(24).reshape(2, 3, 4)) +ARRAY1[0, 1, 2] = 100 + +ARRAY2 = np.array([1.0, np.nan, 2.0, np.nan, 3.0]) + +TEST_CASES = [ + # test empty + (np.array([]), np.array([]), False, True), + (np.array([]), np.array([], dtype=np.float64), True, True), + # test 0d + (np.array(23), np.array(23), False, True), + (np.array(23), np.array(7), False, False), + # test 0d and scalar + (np.array(23), 23, False, True), + (np.array(23), 45, False, False), + # test 1d and sequences + (np.array([1, 2, 3]), [1, 2, 3], False, True), + (np.array([1, 2, 3]), [1, 2], False, False), + (np.array([1, 45, 3]), [1, 2, 3], False, False), + (np.array([1, 2, 3]), (1, 2, 3), False, True), + (np.array([1, 2, 3]), (1, 2), False, False), + (np.array([1, 45, 3]), (1, 2, 3), False, False), + # test 3d + ( + np.array(np.arange(24).reshape(2, 3, 4)), + np.array(np.arange(24).reshape(2, 3, 4)), + False, + True, + ), + ( + np.array(np.arange(24).reshape(2, 3, 4)), + ARRAY1, + False, + False, + ), + # test masked is not ignored + ( + ma.masked_array([1, 2, 3], mask=[1, 0, 1]), + ma.masked_array([2, 2, 2], mask=[1, 0, 1]), + False, + True, + ), + # test masked is different + ( + ma.masked_array([1, 2, 3], mask=[1, 0, 1]), + ma.masked_array([1, 2, 3], mask=[0, 0, 1]), + False, + False, + ), + # test masked isn't unmasked + ( + 
np.array([1, 2, 2]), + ma.masked_array([1, 2, 2], mask=[0, 0, 1]), + False, + False, + ), + ( + ma.masked_array([1, 2, 2], mask=[0, 0, 1]), + ma.masked_array([1, 2, 2]), + False, + False, + ), + ( + np.array([1, 2]), + ma.masked_array([1, 3], mask=[0, 1]), + False, + False, + ), + # test masked/unmasked_equivalence + ( + np.array([1, 2, 2]), + ma.masked_array([1, 2, 2]), + False, + True, + ), + ( + np.array([1, 2, 2]), + ma.masked_array([1, 2, 2], mask=[0, 0, 0]), + False, + True, + ), + # test fully masked arrays + ( + ma.masked_array(np.arange(24).reshape(2, 3, 4), mask=True), + ma.masked_array(np.arange(24).reshape(2, 3, 4), mask=True), + False, + True, + ), + # test fully masked 0d arrays + ( + ma.masked_array(3, mask=True), + ma.masked_array(3, mask=True), + False, + True, + ), + # test fully masked string arrays + ( + ma.masked_array(["a", "b", "c"], mask=True), + ma.masked_array(["a", "b", "c"], mask=[1, 1, 1]), + False, + True, + ), + # test partially masked string arrays + ( + ma.masked_array(["a", "b", "c"], mask=[1, 0, 1]), + ma.masked_array(["a", "b", "c"], mask=[1, 0, 1]), + False, + True, + ), + # test string arrays equal + ( + np.array(["abc", "def", "efg"]), + np.array(["abc", "def", "efg"]), + False, + True, + ), + # test string arrays different contents + ( + np.array(["abc", "def", "efg"]), + np.array(["abc", "de", "efg"]), + False, + False, + ), + # test string arrays subset + ( + np.array(["abc", "def", "efg"]), + np.array(["abc", "def"]), + False, + False, + ), + ( + np.array(["abc", "def"]), + np.array(["abc", "def", "efg"]), + False, + False, + ), + # test string arrays unequal dimensionality + (np.array("abc"), np.array(["abc"]), False, False), + (np.array(["abc"]), np.array("abc"), False, False), + (np.array("abc"), np.array([["abc"]]), False, False), + (np.array(["abc"]), np.array([["abc"]]), False, False), + # test string arrays 0d and scalar + (np.array("foobar"), "foobar", False, True), + (np.array("foobar"), "foo", False, False), + 
(np.array("foobar"), "foobar.", False, False), + # test nan equality nan ne nan + (ARRAY2, ARRAY2, False, False), + (ARRAY2, ARRAY2.copy(), False, False), + # test nan equality nan naneq nan + (ARRAY2, ARRAY2, True, True), + (ARRAY2, ARRAY2.copy(), True, True), + # test nan equality nan nanne a + ( + np.array([1.0, np.nan, 2.0, np.nan, 3.0]), + np.array([1.0, np.nan, 2.0, 0.0, 3.0]), + True, + False, + ), + # test nan equality a nanne b + ( + np.array([1.0, np.nan, 2.0, np.nan, 3.0]), + np.array([1.0, np.nan, 2.0, np.nan, 4.0]), + True, + False, + ), +] + + +@pytest.mark.parametrize("lazy", [False, True]) +@pytest.mark.parametrize("array_a,array_b,withnans,eq", TEST_CASES) +def test_array_equal(array_a, array_b, withnans, eq, lazy): + if lazy: + identical = array_a is array_b + if isinstance(array_a, np.ndarray): + array_a = da.asarray(array_a, chunks=2) + if isinstance(array_b, np.ndarray): + array_b = da.asarray(array_b, chunks=1) + if identical: + array_b = array_a + assert eq == array_equal(array_a, array_b, withnans=withnans) diff --git a/lib/iris/util.py b/lib/iris/util.py index 94cb077a2f..14682314b0 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -387,14 +387,58 @@ def _rolling_window(array): return rw -def array_equal(array1, array2, withnans=False): +def _masked_array_equal( + array1: np.ndarray, + array2: np.ndarray, + equal_nan: bool, +) -> np.ndarray: + """Return whether two, possibly masked, arrays are equal.""" + mask1 = ma.getmask(array1) + mask2 = ma.getmask(array2) + + # Compare mask equality. + if mask1 is ma.nomask and mask2 is ma.nomask: + eq = True + elif mask1 is ma.nomask: + eq = not mask2.any() + elif mask2 is ma.nomask: + eq = not mask1.any() + else: + eq = np.array_equal(mask1, mask2) + + if not eq: + eqs = np.zeros(array1.shape, dtype=bool) + else: + # Compare data equality. + if not (mask1 is ma.nomask or mask2 is ma.nomask): + # Ignore masked data. 
+ ignore = mask1 + else: + ignore = None + + if equal_nan: + # Ignore data that is np.nan in both arrays. + nanmask = np.isnan(array1) & np.isnan(array2) + if ignore is None: + ignore = nanmask + else: + ignore |= nanmask + + eqs = ma.getdata(array1) == ma.getdata(array2) + if ignore is not None: + eqs = np.where(ignore, True, eqs) + + return eqs + + +def array_equal(array1, array2, withnans: bool = False) -> bool: """Return whether two arrays have the same shape and elements. Parameters ---------- array1, array2 : arraylike Args to be compared, normalised if necessary with :func:`np.asarray`. - withnans : bool, default=False + withnans : default=False When unset (default), the result is False if either input contains NaN points. This is the normal floating-point arithmetic result. When set, return True if inputs contain the same value in all elements, @@ -412,6 +456,9 @@ def array_equal(array1, array2, withnans=False): if withnans and (array1 is array2): return True + if withnans and not (array1.dtype.kind == "f" or array2.dtype.kind == "f"): + withnans = False + def normalise_array(array): if not is_lazy_data(array): if not ma.isMaskedArray(array): @@ -422,18 +469,25 @@ def normalise_array(array): eq = array1.shape == array2.shape if eq: - array1_masked = ma.is_masked(array1) - eq = array1_masked == ma.is_masked(array2) - if eq and array1_masked: - eq = np.array_equal(ma.getmaskarray(array1), ma.getmaskarray(array2)) - if eq: - eqs = array1 == array2 - if withnans and (array1.dtype.kind == "f" or array2.dtype.kind == "f"): - eqs = np.where(np.isnan(array1) & np.isnan(array2), True, eqs) - eq = np.all(eqs) - eq = bool(eq) or eq is ma.masked + if is_lazy_data(array1) or is_lazy_data(array2): + # Use a separate map and reduce operation to avoid running out of memory. 
+ ndim = array1.ndim + indices = tuple(range(ndim)) + eq = da.blockwise( + _masked_array_equal, + indices, + array1, + indices, + array2, + indices, + dtype=bool, + meta=np.empty((0,) * ndim, dtype=bool), + equal_nan=withnans, + ).all() + else: + eq = _masked_array_equal(array1, array2, equal_nan=withnans).all() - return eq + return bool(eq) def approx_equal(a, b, max_absolute_error=1e-10, max_relative_error=1e-10):