diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index ae13b8a883..7d99411fbd 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -38,6 +38,11 @@ This document explains the changes made to Iris for this release your code for new floating point problems if activating this (e.g. when using the :class:`~iris.Constraint` API). (:pull:`6260`) +#. `@pp-mo`_ added a new utility function :func:`~iris.util.equalise_cubes`, to help + with aligning cubes so they can merge / concatenate. + (:issue:`6248`, :pull:`6257`) + + 🐛 Bugs Fixed ============= diff --git a/lib/iris/tests/unit/util/test_equalise_cubes.py b/lib/iris/tests/unit/util/test_equalise_cubes.py new file mode 100644 index 0000000000..5aa0e28c2e --- /dev/null +++ b/lib/iris/tests/unit/util/test_equalise_cubes.py @@ -0,0 +1,313 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Unit tests for the :func:`iris.util.equalise_cubes` function.""" + +import warnings + +from cf_units import Unit +import numpy as np +import pytest + +from iris.coords import DimCoord +from iris.cube import Cube +from iris.util import equalise_cubes +from iris.warnings import IrisUserWarning + + +def _scramble(inputs): + # Reorder items (IN PLACE) to check that order does not affect operation. + # Rather than anything more clever, we'll settle for just reversing the order. 
+ inputs[:] = inputs[::-1] + + +@pytest.fixture(params=["off", "on", "applyall", "scrambled"]) +def usage(request): + # Fixture to check different usage modes for a given operation control keyword + return request.param + + +def _usage_common(usage, op_keyword_name, test_cubes): + kwargs = {} + if usage == "off": + pass + elif usage in ("on", "scrambled"): + kwargs[op_keyword_name] = True + if usage == "scrambled": + # reorder the input cubes, but in-place + _scramble(test_cubes) + elif usage == "applyall": + kwargs["apply_all"] = True + else: + raise ValueError(f"Unrecognised 'usage' option {usage!r}") + default_expected_metadatas = [cube.metadata for cube in test_cubes] + return kwargs, default_expected_metadatas + + +def _cube( + stdname=None, + longname=None, + varname=None, + units="unknown", + cell_methods=(), + **attributes, +): + # Construct a simple test-cube with given metadata properties. + cube = Cube( + [1], + standard_name=stdname, + long_name=longname, + var_name=varname, + cell_methods=cell_methods, + units=units, + attributes=attributes, + ) + return cube + + +_NO_OP_MESSAGE = "'equalise_cubes' call does nothing" + + +class TestNoOperation: + def test(self): + # A standalone test, that a call with no operations enabled raises a warning + with pytest.warns(IrisUserWarning, match=_NO_OP_MESSAGE): + equalise_cubes([]) + + +class WarnChecked: + @pytest.fixture(autouse=True) + def nowarn(self, usage): + if usage == "off": + with pytest.warns(IrisUserWarning, match=_NO_OP_MESSAGE): + yield + else: + with warnings.catch_warnings(): + warnings.simplefilter("error") + yield + + +class TestUnifyNames(WarnChecked): + # Test the 'normalise_names' operation. 
+ def test_simple(self, usage): + sn = "air_temperature" + stdnames = [sn, sn, sn] + longnames = [None, "long1", "long2"] + varnames = ["var1", None, "var2"] + test_cubes = [ + _cube(stdname=stdname, longname=longname, varname=varname) + for stdname, longname, varname in zip(stdnames, longnames, varnames) + ] + kwargs, expected_metadatas = _usage_common(usage, "normalise_names", test_cubes) + + # Calculate expected results + if usage != "off": + # result cube metadata should all be the same, with no varname + meta = _cube(stdname=sn).metadata + expected_metadatas = [meta, meta, meta] + + # Apply operation + results = equalise_cubes(test_cubes, **kwargs) + + # Assert result + assert [cube.metadata for cube in results] == expected_metadatas + + def test_multi(self, usage): + # Show that different cases are resolved independently + sn1, sn2 = "air_temperature", "air_pressure" + stdnames = [sn1, None, None, None, sn2, None] + longnames = ["long1", "long2", None, None, "long3", None] + varnames = ["var1", None, "var3", "var4", None, None] + test_cubes = [ + _cube(stdname=stdname, longname=longname, varname=varname) + for stdname, longname, varname in zip(stdnames, longnames, varnames) + ] + kwargs, expected_metadatas = _usage_common(usage, "normalise_names", test_cubes) + + # Calculate expected results + if usage != "off": + stdnames = [sn1, None, None, None, sn2, None] + longnames = [None, "long2", None, None, None, None] + varnames = [None, None, "var3", "var4", None, None] + expected_metadatas = [ + _cube(stdname=stdname, longname=longname, varname=varname).metadata + for stdname, longname, varname in zip(stdnames, longnames, varnames) + ] + if usage == "scrambled": + _scramble(expected_metadatas) + + # Apply operation + results = equalise_cubes(test_cubes, **kwargs) + + # Assert result + assert [cube.metadata for cube in results] == expected_metadatas + + +class TestEqualiseAttributes(WarnChecked): + # Test the 'equalise_attributes' operation. 
+ def test_calling(self, usage, mocker): + patch = mocker.patch("iris.util.equalise_attributes") + test_cubes = [_cube()] + kwargs, expected_metadatas = _usage_common( + usage, "equalise_attributes", test_cubes + ) + + # Apply operation + equalise_cubes(test_cubes, **kwargs) + + expected_calls = 0 if usage == "off" else 1 + assert len(patch.call_args_list) == expected_calls + + def test_basic_function(self, usage): + test_cubes = [_cube(att_a=10, att_b=1), _cube(att_a=10, att_b=2)] + kwargs, expected_metadatas = _usage_common( + usage, "equalise_attributes", test_cubes + ) + + # Calculate expected results + if usage != "off": + # result cube metadata should all be the same, with no att_b + meta = _cube(att_a=10).metadata + expected_metadatas = [meta, meta] + + # Apply operation + results = equalise_cubes(test_cubes, **kwargs) + + # Assert result + assert [cube.metadata for cube in results] == expected_metadatas + + def test_operation_in_groups(self, usage): + # Check that it acts independently within groups (as defined, here, by naming) + test_cubes = [ + _cube(longname="a", att_a=10, att_b=1), + _cube(longname="a", att_a=10, att_b=2), + _cube(longname="b", att_a=10, att_b=1), + _cube(longname="b", att_a=10, att_b=1), + ] + kwargs, expected_metadatas = _usage_common( + usage, "equalise_attributes", test_cubes + ) + + # Calculate expected results + if usage != "off": + # attributes should be equalised separately within each name group + expected_metadatas = [ + # the "a" cubes have lost att_b, but the "b" cubes retain it + _cube(longname="a", att_a=10).metadata, + _cube(longname="a", att_a=10).metadata, + _cube(longname="b", att_a=10, att_b=1).metadata, + _cube(longname="b", att_a=10, att_b=1).metadata, + ] + if usage == "scrambled": + _scramble(expected_metadatas) + + # Apply operation + results = equalise_cubes(test_cubes, **kwargs) + + # Assert result + assert [cube.metadata for cube in results] == expected_metadatas + + def test_array_attributes(self, usage): + # 
Array content is worth a special test because it breaks dictionary equality. + a1 = np.array([4.1, 5.2, 6.3]) + a2 = np.array([1, 2]) + a3 = np.array([1, 3]) + test_cubes = [ + _cube(longname="a", v1=a1, v2=a2), + _cube(longname="a", v1=a1, v2=a3), + _cube(longname="b", v1=a1, v2=a2), + _cube(longname="b", v1=a1, v2=a2), + ] + kwargs, expected_metadatas = _usage_common( + usage, "equalise_attributes", test_cubes + ) + + # Calculate expected results + if usage != "off": + # the "a" cubes lose the differing v2, but the "b" cubes retain it + result_cubes = [ + _cube(longname="a", v1=a1), + _cube(longname="a", v1=a1), + _cube(longname="b", v1=a1, v2=a2), + _cube(longname="b", v1=a1, v2=a2), + ] + expected_metadatas = [cube.metadata for cube in result_cubes] + if usage == "scrambled": + _scramble(expected_metadatas) + + # Apply operation + results = equalise_cubes(test_cubes, **kwargs) + + # Assert result + assert [cube.metadata for cube in results] == expected_metadatas + + +class TestUnifyTimeUnits(WarnChecked): + # Test the 'unify_time_units' operation. 
+ def test_calling(self, usage, mocker): + patch = mocker.patch("iris.util.unify_time_units") + test_cubes = [_cube()] + kwargs, expected_metadatas = _usage_common( + usage, "unify_time_units", test_cubes + ) + + # Apply operation + equalise_cubes(test_cubes, **kwargs) + + expected_calls = 0 if usage == "off" else 1 + assert len(patch.call_args_list) == expected_calls + + def _cube_timeunits(self, unit, **kwargs): + cube = _cube(**kwargs) + cube.add_dim_coord(DimCoord([0.0], standard_name="time", units=unit), 0) + return cube + + def test_basic_function(self, usage): + if usage == "scrambled": + pytest.skip("scrambled mode not supported") + tu1, tu2 = [Unit(name) for name in ("days since 1970", "days since 1971")] + cu1, cu2 = self._cube_timeunits(tu1), self._cube_timeunits(tu2) + test_cubes = [cu1, cu2] + kwargs, expected_metadatas = _usage_common( + usage, "unify_time_units", test_cubes + ) + + expected_units = [tu1, tu2 if usage == "off" else tu1] + + # Apply operation + results = equalise_cubes(test_cubes, **kwargs) + + # Assert result + assert [cube.coord("time").units for cube in results] == expected_units + + def test_operation_in_groups(self, usage): + # Check that it acts independently within groups (as defined, here, by naming) + test_cubes = [ + _cube(longname="a", att_a=10, att_b=1), + _cube(longname="a", att_a=10, att_b=2), + _cube(longname="b", att_a=10, att_b=1), + _cube(longname="b", att_a=10, att_b=1), + ] + kwargs, expected_metadatas = _usage_common( + usage, "equalise_attributes", test_cubes + ) + + # Calculate expected results + if usage != "off": + # NOTE(review): exercises 'equalise_attributes', not 'unify_time_units' + expected_metadatas = [ + # the "a" cubes have lost att_b, but the "b" cubes retain it + _cube(longname="a", att_a=10).metadata, + _cube(longname="a", att_a=10).metadata, + _cube(longname="b", att_a=10, att_b=1).metadata, + _cube(longname="b", att_a=10, att_b=1).metadata, + ] + if usage == "scrambled": + _scramble(expected_metadatas) + + 
# Apply operation + results = equalise_cubes(test_cubes, **kwargs) + + # Assert result + assert [cube.metadata for cube in results] == expected_metadatas diff --git a/lib/iris/util.py b/lib/iris/util.py index a808087fd8..9681ab1484 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -8,6 +8,7 @@ from abc import ABCMeta, abstractmethod from collections.abc import Hashable, Iterable +from copy import deepcopy import functools import inspect import os @@ -15,6 +16,7 @@ import sys import tempfile from typing import Literal +from warnings import warn import cf_units from dask import array as da @@ -27,6 +29,7 @@ from iris.common import SERVICES from iris.common.lenient import _lenient_client import iris.exceptions +import iris.warnings def broadcast_to_shape(array, shape, dim_map, chunks=None): @@ -2189,3 +2192,137 @@ def mask_cube_from_shapefile(cube, shape, minimum_weight=0.0, in_place=False): masked_cube = mask_cube(cube, shapefile_mask, in_place=in_place) if not in_place: return masked_cube + + +def equalise_cubes( + cubes, + apply_all=False, + normalise_names=False, + equalise_attributes=False, + unify_time_units=False, +): + """Modify a set of cubes to assist merge/concatenate operations. + + Various different adjustments can be applied to the input cubes, to remove + differences which may prevent them from combining into larger cubes. The requested + "equalisation" operations are applied to each group of input cubes with matching + cube metadata (names, units, attributes and cell-methods). + + Parameters + ---------- + cubes : sequence of :class:`~iris.cube.Cube` + The input cubes, in a list or similar. + + apply_all : bool, default=False + Enable *all* the equalisation operations. + + normalise_names : bool, default=False + When True, remove any redundant ``var_name`` and ``long_name`` properties, + leaving only one ``standard_name``, ``long_name`` or ``var_name`` per cube. + In this case, the adjusted names are also used when selecting input groups. 
+ + equalise_attributes : bool, default=False + When ``True``, apply an :func:`equalise_attributes` operation to each input + group. In this case, attributes are ignored when selecting input groups. + + unify_time_units : bool, default=False + When True, apply the :func:`unify_time_units` operation to each input group. + Note : while this may convert units of time reference coordinates, it does + not affect the units of the cubes themselves. + + Returns + ------- + :class:`~iris.cube.CubeList` + A CubeList containing the original input cubes, modified as required (in-place) + ready for merge or concatenate operations. + + Notes + ----- + All the 'equalise' operations operate in a similar fashion, in that they identify + and remove differences in a specific metadata element, altering metadata so that + a merge or concatenate can potentially combine a group of cubes into a single + result cube. + + The various 'equalise' operations are not applied to the entire input, but to + groups of input cubes with the same ``cube.metadata``. + + The input cube groups also depend on the equalisation operation(s) selected : + Operations which equalise a specific cube metadata element (names, units, + attributes or cell-methods) exclude that element from the input grouping criteria. + + """ + from iris.common.metadata import CubeMetadata + from iris.cube import CubeList + + if normalise_names or apply_all: + # Rationalise all the cube names + # Note: this option operates as a special case, independent of + # and *in advance of* the group selection + # (hence, it affects the groups which other operations are applied to) + for cube in cubes: + if cube.standard_name: + cube.long_name = None + cube.var_name = None + elif cube.long_name: + cube.var_name = None + + # Snapshot the cube metadata elements which we use to identify input groups + # TODO: we might want to sanitise practically comparable types here ? + # (e.g. large object arrays ??) 
+ cube_grouping_values = [ + { + field: deepcopy(getattr(cube.metadata, field)) + for field in CubeMetadata._fields + } + for cube in cubes + ] + + # Collect the selected operations which we are going to apply. + equalisation_ops = [] + + if equalise_attributes or apply_all: + # get the function of the same name in this module + equalisation_ops.append(globals()["equalise_attributes"]) + # Prevent attributes from distinguishing input groups + for grouping_values in cube_grouping_values: + grouping_values.pop("attributes") + + if unify_time_units or apply_all: + # get the function of the same name in this module + equalisation_ops.append(globals()["unify_time_units"]) + + if not equalisation_ops: + if not normalise_names: + msg = ( + "'equalise_cubes' call does nothing, as no equalisation operations " + "are enabled (neither `apply_all` nor any individual keywords set)." + ) + warn(msg, category=iris.warnings.IrisUserWarning) + + else: + # NOTE: if no "equalisation_ops", nothing more to do. + # However, if 'normalise_names' was done, we *already* modified cubes in-place. + + # Group the cubes into sets with the same 'grouping values'. + # N.B. we *can't* use sets, or dictionary key checking, as our 'values' are not + # always hashable -- e.g. especially, array attributes. + # I fear this can be inefficient (repeated array compare), but maybe unavoidable + # TODO: might something nasty happen here if attributes contain weird stuff ?? 
+ cubegroup_values = [] + cubegroup_cubes = [] + for cube, grouping_values in zip(cubes, cube_grouping_values): + if grouping_values not in cubegroup_values: + cubegroup_values.append(grouping_values) + cubegroup_cubes.append([cube]) + else: + i_at = cubegroup_values.index(grouping_values) + cubegroup_cubes[i_at].append(cube) + + # Apply operations to the groups : in-place modifications on the cubes + for group_cubes in cubegroup_cubes: + for op in equalisation_ops: + op(group_cubes) + + # Return a CubeList result = the *original* cubes, as modified + result = CubeList(cubes) + return result