diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index ce534d9461..ed42de42f4 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -93,6 +93,10 @@ This document explains the changes made to Iris for this release :mod:`iris.fileformats._nc_load_rules.helpers` to lessen warning duplication. (:issue:`5536`, :pull:`5685`) +#. `@bjlittle`_ fixed coordinate construction in the NetCDF loading pipeline to + ensure that bounds have the same units as the associated points. + (:issue:`1801`, :pull:`5746`) + 💣 Incompatible Changes ======================= diff --git a/lib/iris/fileformats/_nc_load_rules/helpers.py b/lib/iris/fileformats/_nc_load_rules/helpers.py index 98357c1f26..3cf6aab945 100644 --- a/lib/iris/fileformats/_nc_load_rules/helpers.py +++ b/lib/iris/fileformats/_nc_load_rules/helpers.py @@ -13,8 +13,10 @@ build routines, and which it does not use. """ +from __future__ import annotations + import re -from typing import List +from typing import TYPE_CHECKING, List, Optional import warnings import cf_units @@ -35,6 +37,12 @@ import iris.std_names import iris.util +if TYPE_CHECKING: + from numpy.ma import MaskedArray + from numpy.typing import ArrayLike + + from iris.fileformats.cf import CFBoundaryVariable + # TODO: should un-addable coords / cell measures / etcetera be skipped? iris#5068. # @@ -896,8 +904,9 @@ def get_attr_units(cf_var, attributes): cf_units.as_unit(attr_units) except ValueError: # Using converted unicode message. Can be reverted with Python 3. - msg = "Ignoring netCDF variable {!r} invalid units {!r}".format( - cf_var.cf_name, attr_units + msg = ( + f"Ignoring invalid units {attr_units!r} on netCDF variable " + f"{cf_var.cf_name!r}." ) warnings.warn( msg, @@ -1024,6 +1033,57 @@ def reorder_bounds_data(bounds_data, cf_bounds_var, cf_coord_var): return bounds_data +################################################################################ +def _normalise_bounds_units( + points_units: str, cf_bounds_var: CFBoundaryVariable, bounds_data: ArrayLike +) -> Optional[MaskedArray]: + """Ensure bounds have units compatible with points. + + If required, the `bounds_data` will be converted to the `points_units`. + If the bounds units are not convertible, a warning will be issued and + the `bounds_data` will be ignored. + + Bounds with invalid units will be gracefully left unconverted and passed through. + + Parameters + ---------- + points_units : str + The units of the coordinate points. + cf_bounds_var : CFBoundaryVariable + The serialized NetCDF bounds variable. + bounds_data : MaskedArray + The pre-processed data of the bounds variable. + + Returns + ------- + MaskedArray or None + The bounds data with the same units as the points, or ``None`` + if the bounds units are not convertible to the points units. + + """ + bounds_units = get_attr_units(cf_bounds_var, {}) + + if bounds_units != UNKNOWN_UNIT_STRING: + points_units = cf_units.Unit(points_units) + bounds_units = cf_units.Unit(bounds_units) + + if bounds_units != points_units: + if bounds_units.is_convertible(points_units): + bounds_data = bounds_units.convert(bounds_data, points_units) + else: + wmsg = ( + f"Ignoring bounds on NetCDF variable {cf_bounds_var.cf_name!r}. " + f"Expected units compatible with {points_units.origin!r}, got " + f"{bounds_units.origin!r}." + ) + warnings.warn( + wmsg, category=iris.exceptions.IrisCfLoadWarning, stacklevel=2 + ) + bounds_data = None + + return bounds_data + + ################################################################################ def build_dimension_coordinate( engine, cf_coord_var, coord_name=None, coord_system=None @@ -1061,6 +1121,8 @@ def build_dimension_coordinate( # dimension names. if cf_bounds_var.shape[:-1] != cf_coord_var.shape: bounds_data = reorder_bounds_data(bounds_data, cf_bounds_var, cf_coord_var) + + bounds_data = _normalise_bounds_units(attr_units, cf_bounds_var, bounds_data) else: bounds_data = None @@ -1186,6 +1248,8 @@ def build_auxiliary_coordinate( # compatibility with array creators (i.e. dask) bounds_data = np.asarray(bounds_data) bounds_data = reorder_bounds_data(bounds_data, cf_bounds_var, cf_coord_var) + + bounds_data = _normalise_bounds_units(attr_units, cf_bounds_var, bounds_data) else: bounds_data = None diff --git a/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test__normalise_bounds_units.py b/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test__normalise_bounds_units.py new file mode 100644 index 0000000000..f6233847e4 --- /dev/null +++ b/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test__normalise_bounds_units.py @@ -0,0 +1,102 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Test function :func:`iris.fileformats._nc_load_rules.helpers._normalise_bounds_units`.""" + +# import iris tests first so that some things can be initialised before +# importing anything else +from typing import Optional +from unittest import mock + +import numpy as np +import pytest + +from iris.exceptions import IrisCfLoadWarning +from iris.fileformats._nc_load_rules.helpers import ( + _normalise_bounds_units, + _WarnComboIgnoringCfLoad, +) + +BOUNDS = mock.sentinel.bounds +CF_NAME = "dummy_bnds" + + +def _make_cf_bounds_var( + units: Optional[str] = None, + unitless: bool = False, +) -> mock.MagicMock: + """Construct a mock CF bounds variable.""" + if units is None: + units = "days since 1970-01-01" + + cf_data = mock.Mock(spec=[]) + # we want to mock the absence of flag attributes to helpers.get_attr_units + # see https://docs.python.org/3/library/unittest.mock.html#deleting-attributes + del cf_data.flag_values + del cf_data.flag_masks + del cf_data.flag_meanings + + cf_var = mock.MagicMock( + cf_name=CF_NAME, + cf_data=cf_data, + units=units, + calendar=None, + dtype=float, + ) + + if unitless: + del cf_var.units + + return cf_var + + +def test_unitless() -> None: + """Test bounds variable with no units.""" + cf_bounds_var = _make_cf_bounds_var(unitless=True) + result = _normalise_bounds_units(None, cf_bounds_var, BOUNDS) + assert result == BOUNDS + + +def test_invalid_units__pass_through() -> None: + """Test bounds variable with invalid units.""" + units = "invalid" + cf_bounds_var = _make_cf_bounds_var(units=units) + wmsg = f"Ignoring invalid units {units!r} on netCDF variable {CF_NAME!r}" + with pytest.warns(_WarnComboIgnoringCfLoad, match=wmsg): + result = _normalise_bounds_units(None, cf_bounds_var, BOUNDS) + assert result == BOUNDS + + +@pytest.mark.parametrize("units", ["unknown", "no_unit", "1", "kelvin"]) +def test_ignore_bounds(units) -> None: + """Test bounds variable with incompatible units compared to points.""" + points_units = "km" + cf_bounds_var = _make_cf_bounds_var(units=units) + wmsg = ( + f"Ignoring bounds on NetCDF variable {CF_NAME!r}. " + f"Expected units compatible with {points_units!r}" + ) + with pytest.warns(IrisCfLoadWarning, match=wmsg): + result = _normalise_bounds_units(points_units, cf_bounds_var, BOUNDS) + assert result is None + + +def test_compatible() -> None: + """Test bounds variable with compatible units requiring conversion.""" + points_units, bounds_units = "days since 1970-01-01", "hours since 1970-01-01" + cf_bounds_var = _make_cf_bounds_var(units=bounds_units) + bounds = np.arange(10, dtype=float) * 24 + result = _normalise_bounds_units(points_units, cf_bounds_var, bounds) + expected = bounds / 24 + np.testing.assert_array_equal(result, expected) + + +def test_same_units() -> None: + """Test bounds variable with same units as points.""" + units = "days since 1970-01-01" + cf_bounds_var = _make_cf_bounds_var(units=units) + bounds = np.arange(10, dtype=float) + result = _normalise_bounds_units(units, cf_bounds_var, bounds) + np.testing.assert_array_equal(result, bounds) + assert result is bounds diff --git a/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_auxiliary_coordinate.py b/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_auxiliary_coordinate.py index e2335d2ee6..73533a9c33 100644 --- a/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_auxiliary_coordinate.py +++ b/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_auxiliary_coordinate.py @@ -45,7 +45,7 @@ def setUp(self): cf_data=cf_data, standard_name=None, long_name="wibble", - units="m", + units="km", shape=points.shape, size=np.prod(points.shape), dtype=points.dtype, @@ -96,31 +96,42 @@ def _get_per_test_bounds_var(_coord_unused): ) @classmethod - def _make_array_and_cf_data(cls, dimension_names): + def _make_array_and_cf_data(cls, dimension_names, rollaxis=False): shape = tuple(cls.dim_names_lens[name] for name in dimension_names) cf_data = mock.MagicMock(_FillValue=None, spec=[]) cf_data.chunking = mock.MagicMock(return_value=shape) - return np.zeros(shape), cf_data - - def _make_cf_bounds_var(self, dimension_names): + data = np.arange(np.prod(shape), dtype=float) + if rollaxis: + shape = shape[1:] + (shape[0],) + data = data.reshape(shape) + data = np.rollaxis(data, -1) + else: + data = data.reshape(shape) + return data, cf_data + + def _make_cf_bounds_var(self, dimension_names, rollaxis=False): # Create the bounds cf variable. - bounds, cf_data = self._make_array_and_cf_data(dimension_names) + bounds, cf_data = self._make_array_and_cf_data( + dimension_names, rollaxis=rollaxis + ) + bounds *= 1000 # Convert to metres. cf_bounds_var = mock.Mock( spec=CFVariable, dimensions=dimension_names, cf_name="wibble_bnds", cf_data=cf_data, + units="m", shape=bounds.shape, size=np.prod(bounds.shape), dtype=bounds.dtype, __getitem__=lambda self, key: bounds[key], ) - return bounds, cf_bounds_var + return cf_bounds_var - def _check_case(self, dimension_names): - bounds, self.cf_bounds_var = self._make_cf_bounds_var( - dimension_names=dimension_names + def _check_case(self, dimension_names, rollaxis=False): + self.cf_bounds_var = self._make_cf_bounds_var( + dimension_names, rollaxis=rollaxis ) # Asserts must lie within context manager because of deferred loading. @@ -133,15 +144,15 @@ def _check_case(self, dimension_names): expected_list = [(self.expected_coord, self.cf_coord_var.cf_name)] self.assertEqual(self.engine.cube_parts["coordinates"], expected_list) - def test_fastest_varying_vertex_dim(self): + def test_fastest_varying_vertex_dim__normalise_bounds(self): # The usual order. self._check_case(dimension_names=("foo", "bar", "nv")) - def test_slowest_varying_vertex_dim(self): + def test_slowest_varying_vertex_dim__normalise_bounds(self): # Bounds in the first (slowest varying) dimension. - self._check_case(dimension_names=("nv", "foo", "bar")) + self._check_case(dimension_names=("nv", "foo", "bar"), rollaxis=True) - def test_fastest_with_different_dim_names(self): + def test_fastest_with_different_dim_names__normalise_bounds(self): # Despite the dimension names ('x', and 'y') differing from the coord's # which are 'foo' and 'bar' (as permitted by the cf spec), # this should still work because the vertex dim is the fastest varying. @@ -232,6 +243,7 @@ def setUp(self): ) points = np.arange(6) + units = "days since 1970-01-01" self.cf_coord_var = mock.Mock( spec=threadsafe_nc.VariableWrapper, dimensions=("foo",), @@ -241,7 +253,7 @@ def setUp(self): cf_data=mock.MagicMock(chunking=mock.Mock(return_value=None), spec=[]), standard_name=None, long_name="wibble", - units="days since 1970-01-01", + units=units, calendar=None, shape=points.shape, size=np.prod(points.shape), @@ -250,13 +262,20 @@ def setUp(self): ) bounds = np.arange(12).reshape(6, 2) + cf_data = mock.MagicMock(chunking=mock.Mock(return_value=None)) + # we want to mock the absence of flag attributes to helpers.get_attr_units + # see https://docs.python.org/3/library/unittest.mock.html#deleting-attributes + del cf_data.flag_values + del cf_data.flag_masks + del cf_data.flag_meanings self.cf_bounds_var = mock.Mock( spec=threadsafe_nc.VariableWrapper, dimensions=("x", "nv"), scale_factor=1, add_offset=0, cf_name="wibble_bnds", - cf_data=mock.MagicMock(chunking=mock.Mock(return_value=None)), + cf_data=cf_data, + units=units, shape=bounds.shape, size=np.prod(bounds.shape), dtype=bounds.dtype, diff --git a/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_dimension_coordinate.py b/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_dimension_coordinate.py index b2c7d4f4d6..28d710d6b8 100644 --- a/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_dimension_coordinate.py +++ b/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_dimension_coordinate.py @@ -22,6 +22,26 @@ from iris.fileformats._nc_load_rules.helpers import build_dimension_coordinate +def _make_bounds_var(bounds, dimensions, units): + bounds = np.array(bounds) + cf_data = mock.Mock(spec=[]) + # we want to mock the absence of flag attributes to helpers.get_attr_units + # see https://docs.python.org/3/library/unittest.mock.html#deleting-attributes + del cf_data.flag_values + del cf_data.flag_masks + del cf_data.flag_meanings + return mock.Mock( + dimensions=dimensions, + cf_name="wibble_bnds", + cf_data=cf_data, + units=units, + calendar=None, + shape=bounds.shape, + dtype=bounds.dtype, + __getitem__=lambda self, key: bounds[key], + ) + + class RulesTestMixin: def setUp(self): # Create dummy pyke engine. @@ -65,12 +85,9 @@ def setUp(self): RulesTestMixin.setUp(self) bounds = np.arange(12).reshape(6, 2) - self.cf_bounds_var = mock.Mock( - dimensions=("x", "nv"), - cf_name="wibble_bnds", - shape=bounds.shape, - __getitem__=lambda self, key: bounds[key], - ) + dimensions = ("x", "nv") + units = "days since 1970-01-01" + self.cf_bounds_var = _make_bounds_var(bounds, dimensions, units) self.bounds = bounds # test_dimcoord_not_added() and test_auxcoord_not_added have been @@ -276,25 +293,22 @@ def setUp(self): standard_name=None, long_name="wibble", cf_data=mock.Mock(spec=[]), - units="m", + units="km", shape=points.shape, dtype=points.dtype, __getitem__=lambda self, key: points[key], ) - def test_slowest_varying_vertex_dim(self): + def test_slowest_varying_vertex_dim__normalise_bounds(self): # Create the bounds cf variable. - bounds = np.arange(12).reshape(2, 6) - self.cf_bounds_var = mock.Mock( - dimensions=("nv", "foo"), - cf_name="wibble_bnds", - shape=bounds.shape, - __getitem__=lambda self, key: bounds[key], - ) + bounds = np.arange(12).reshape(2, 6) * 1000 + dimensions = ("nv", "foo") + units = "m" + self.cf_bounds_var = _make_bounds_var(bounds, dimensions, units) # Expected bounds on the resulting coordinate should be rolled so that # the vertex dimension is at the end. - expected_bounds = bounds.transpose() + expected_bounds = bounds.transpose() / 1000 expected_coord = DimCoord( self.cf_coord_var[:], long_name=self.cf_coord_var.long_name, @@ -314,21 +328,18 @@ def test_slowest_varying_vertex_dim(self): expected_list = [(expected_coord, self.cf_coord_var.cf_name)] self.assertEqual(self.engine.cube_parts["coordinates"], expected_list) - def test_fastest_varying_vertex_dim(self): - bounds = np.arange(12).reshape(6, 2) - self.cf_bounds_var = mock.Mock( - dimensions=("foo", "nv"), - cf_name="wibble_bnds", - shape=bounds.shape, - __getitem__=lambda self, key: bounds[key], - ) + def test_fastest_varying_vertex_dim__normalise_bounds(self): + bounds = np.arange(12).reshape(6, 2) * 1000 + dimensions = ("foo", "nv") + units = "m" + self.cf_bounds_var = _make_bounds_var(bounds, dimensions, units) expected_coord = DimCoord( self.cf_coord_var[:], long_name=self.cf_coord_var.long_name, var_name=self.cf_coord_var.cf_name, units=self.cf_coord_var.units, - bounds=bounds, + bounds=bounds / 1000, ) # Asserts must lie within context manager because of deferred loading. @@ -342,24 +353,21 @@ def test_fastest_varying_vertex_dim(self): expected_list = [(expected_coord, self.cf_coord_var.cf_name)] self.assertEqual(self.engine.cube_parts["coordinates"], expected_list) - def test_fastest_with_different_dim_names(self): + def test_fastest_with_different_dim_names__normalise_bounds(self): # Despite the dimension names 'x' differing from the coord's # which is 'foo' (as permitted by the cf spec), # this should still work because the vertex dim is the fastest varying. - bounds = np.arange(12).reshape(6, 2) - self.cf_bounds_var = mock.Mock( - dimensions=("x", "nv"), - cf_name="wibble_bnds", - shape=bounds.shape, - __getitem__=lambda self, key: bounds[key], - ) + bounds = np.arange(12).reshape(6, 2) * 1000 + dimensions = ("x", "nv") + units = "m" + self.cf_bounds_var = _make_bounds_var(bounds, dimensions, units) expected_coord = DimCoord( self.cf_coord_var[:], long_name=self.cf_coord_var.long_name, var_name=self.cf_coord_var.cf_name, units=self.cf_coord_var.units, - bounds=bounds, + bounds=bounds / 1000, ) # Asserts must lie within context manager because of deferred loading. @@ -396,12 +404,8 @@ def _make_vars(self, points, bounds=None, units="degrees"): ) if bounds: bounds = np.array(bounds).reshape(self.cf_coord_var.shape + (2,)) - self.cf_bounds_var = mock.Mock( - dimensions=("x", "nv"), - cf_name="wibble_bnds", - shape=bounds.shape, - __getitem__=lambda self, key: bounds[key], - ) + dimensions = ("x", "nv") + self.cf_bounds_var = _make_bounds_var(bounds, dimensions, units) def _check_circular(self, circular, *args, **kwargs): if "coord_name" in kwargs: @@ -483,12 +487,13 @@ def _make_vars(self, bounds): # Note that for a scalar the shape of the array from # the cf var is (), rather than (1,). points = np.array([0.0]) + units = "degrees" self.cf_coord_var = mock.Mock( dimensions=(), cf_name="wibble", standard_name=None, long_name="wibble", - units="degrees", + units=units, cf_data=mock.Mock(spec=[]), shape=(), dtype=points.dtype, @@ -496,12 +501,8 @@ def _make_vars(self, bounds): ) bounds = np.array(bounds) - self.cf_bounds_var = mock.Mock( - dimensions=("bnds"), - cf_name="wibble_bnds", - shape=bounds.shape, - __getitem__=lambda self, key: bounds[key], - ) + dimensions = ("bnds",) + self.cf_bounds_var = _make_bounds_var(bounds, dimensions, units) def _assert_circular(self, value): with self.deferred_load_patch, self.get_cf_bounds_var_patch: