diff --git a/lib/iris/__init__.py b/lib/iris/__init__.py index d622bd18b0..0dbdc40510 100644 --- a/lib/iris/__init__.py +++ b/lib/iris/__init__.py @@ -169,6 +169,7 @@ def __init__( pandas_ndim=False, save_split_attrs=False, date_microseconds=False, + derived_bounds=False, ): """Container for run-time options controls. @@ -202,6 +203,8 @@ def __init__( behaviour, such as when using :class:`~iris.Constraint`, and you may need to defend against floating point precision issues where you didn't need to before. + derived_bounds : bool, default=False + When True, uses the new form for deriving bounds with the load. """ # The flag 'example_future_flag' is provided as a reference for the @@ -215,6 +218,7 @@ def __init__( self.__dict__["pandas_ndim"] = pandas_ndim self.__dict__["save_split_attrs"] = save_split_attrs self.__dict__["date_microseconds"] = date_microseconds + self.__dict__["derived_bounds"] = derived_bounds # TODO: next major release: set IrisDeprecation to subclass # DeprecationWarning instead of UserWarning. @@ -228,6 +232,7 @@ def __repr__(self): self.pandas_ndim, self.save_split_attrs, self.date_microseconds, + self.derived_bounds, ) # deprecated_options = {'example_future_flag': 'warning',} diff --git a/lib/iris/fileformats/cf.py b/lib/iris/fileformats/cf.py index 82be010c6e..a47c126e9b 100644 --- a/lib/iris/fileformats/cf.py +++ b/lib/iris/fileformats/cf.py @@ -16,6 +16,7 @@ from abc import ABCMeta, abstractmethod from collections.abc import Iterable, MutableMapping +import contextlib import os import re from typing import ClassVar, Optional @@ -94,6 +95,8 @@ def __init__(self, name, data): #: CF-netCDF formula terms that his variable participates in. self.cf_terms_by_root = {} + self._to_be_promoted = False + self.cf_attrs_reset() @staticmethod @@ -1416,17 +1419,79 @@ def _translate(self, variables): # Identify and register all CF formula terms. formula_terms = _CFFormulaTermsVariable.identify(variables) + if iris.FUTURE.derived_bounds: + # Keep track of all the root vars so we can unpick invalid bounds vars + all_roots = set() + + # cf_var = CFFormulaTermsVariable (loops through everything that appears in formula terms) for cf_var in formula_terms.values(): + # eg. eta:'a' | cf_root = eta and cf_term = a. cf_var.cf_terms_by_root = {'eta': 'a'} (looking at all appearances in formula terms) for cf_root, cf_term in cf_var.cf_terms_by_root.items(): - # Ignore formula terms owned by a bounds variable. + if iris.FUTURE.derived_bounds: + # For the "newstyle" derived-bounds implementation, find vars which appear in derived bounds terms + # and turn them into bounds vars (though they don't appear in a "bounds" attribute) + all_roots.add(cf_root) + + # cf_root_coord = CFCoordinateVariable of the coordinate relating to the root + cf_root_coord = self.cf_group.coordinates.get(cf_root) + if cf_root_coord is None: + cf_root_coord = self.cf_group.auxiliary_coordinates.get(cf_root) + + root_bounds_name = getattr(cf_root_coord, "bounds", None) + # N.B. cf_root_coord may here be None, if the root var was not a + # coord - that is ok, it will not have a 'bounds', we will skip it. + if root_bounds_name in self.cf_group: + # Found a valid *root* bounds variable : search for a corresponding *term* bounds variable + term_bounds_vars = [ + # loop through all formula terms and add them if they have a cf_term_by_root + # where (bounds of cf_root): cf_term (same as before) + f + for f in formula_terms.values() + if f.cf_terms_by_root.get(root_bounds_name) == cf_term + ] + if len(term_bounds_vars) == 1: + (term_bounds_var,) = term_bounds_vars + if term_bounds_var != cf_var: + # N.B. bounds==main-var is valid CF for *no* bounds + cf_var.bounds = term_bounds_var.cf_name + new_var = CFBoundaryVariable( + term_bounds_var.cf_name, term_bounds_var.cf_data + ) + new_var.add_formula_term(root_bounds_name, cf_term) + # "Reclassify" this var as a bounds variable + self.cf_group[term_bounds_var.cf_name] = new_var + else: + # Found 0 term bounds, or >1 : discard the term + # Modify the boundary_variable set _to_be_promoted to True + self.cf_group.get(root_bounds_name)._to_be_promoted = True + if cf_root not in self.cf_group.bounds: + # TODO: explain this section ? cf_name = cf_var.cf_name - if cf_var.cf_name not in self.cf_group: - self.cf_group[cf_name] = CFAuxiliaryCoordinateVariable( - cf_name, cf_var.cf_data - ) + if cf_name not in self.cf_group: + new_var = CFAuxiliaryCoordinateVariable(cf_name, cf_var.cf_data) + if iris.FUTURE.derived_bounds and hasattr(cf_var, "bounds"): + # Implement "new-style" derived bounds link + new_var.bounds = cf_var.bounds + self.cf_group[cf_name] = new_var + self.cf_group[cf_name].add_formula_term(cf_root, cf_term) + if iris.FUTURE.derived_bounds: + for cf_root in all_roots: + # Invalidate "broken" bounds connections + root_var = self.cf_group[cf_root] + if all( + key in root_var.ncattrs() for key in ("bounds", "formula_terms") + ): + root_bounds_var = self.cf_group.get(root_var.bounds) + if ( + root_bounds_var is not None + and "formula_terms" not in root_bounds_var.ncattrs() + ): + # This means it is *not* a valid bounds var, according to CF + root_var.bounds = None + # Determine the CF data variables. data_variable_names = ( set(netcdf_variable_names) - self.cf_group.non_data_variable_names @@ -1454,7 +1519,7 @@ def _span_check( """Sanity check dimensionality.""" var = self.cf_group[var_name] # No span check is necessary if variable is attached to a mesh. - if is_mesh_var or var.spans(cf_variable): + if (is_mesh_var or var.spans(cf_variable)) and not var._to_be_promoted: cf_group[var_name] = var else: # Register the ignored variable. @@ -1498,6 +1563,16 @@ def _span_check( for cf_name in match: _span_check(cf_name) + if iris.FUTURE.derived_bounds: + # TODO: explain this section + if hasattr(cf_variable, "bounds"): + if cf_variable.bounds not in cf_group: + bounds_var = self.cf_group.get(cf_variable.bounds) + if bounds_var: + # TODO: warning if span fails + if bounds_var.spans(cf_variable): + cf_group[cf_variable.bounds] = bounds_var + # Build CF data variable relationships. if isinstance(cf_variable, CFDataVariable): # Add global netCDF attributes. @@ -1540,8 +1615,14 @@ def _span_check( # may be promoted to a CFDataVariable and restrict promotion to only # those formula terms that are reference surface/phenomenon. for cf_var in self.cf_group.formula_terms.values(): + if iris.FUTURE.derived_bounds: + if self.cf_group[cf_var.cf_name] is CFBoundaryVariable: + continue for cf_root, cf_term in cf_var.cf_terms_by_root.items(): cf_root_var = self.cf_group[cf_root] + if iris.FUTURE.derived_bounds: + if not hasattr(cf_root_var, "standard_name"): + continue name = cf_root_var.standard_name or cf_root_var.long_name terms = reference_terms.get(name, []) if isinstance(terms, str) or not isinstance(terms, Iterable): @@ -1552,6 +1633,7 @@ def _span_check( self.cf_group.promoted[cf_var_name] = data_var _build(data_var) break + # Promote any ignored variables. promoted = set() not_promoted = ignored.difference(promoted) diff --git a/lib/iris/tests/integration/netcdf/derived_bounds/__init__.py b/lib/iris/tests/integration/netcdf/derived_bounds/__init__.py new file mode 100644 index 0000000000..0e88f6563d --- /dev/null +++ b/lib/iris/tests/integration/netcdf/derived_bounds/__init__.py @@ -0,0 +1,5 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Integration tests for loading and saving netcdf files with derived coordinate bounds.""" diff --git a/lib/iris/tests/integration/netcdf/derived_bounds/test_bounds_files.py b/lib/iris/tests/integration/netcdf/derived_bounds/test_bounds_files.py new file mode 100644 index 0000000000..968e74b9b3 --- /dev/null +++ b/lib/iris/tests/integration/netcdf/derived_bounds/test_bounds_files.py @@ -0,0 +1,183 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Integration tests for loading of data with bounds of derived coordinates. + +This includes both out "legacy" sample data (which was wrongly recorded), and more +modern testdata (which does not load bounds fully prior to Iris 3.13). + +Both "legacy" and "newstyle" loading behaviour needs to be tested, which depends on the +"FUTURE.derived_bounds" flag setting. +""" + +from pathlib import Path + +import numpy as np +import pytest + +import iris +from iris import FUTURE, sample_data_path +from iris.tests.stock.netcdf import ncgen_from_cdl + +db_testfile_path = Path(__file__).parent / "temp_nc_sources" / "a_new_file.nc" +legacy_filepath = sample_data_path("hybrid_height.nc") + + +@pytest.fixture(params=[True, False], ids=["with_db", "without_db"]) +def derived_bounds(request): + db = request.param + with FUTURE.context(derived_bounds=db): + yield db + + +@pytest.fixture() +def cf_primary_sample_path(tmp_path_factory): + cdl = """ + netcdf a_new_file { + dimensions: + eta = 1 ; + lat = 1 ; + lon = 1 ; + bnds = 2 ; + variables: + double eta(eta) ; + eta:long_name = "eta at full levels" ; + eta:positive = "down" ; + eta:standard_name = "atmosphere_hybrid_sigma_pressure_coordinate" ; + eta:formula_terms = "a: A b: B ps: PS p0: P0" ; + eta:bounds = "eta_bnds" ; + double eta_bnds(eta, bnds) ; + eta_bnds:formula_terms = "a: A_bnds b: B_bnds ps: PS p0: P0" ; + double A(eta) ; + A:long_name = "a coefficient for vertical coordinate at full levels" ; + A:units = "1" ; + double A_bnds(eta, bnds) ; + double B(eta) ; + B:long_name = "b coefficient for vertical coordinate at full levels" ; + B:units = "1" ; + double B_bnds(eta, bnds) ; + double PS(lat, lon) ; + PS:units = "Pa" ; + double P0 ; + P0:units = "Pa" ; + float temp(eta, lat, lon) ; + temp:standard_name = "air_temperature" ; + temp:units = "K" ; + + data: + eta = 1 ; + eta_bnds = 0.5, 1.5 ; + A = 1000 ; + A_bnds = 500, 1500 ; + B = 2 ; + B_bnds = 1, 3 ; + PS = 2000 ; + P0 = 3000 ; + temp = 300 ; + } + """ + dirpath = tmp_path_factory.mktemp("tmp") + filepath = dirpath / "tmp.nc" + ncgen_from_cdl(cdl_str=cdl, cdl_path=None, nc_path=filepath) + return filepath + + +def test_load_legacy_hh(derived_bounds): + cubes = iris.load(legacy_filepath) + + cube_names = sorted([cube.name() for cube in cubes]) + if derived_bounds: + # get an extra promoted cube for the lost 'level-height bounds" + expected_cube_names = [ + "air_potential_temperature", + "level_height_bnds", + "surface_altitude", + ] + else: + expected_cube_names = ["air_potential_temperature", "surface_altitude"] + assert cube_names == expected_cube_names + + main_cube = cubes.extract_cube("air_potential_temperature") + altitude_coord = main_cube.coord("altitude") + assert altitude_coord.has_bounds() + assert altitude_coord.has_lazy_bounds() + assert altitude_coord.shape == main_cube.shape + + level_height_coord = main_cube.coord("atmosphere_hybrid_height_coordinate") + sigma_coord = main_cube.coord("sigma") + surface_altitude_coord = main_cube.coord("surface_altitude") + assert sigma_coord.has_bounds() + assert not surface_altitude_coord.has_bounds() + surface_altitude_cube = cubes.extract_cube("surface_altitude") + assert np.all(surface_altitude_cube.data == surface_altitude_coord.points) + + if not derived_bounds: + assert level_height_coord.has_bounds() + else: + assert not level_height_coord.has_bounds() + + +def test_load_primary_cf_style(derived_bounds, cf_primary_sample_path): + cubes = iris.load(cf_primary_sample_path) + + cube_names = sorted([cube.name() for cube in cubes]) + if derived_bounds: + expected_cube_names = ["PS", "air_temperature"] + else: + # In this case, the bounds are not properly connected + expected_cube_names = ["A_bnds", "B_bnds", "PS", "air_temperature"] + assert cube_names == expected_cube_names + + # Check all the coords on the cube, including whether they have bounds + main_cube = cubes.extract_cube("air_temperature") + assert set(co.name() for co in main_cube.coords()) == set( + [ + "a coefficient for vertical coordinate at full levels", + "b coefficient for vertical coordinate at full levels", + "atmosphere_hybrid_sigma_pressure_coordinate", + "vertical pressure", + "PS", + "air_pressure", + "P0", + ] + ) + + # First, the main hybrid coord + pressure_coord = main_cube.coord("air_pressure") + assert pressure_coord.has_bounds() == derived_bounds + assert pressure_coord.var_name is None + assert main_cube.coord_dims(pressure_coord) == (0, 1, 2) + + co_a = main_cube.coord("a coefficient for vertical coordinate at full levels") + assert co_a.var_name == "A" + assert co_a.has_bounds() == derived_bounds + assert main_cube.coord_dims(co_a) == (0,) + + co_b = main_cube.coord("b coefficient for vertical coordinate at full levels") + assert co_b.var_name == "B" + assert co_b.has_bounds() == derived_bounds + assert main_cube.coord_dims(co_b) == (0,) + + co_eta = main_cube.coord("atmosphere_hybrid_sigma_pressure_coordinate") + assert co_eta.var_name == "eta" + assert co_eta.has_bounds() + assert main_cube.coord_dims(co_eta) == (0,) + + # N.B. this coord is 'made up' by the factory, and does *not* come from a variable + co_VP = main_cube.coord("vertical pressure") + assert co_VP.var_name == "ap" + assert co_VP.has_bounds() == derived_bounds + assert main_cube.coord_dims(co_VP) == (0,) + + # This is the surface pressure + co_PS = main_cube.coord("PS") + assert co_PS.var_name == "PS" + assert not co_PS.has_bounds() + assert main_cube.coord_dims(co_PS) == (1, 2) + + # The scalar reference + co_P0 = main_cube.coord("P0") + assert co_P0.var_name == "P0" + assert not co_P0.has_bounds() + assert main_cube.coord_dims(co_P0) == ()