Skip to content
5 changes: 5 additions & 0 deletions lib/iris/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ def __init__(
pandas_ndim=False,
save_split_attrs=False,
date_microseconds=False,
derived_bounds=False,
):
"""Container for run-time options controls.

Expand Down Expand Up @@ -202,6 +203,8 @@ def __init__(
behaviour, such as when using :class:`~iris.Constraint`, and you
may need to defend against floating point precision issues where
you didn't need to before.
derived_bounds : bool, default=False
When True, uses the new form for deriving bounds with the load.

"""
# The flag 'example_future_flag' is provided as a reference for the
Expand All @@ -215,6 +218,7 @@ def __init__(
self.__dict__["pandas_ndim"] = pandas_ndim
self.__dict__["save_split_attrs"] = save_split_attrs
self.__dict__["date_microseconds"] = date_microseconds
self.__dict__["derived_bounds"] = derived_bounds

# TODO: next major release: set IrisDeprecation to subclass
# DeprecationWarning instead of UserWarning.
Expand All @@ -228,6 +232,7 @@ def __repr__(self):
self.pandas_ndim,
self.save_split_attrs,
self.date_microseconds,
self.derived_bounds,
)

# deprecated_options = {'example_future_flag': 'warning',}
Expand Down
94 changes: 88 additions & 6 deletions lib/iris/fileformats/cf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

from abc import ABCMeta, abstractmethod
from collections.abc import Iterable, MutableMapping
import contextlib
import os
import re
from typing import ClassVar, Optional
Expand Down Expand Up @@ -94,6 +95,8 @@ def __init__(self, name, data):
#: CF-netCDF formula terms that his variable participates in.
self.cf_terms_by_root = {}

self._to_be_promoted = False

self.cf_attrs_reset()

@staticmethod
Expand Down Expand Up @@ -1416,17 +1419,79 @@ def _translate(self, variables):
# Identify and register all CF formula terms.
formula_terms = _CFFormulaTermsVariable.identify(variables)

if iris.FUTURE.derived_bounds:
# Keep track of all the root vars so we can unpick invalid bounds vars
all_roots = set()

# cf_var = CFFormulaTermsVariable (loops through everything that appears in formula terms)
for cf_var in formula_terms.values():
# eg. eta:'a' | cf_root = eta and cf_term = a. cf_var.cf_terms_by_root = {'eta': 'a'} (looking at all appearances in formula terms)
for cf_root, cf_term in cf_var.cf_terms_by_root.items():
# Ignore formula terms owned by a bounds variable.
if iris.FUTURE.derived_bounds:
# For the "newstyle" derived-bounds implementation, find vars which appear in derived bounds terms
# and turn them into bounds vars (though they don't appear in a "bounds" attribute)
all_roots.add(cf_root)

# cf_root_coord = CFCoordinateVariable of the coordinate relating to the root
cf_root_coord = self.cf_group.coordinates.get(cf_root)
if cf_root_coord is None:
cf_root_coord = self.cf_group.auxiliary_coordinates.get(cf_root)

root_bounds_name = getattr(cf_root_coord, "bounds", None)
# N.B. cf_root_coord may here be None, if the root var was not a
# coord - that is ok, it will not have a 'bounds', we will skip it.
if root_bounds_name in self.cf_group:
# Found a valid *root* bounds variable : search for a corresponding *term* bounds variable
term_bounds_vars = [
# loop through all formula terms and add them if they have a cf_term_by_root
# where (bounds of cf_root): cf_term (same as before)
f
for f in formula_terms.values()
if f.cf_terms_by_root.get(root_bounds_name) == cf_term
]
if len(term_bounds_vars) == 1:
(term_bounds_var,) = term_bounds_vars
if term_bounds_var != cf_var:
# N.B. bounds==main-var is valid CF for *no* bounds
cf_var.bounds = term_bounds_var.cf_name
new_var = CFBoundaryVariable(
term_bounds_var.cf_name, term_bounds_var.cf_data
)
new_var.add_formula_term(root_bounds_name, cf_term)
# "Reclassify" this var as a bounds variable
self.cf_group[term_bounds_var.cf_name] = new_var
else:
# Found 0 term bounds, or >1 : discard the term
# Modify the boundary_variable set _to_be_promoted to True
self.cf_group.get(root_bounds_name)._to_be_promoted = True

if cf_root not in self.cf_group.bounds:
# TODO: explain this section ?
cf_name = cf_var.cf_name
if cf_var.cf_name not in self.cf_group:
self.cf_group[cf_name] = CFAuxiliaryCoordinateVariable(
cf_name, cf_var.cf_data
)
if cf_name not in self.cf_group:
new_var = CFAuxiliaryCoordinateVariable(cf_name, cf_var.cf_data)
if iris.FUTURE.derived_bounds and hasattr(cf_var, "bounds"):
# Implement "new-style" derived bounds link
new_var.bounds = cf_var.bounds
self.cf_group[cf_name] = new_var

self.cf_group[cf_name].add_formula_term(cf_root, cf_term)

if iris.FUTURE.derived_bounds:
for cf_root in all_roots:
# Invalidate "broken" bounds connections
root_var = self.cf_group[cf_root]
if all(
key in root_var.ncattrs() for key in ("bounds", "formula_terms")
):
root_bounds_var = self.cf_group.get(root_var.bounds)
if (
root_bounds_var is not None
and "formula_terms" not in root_bounds_var.ncattrs()
):
# This means it is *not* a valid bounds var, according to CF
root_var.bounds = None

# Determine the CF data variables.
data_variable_names = (
set(netcdf_variable_names) - self.cf_group.non_data_variable_names
Expand Down Expand Up @@ -1454,7 +1519,7 @@ def _span_check(
"""Sanity check dimensionality."""
var = self.cf_group[var_name]
# No span check is necessary if variable is attached to a mesh.
if is_mesh_var or var.spans(cf_variable):
if (is_mesh_var or var.spans(cf_variable)) and not var._to_be_promoted:
cf_group[var_name] = var
else:
# Register the ignored variable.
Expand Down Expand Up @@ -1498,6 +1563,16 @@ def _span_check(
for cf_name in match:
_span_check(cf_name)

if iris.FUTURE.derived_bounds:
# TODO: explain this section
if hasattr(cf_variable, "bounds"):
if cf_variable.bounds not in cf_group:
bounds_var = self.cf_group.get(cf_variable.bounds)
if bounds_var:
# TODO: warning if span fails
if bounds_var.spans(cf_variable):
cf_group[cf_variable.bounds] = bounds_var

# Build CF data variable relationships.
if isinstance(cf_variable, CFDataVariable):
# Add global netCDF attributes.
Expand Down Expand Up @@ -1540,8 +1615,14 @@ def _span_check(
# may be promoted to a CFDataVariable and restrict promotion to only
# those formula terms that are reference surface/phenomenon.
for cf_var in self.cf_group.formula_terms.values():
if iris.FUTURE.derived_bounds:
if self.cf_group[cf_var.cf_name] is CFBoundaryVariable:
continue
for cf_root, cf_term in cf_var.cf_terms_by_root.items():
cf_root_var = self.cf_group[cf_root]
if iris.FUTURE.derived_bounds:
if not hasattr(cf_root_var, "standard_name"):
continue
name = cf_root_var.standard_name or cf_root_var.long_name
terms = reference_terms.get(name, [])
if isinstance(terms, str) or not isinstance(terms, Iterable):
Expand All @@ -1552,6 +1633,7 @@ def _span_check(
self.cf_group.promoted[cf_var_name] = data_var
_build(data_var)
break

# Promote any ignored variables.
promoted = set()
not_promoted = ignored.difference(promoted)
Expand Down
5 changes: 5 additions & 0 deletions lib/iris/tests/integration/netcdf/derived_bounds/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the BSD license.
# See LICENSE in the root of the repository for full licensing details.
"""Integration tests for loading and saving netcdf files with derived coordinate bounds."""
183 changes: 183 additions & 0 deletions lib/iris/tests/integration/netcdf/derived_bounds/test_bounds_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the BSD license.
# See LICENSE in the root of the repository for full licensing details.
"""Integration tests for loading of data with bounds of derived coordinates.

This includes both out "legacy" sample data (which was wrongly recorded), and more
modern testdata (which does not load bounds fully prior to Iris 3.13).

Both "legacy" and "newstyle" loading behaviour needs to be tested, which depends on the
"FUTURE.derived_bounds" flag setting.
"""

from pathlib import Path

import numpy as np
import pytest

import iris
from iris import FUTURE, sample_data_path
from iris.tests.stock.netcdf import ncgen_from_cdl

db_testfile_path = Path(__file__).parent / "temp_nc_sources" / "a_new_file.nc"
legacy_filepath = sample_data_path("hybrid_height.nc")


@pytest.fixture(params=[True, False], ids=["with_db", "without_db"])
def derived_bounds(request):
db = request.param
with FUTURE.context(derived_bounds=db):
yield db


@pytest.fixture()
def cf_primary_sample_path(tmp_path_factory):
cdl = """
netcdf a_new_file {
dimensions:
eta = 1 ;
lat = 1 ;
lon = 1 ;
bnds = 2 ;
variables:
double eta(eta) ;
eta:long_name = "eta at full levels" ;
eta:positive = "down" ;
eta:standard_name = "atmosphere_hybrid_sigma_pressure_coordinate" ;
eta:formula_terms = "a: A b: B ps: PS p0: P0" ;
eta:bounds = "eta_bnds" ;
double eta_bnds(eta, bnds) ;
eta_bnds:formula_terms = "a: A_bnds b: B_bnds ps: PS p0: P0" ;
double A(eta) ;
A:long_name = "a coefficient for vertical coordinate at full levels" ;
A:units = "1" ;
double A_bnds(eta, bnds) ;
double B(eta) ;
B:long_name = "b coefficient for vertical coordinate at full levels" ;
B:units = "1" ;
double B_bnds(eta, bnds) ;
double PS(lat, lon) ;
PS:units = "Pa" ;
double P0 ;
P0:units = "Pa" ;
float temp(eta, lat, lon) ;
temp:standard_name = "air_temperature" ;
temp:units = "K" ;

data:
eta = 1 ;
eta_bnds = 0.5, 1.5 ;
A = 1000 ;
A_bnds = 500, 1500 ;
B = 2 ;
B_bnds = 1, 3 ;
PS = 2000 ;
P0 = 3000 ;
temp = 300 ;
}
"""
dirpath = tmp_path_factory.mktemp("tmp")
filepath = dirpath / "tmp.nc"
ncgen_from_cdl(cdl_str=cdl, cdl_path=None, nc_path=filepath)
return filepath


def test_load_legacy_hh(derived_bounds):
cubes = iris.load(legacy_filepath)

cube_names = sorted([cube.name() for cube in cubes])
if derived_bounds:
# get an extra promoted cube for the lost 'level-height bounds"
expected_cube_names = [
"air_potential_temperature",
"level_height_bnds",
"surface_altitude",
]
else:
expected_cube_names = ["air_potential_temperature", "surface_altitude"]
assert cube_names == expected_cube_names

main_cube = cubes.extract_cube("air_potential_temperature")
altitude_coord = main_cube.coord("altitude")
assert altitude_coord.has_bounds()
assert altitude_coord.has_lazy_bounds()
assert altitude_coord.shape == main_cube.shape

level_height_coord = main_cube.coord("atmosphere_hybrid_height_coordinate")
sigma_coord = main_cube.coord("sigma")
surface_altitude_coord = main_cube.coord("surface_altitude")
assert sigma_coord.has_bounds()
assert not surface_altitude_coord.has_bounds()
surface_altitude_cube = cubes.extract_cube("surface_altitude")
assert np.all(surface_altitude_cube.data == surface_altitude_coord.points)

if not derived_bounds:
assert level_height_coord.has_bounds()
else:
assert not level_height_coord.has_bounds()


def test_load_primary_cf_style(derived_bounds, cf_primary_sample_path):
cubes = iris.load(cf_primary_sample_path)

cube_names = sorted([cube.name() for cube in cubes])
if derived_bounds:
expected_cube_names = ["PS", "air_temperature"]
else:
# In this case, the bounds are not properly connected
expected_cube_names = ["A_bnds", "B_bnds", "PS", "air_temperature"]
assert cube_names == expected_cube_names

# Check all the coords on the cube, including whether they have bounds
main_cube = cubes.extract_cube("air_temperature")
assert set(co.name() for co in main_cube.coords()) == set(
[
"a coefficient for vertical coordinate at full levels",
"b coefficient for vertical coordinate at full levels",
"atmosphere_hybrid_sigma_pressure_coordinate",
"vertical pressure",
"PS",
"air_pressure",
"P0",
]
)

# First, the main hybrid coord
pressure_coord = main_cube.coord("air_pressure")
assert pressure_coord.has_bounds() == derived_bounds
assert pressure_coord.var_name is None
assert main_cube.coord_dims(pressure_coord) == (0, 1, 2)

co_a = main_cube.coord("a coefficient for vertical coordinate at full levels")
assert co_a.var_name == "A"
assert co_a.has_bounds() == derived_bounds
assert main_cube.coord_dims(co_a) == (0,)

co_b = main_cube.coord("b coefficient for vertical coordinate at full levels")
assert co_b.var_name == "B"
assert co_b.has_bounds() == derived_bounds
assert main_cube.coord_dims(co_b) == (0,)

co_eta = main_cube.coord("atmosphere_hybrid_sigma_pressure_coordinate")
assert co_eta.var_name == "eta"
assert co_eta.has_bounds()
assert main_cube.coord_dims(co_eta) == (0,)

# N.B. this coord is 'made up' by the factory, and does *not* come from a variable
co_VP = main_cube.coord("vertical pressure")
assert co_VP.var_name == "ap"
assert co_VP.has_bounds() == derived_bounds
assert main_cube.coord_dims(co_VP) == (0,)

# This is the surface pressure
co_PS = main_cube.coord("PS")
assert co_PS.var_name == "PS"
assert not co_PS.has_bounds()
assert main_cube.coord_dims(co_PS) == (1, 2)

# The scalar reference
co_P0 = main_cube.coord("P0")
assert co_P0.var_name == "P0"
assert not co_P0.has_bounds()
assert main_cube.coord_dims(co_P0) == ()
Loading