Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
f799473
Merge pull request #4997 from SciTools/main
pp-mo Sep 28, 2022
471eb53
Split-attrs: Cube metadata refactortests (#4993)
pp-mo Oct 27, 2022
6f436d7
Merge pull request #5164 from SciTools/main
pp-mo Feb 20, 2023
f485f8a
Split attrs - tests for status quo (#4960)
pp-mo Feb 20, 2023
2549f05
Merge branch 'main' into FEATURE_split_attrs
pp-mo Jul 17, 2023
bba4650
Merge branch 'main' into FEATURE_split_attrs
pp-mo Jul 20, 2023
8b751c5
Implement split cube attributes. (#5040)
pp-mo Jul 20, 2023
68eaa53
Splitattrs ncload (#5384)
pp-mo Jul 21, 2023
57eec4d
Split attrs docs (#5418)
trexfeathers Aug 24, 2023
fa7962e
Splitattrs ncsave redo (#5410)
pp-mo Oct 10, 2023
d168a89
Define common-metadata operartions on split attribute dictionaries.
pp-mo Aug 13, 2023
c6c3d71
Tests for split-attributes handling in CubeMetadata operations.
pp-mo Aug 23, 2023
0d9a4e4
Small tidy and clarify.
pp-mo Aug 23, 2023
9d17da4
Common metadata ops support mixed split/unsplit attribute dicts.
pp-mo Aug 24, 2023
28ebcbb
Clarify with better naming, comments, docstrings.
pp-mo Oct 10, 2023
8d61d9b
Remove split-attrs handling to own sourcefile, and implement as a dec…
pp-mo Nov 9, 2023
fb52624
Remove redundant tests duplicated by matrix testcases.
pp-mo Nov 13, 2023
2cd51cd
Newstyle split-attrs matrix testing, with fewer testcases.
pp-mo Nov 13, 2023
07ed579
Small improvements to comments + docstrings.
pp-mo Nov 14, 2023
f8428e1
Ignore linter warning about class equality testing.
pp-mo Nov 14, 2023
f0e532b
Merge branch 'latest' into splitattrs_completion_trial
pp-mo Nov 14, 2023
9bcdaaa
Categorise newly added netcdf saver warnings.
pp-mo Nov 14, 2023
85190b9
Allow exact type equality in tests.
pp-mo Nov 14, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 27 additions & 14 deletions docs/src/further_topics/metadata.rst
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,16 @@ actual `data attribute`_ names of the metadata members on the Iris class.
metadata members are Iris specific terms, rather than recognised `CF Conventions`_
terms.

.. note::

:class:`~iris.cube.Cube` :attr:`~iris.cube.Cube.attributes` implement the
concept of dataset-level and variable-level attributes, to enable correct
NetCDF loading and saving (see :class:`~iris.cube.CubeAttrsDict` and NetCDF
:func:`~iris.fileformats.netcdf.saver.save` for more). ``attributes`` on
the other classes do not have this distinction, but the ``attributes``
members of ALL the classes still have the same interface, and can be
compared.


Common Metadata API
===================
Expand Down Expand Up @@ -128,10 +138,12 @@ For example, given the following :class:`~iris.cube.Cube`,
source 'Data from Met Office Unified Model 6.05'

We can easily get all of the associated metadata of the :class:`~iris.cube.Cube`
using the ``metadata`` property:
using the ``metadata`` property (note the specialised
:class:`~iris.cube.CubeAttrsDict` for the :attr:`~iris.cube.Cube.attributes`,
as mentioned earlier):

>>> cube.metadata
CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'Conventions': 'CF-1.5', 'STASH': STASH(model=1, section=3, item=236), 'Model scenario': 'A1B', 'source': 'Data from Met Office Unified Model 6.05'}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))
CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes=CubeAttrsDict(globals={'Conventions': 'CF-1.5'}, locals={'STASH': STASH(model=1, section=3, item=236), 'Model scenario': 'A1B', 'source': 'Data from Met Office Unified Model 6.05'}), cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))

We can also inspect the ``metadata`` of the ``longitude``
:class:`~iris.coords.DimCoord` attached to the :class:`~iris.cube.Cube` in the same way:
Expand Down Expand Up @@ -675,8 +687,8 @@ For example, consider the following :class:`~iris.common.metadata.CubeMetadata`,

.. doctest:: metadata-combine

>>> cube.metadata # doctest: +SKIP
CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'Conventions': 'CF-1.5', 'STASH': STASH(model=1, section=3, item=236), 'Model scenario': 'A1B', 'source': 'Data from Met Office Unified Model 6.05'}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))
>>> cube.metadata
CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes=CubeAttrsDict(globals={'Conventions': 'CF-1.5'}, locals={'STASH': STASH(model=1, section=3, item=236), 'Model scenario': 'A1B', 'source': 'Data from Met Office Unified Model 6.05'}), cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))

We can perform the **identity function** by comparing the metadata with itself,

Expand All @@ -701,7 +713,7 @@ which is replaced with a **different value**,
>>> metadata != cube.metadata
True
>>> metadata.combine(cube.metadata) # doctest: +SKIP
CubeMetadata(standard_name=None, long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'STASH': STASH(model=1, section=3, item=236), 'source': 'Data from Met Office Unified Model 6.05', 'Model scenario': 'A1B', 'Conventions': 'CF-1.5'}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))
CubeMetadata(standard_name=None, long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'STASH': STASH(model=1, section=3, item=236), 'Model scenario': 'A1B', 'source': 'Data from Met Office Unified Model 6.05', 'Conventions': 'CF-1.5'}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))

The ``combine`` method combines metadata by performing a **strict** comparison
between each of the associated metadata member values,
Expand All @@ -724,7 +736,7 @@ Let's reinforce this behaviour, but this time by combining metadata where the
>>> metadata != cube.metadata
True
>>> metadata.combine(cube.metadata).attributes
{'Model scenario': 'A1B'}
CubeAttrsDict(globals={}, locals={'Model scenario': 'A1B'})

The combined result for the ``attributes`` member only contains those
**common keys** with **common values**.
Expand Down Expand Up @@ -810,16 +822,17 @@ the ``from_metadata`` class method. For example, given the following

.. doctest:: metadata-convert

>>> cube.metadata # doctest: +SKIP
CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'Conventions': 'CF-1.5', 'STASH': STASH(model=1, section=3, item=236), 'Model scenario': 'A1B', 'source': 'Data from Met Office Unified Model 6.05'}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))
>>> cube.metadata
CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes=CubeAttrsDict(globals={'Conventions': 'CF-1.5'}, locals={'STASH': STASH(model=1, section=3, item=236), 'Model scenario': 'A1B', 'source': 'Data from Met Office Unified Model 6.05'}), cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))

We can easily convert it to a :class:`~iris.common.metadata.DimCoordMetadata` instance
using ``from_metadata``,

.. doctest:: metadata-convert

>>> DimCoordMetadata.from_metadata(cube.metadata) # doctest: +SKIP
DimCoordMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'Conventions': 'CF-1.5', 'STASH': STASH(model=1, section=3, item=236), 'Model scenario': 'A1B', 'source': 'Data from Met Office Unified Model 6.05'}, coord_system=None, climatological=None, circular=None)
>>> newmeta = DimCoordMetadata.from_metadata(cube.metadata)
>>> print(newmeta)
DimCoordMetadata(standard_name=air_temperature, var_name=air_temperature, units=K, attributes={'Conventions': 'CF-1.5', 'STASH': STASH(model=1, section=3, item=236), 'Model scenario': 'A1B', 'source': 'Data from Met Office Unified Model 6.05'})

By examining :numref:`metadata members table`, we can see that the
:class:`~iris.cube.Cube` and :class:`~iris.coords.DimCoord` container
Expand Down Expand Up @@ -849,9 +862,9 @@ class instance,

.. doctest:: metadata-convert

>>> longitude.metadata.from_metadata(cube.metadata)
DimCoordMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'Conventions': 'CF-1.5', 'STASH': STASH(model=1, section=3, item=236), 'Model scenario': 'A1B', 'source': 'Data from Met Office Unified Model 6.05'}, coord_system=None, climatological=None, circular=None)

>>> newmeta = longitude.metadata.from_metadata(cube.metadata)
>>> print(newmeta)
DimCoordMetadata(standard_name=air_temperature, var_name=air_temperature, units=K, attributes={'Conventions': 'CF-1.5', 'STASH': STASH(model=1, section=3, item=236), 'Model scenario': 'A1B', 'source': 'Data from Met Office Unified Model 6.05'})

.. _metadata assignment:

Expand Down Expand Up @@ -978,7 +991,7 @@ Indeed, it's also possible to assign to the ``metadata`` property with a
>>> longitude.metadata
DimCoordMetadata(standard_name='longitude', long_name=None, var_name='longitude', units=Unit('degrees'), attributes={}, coord_system=GeogCS(6371229.0), climatological=False, circular=False)
>>> longitude.metadata = cube.metadata
>>> longitude.metadata # doctest: +SKIP
>>> longitude.metadata
DimCoordMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'Conventions': 'CF-1.5', 'STASH': STASH(model=1, section=3, item=236), 'Model scenario': 'A1B', 'source': 'Data from Met Office Unified Model 6.05'}, coord_system=GeogCS(6371229.0), climatological=False, circular=False)

Note that, only **common** metadata members will be assigned new associated
Expand Down
5 changes: 4 additions & 1 deletion docs/src/userguide/iris_cubes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,10 @@ A cube consists of:
data dimensions as the coordinate has dimensions.

* an attributes dictionary which, other than some protected CF names, can
hold arbitrary extra metadata.
hold arbitrary extra metadata. This implements the concept of dataset-level
and variable-level attributes when loading and saving NetCDF files (see
:class:`~iris.cube.CubeAttrsDict` and NetCDF
:func:`~iris.fileformats.netcdf.saver.save` for more).
* a list of cell methods to represent operations which have already been
applied to the data (e.g. "mean over time")
* a list of coordinate "factories" used for deriving coordinates from the
Expand Down
15 changes: 13 additions & 2 deletions docs/src/whatsnew/latest.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,17 @@ This document explains the changes made to Iris for this release
✨ Features
===========

#. `@pp-mo`_, `@lbdreyer`_ and `@trexfeathers`_ improved
:class:`~iris.cube.Cube` :attr:`~iris.cube.Cube.attributes` handling to
better preserve the distinction between dataset-level and variable-level
attributes, allowing file-Cube-file round-tripping of NetCDF attributes. See
:class:`~iris.cube.CubeAttrsDict` and NetCDF
:func:`~iris.fileformats.netcdf.saver.save` for more. (:pull:`5152`,
`split attributes project`_)

#. `@rcomer`_ rewrote :func:`~iris.util.broadcast_to_shape` so it now handles
lazy data. (:pull:`5307`)

#. `@trexfeathers`_ and `@HGWright`_ (reviewer) sub-categorised all Iris'
:class:`UserWarning`\s for richer filtering. The full index of
sub-categories can be seen here: :mod:`iris.exceptions` . (:pull:`5498`)
Expand Down Expand Up @@ -145,6 +156,6 @@ This document explains the changes made to Iris for this release
.. comment
Whatsnew resources in alphabetical order:

.. _NEP29 Drop Schedule: https://numpy.org/neps/nep-0029-deprecation_policy.html#drop-schedule
.. _codespell: https://github.com/codespell-project/codespell

.. _split attributes project: https://github.com/orgs/SciTools/projects/5?pane=info
.. _NEP29 Drop Schedule: https://numpy.org/neps/nep-0029-deprecation_policy.html#drop-schedule
16 changes: 13 additions & 3 deletions lib/iris/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,9 @@ def callback(cube, field, filename):
class Future(threading.local):
"""Run-time configuration controller."""

def __init__(self, datum_support=False, pandas_ndim=False):
def __init__(
self, datum_support=False, pandas_ndim=False, save_split_attrs=False
):
"""
A container for run-time options controls.

Expand All @@ -163,6 +165,11 @@ def __init__(self, datum_support=False, pandas_ndim=False):
pandas_ndim : bool, default=False
See :func:`iris.pandas.as_data_frame` for details - opts in to the
newer n-dimensional behaviour.
save_split_attrs : bool, default=False
Save "global" and "local" cube attributes to netcdf in appropriately
different ways : "global" ones are saved as dataset attributes, where
possible, while "local" ones are saved as data-variable attributes.
See :func:`iris.fileformats.netcdf.saver.save`.

"""
# The flag 'example_future_flag' is provided as a reference for the
Expand All @@ -174,14 +181,17 @@ def __init__(self, datum_support=False, pandas_ndim=False):
# self.__dict__['example_future_flag'] = example_future_flag
self.__dict__["datum_support"] = datum_support
self.__dict__["pandas_ndim"] = pandas_ndim
self.__dict__["save_split_attrs"] = save_split_attrs
# TODO: next major release: set IrisDeprecation to subclass
# DeprecationWarning instead of UserWarning.

def __repr__(self):
# msg = ('Future(example_future_flag={})')
# return msg.format(self.example_future_flag)
msg = "Future(datum_support={}, pandas_ndim={})"
return msg.format(self.datum_support, self.pandas_ndim)
msg = "Future(datum_support={}, pandas_ndim={}, save_split_attrs={})"
return msg.format(
self.datum_support, self.pandas_ndim, self.save_split_attrs
)

# deprecated_options = {'example_future_flag': 'warning',}
deprecated_options = {}
Expand Down
122 changes: 122 additions & 0 deletions lib/iris/common/_split_attribute_dicts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the BSD license.
# See LICENSE in the root of the repository for full licensing details.
"""
Dictionary operations for dealing with the CubeAttrsDict "split"-style attribute
dictionaries.

The idea here is to convert a split-dictionary into a "plain" one for calculations,
whose keys are all pairs of the form ('global', <keyname>) or ('local', <keyname>),
and then to convert back again after the operation, if the result is a dictionary.

For "strict" operations this clearly does all that is needed. For lenient ones,
we _might_ want local and global attributes of the same name to interact.
However, on careful consideration, it seems that this is not actually desirable for
any of the common-metadata operations.
So, we simply treat "global" and "local" attributes of the same name as entirely
independent, which happily is also the easiest to code, and to explain.
"""

from collections.abc import Mapping, Sequence
from functools import wraps


def _convert_splitattrs_to_pairedkeys_dict(dic):
    """
    Flatten a split-attributes dictionary into a plain dict with paired keys.

    The input is a :class:`~iris.cube.CubeAttrsDict` "split" attributes dictionary,
    i.e. an object providing separate ``globals`` and ``locals`` mappings.
    The result is an ordinary :class:`dict`, whose keys are pairs of the form
    ('global', name) or ('local', name), so that a global and a local attribute of
    the same name can never collide.  All of the global entries come first,
    followed by all of the local ones.
    """
    # Label every key with its origin : the globals go in first ...
    paired = {("global", name): value for name, value in dic.globals.items()}
    # ... and the locals are added after them, so the overall order is preserved.
    paired.update(
        (("local", name), value) for name, value in dic.locals.items()
    )
    return paired


def _convert_pairedkeys_dict_to_splitattrs(dic):
    """
    Rebuild a split-attributes dictionary from a dict with global/local paired keys.

    Every key of the input must be a pair of the form ('global', name) or
    ('local', name) : each value is stored under its plain name, in the matching
    part of the result.

    For now, the result is always and only a :class:`iris.cube.CubeAttrsDict`.
    """
    # Imported here rather than at module level.
    # NOTE(review): presumably this avoids a circular import -- confirm.
    from iris.cube import CubeAttrsDict

    split = CubeAttrsDict()
    for (kind, name), value in dic.items():
        if kind != "global":
            # The only other key type which should ever occur is "local".
            assert kind == "local"
            split.locals[name] = value
        else:
            split.globals[name] = value
    return split


def adjust_for_split_attribute_dictionaries(operation):
    """
    Decorator enabling an attribute-dictionary operation to handle split attributes.

    The wrapped function is currently always one of the "equals", "combine" or
    "difference" operations on attribute dictionaries, with signatures like :
        equals(left: dict, right: dict) -> bool
        combine(left: dict, right: dict) -> dict
        difference(left: dict, right: dict) -> None | (dict, dict)

    Accordingly, the result of the wrapped operation is one of :
      * for "equals" (or "__eq__") : a boolean
      * for "combine" : a single attributes-dictionary
      * for "difference" : either None, or a pair of dictionaries

    On the way in, every positional argument is made into a
    :class:`~iris.cube.CubeAttrsDict` (if it is not one already), and then
    converted to a plain dictionary whose keys are pairs of the form
    ("global", name) or ("local", name).  Keyword arguments are passed through
    to the wrapped operation unchanged.

    On the way out, a result which is a dictionary ("combine"), or a pair of
    dictionaries ("difference"), is converted back from the paired-keys form into
    split-attribute dictionaries.  Any other result is returned as-is.

    "Split" dictionaries are all of class :class:`~iris.cube.CubeAttrsDict`, since
    the only usage of 'split' attribute dictionaries is in Cubes (i.e. they are not
    used for cube components).
    """

    @wraps(operation)
    def _inner_function(*args, **kwargs):
        from iris.cube import CubeAttrsDict

        # Step 1 : promote every positional input to a CubeAttrsDict, if needed.
        split_args = [
            CubeAttrsDict(arg) if not isinstance(arg, CubeAttrsDict) else arg
            for arg in args
        ]
        # Step 2 : flatten each of those into a 'pairedkeys' type dict.
        paired_args = [
            _convert_splitattrs_to_pairedkeys_dict(split) for split in split_args
        ]

        outcome = operation(*paired_args, **kwargs)

        # Step 3 : turn any 'pairedkeys' dicts in the result back into split form.
        if isinstance(outcome, Mapping):
            # A single dictionary -- as returned by "combine".
            return _convert_pairedkeys_dict_to_splitattrs(outcome)

        if isinstance(outcome, Sequence) and len(outcome) == 2:
            # A pair of dictionaries -- as returned by "difference".
            first, second = outcome
            restored = [
                _convert_pairedkeys_dict_to_splitattrs(first),
                _convert_pairedkeys_dict_to_splitattrs(second),
            ]
            # Rebuild the pair as the same kind of sequence that was returned.
            return outcome.__class__(restored)

        # Anything else (e.g. a boolean, or None) is passed back untouched.
        return outcome

    return _inner_function
47 changes: 46 additions & 1 deletion lib/iris/common/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from xxhash import xxh64_hexdigest

from ..config import get_logger
from ._split_attribute_dicts import adjust_for_split_attribute_dictionaries
from .lenient import _LENIENT
from .lenient import _lenient_service as lenient_service
from .lenient import _qualname as qualname
Expand Down Expand Up @@ -241,7 +242,11 @@ def __str__(self):
field_strings = []
for field in self._fields:
value = getattr(self, field)
if value is None or isinstance(value, (str, dict)) and not value:
if (
value is None
or isinstance(value, (str, Mapping))
and not value
):
continue
field_strings.append(f"{field}={value}")

Expand Down Expand Up @@ -1250,6 +1255,46 @@ def _check(item):

return result

#
# Override each of the attribute-dict operations in BaseMetadata, to enable
# them to deal with split-attribute dictionaries correctly.
# There are 6 of these, for (equals/combine/difference) * (lenient/strict).
# Each is overridden with a *wrapped* version of the parent method, using the
# "@adjust_for_split_attribute_dictionaries" decorator, which converts any
# split-attribute dictionaries in the inputs to ordinary dicts, and likewise
# re-converts any dictionaries in the return value.
#

@staticmethod
@adjust_for_split_attribute_dictionaries
def _combine_lenient_attributes(left, right):
    """
    Lenient "combine" of two attribute dictionaries, supporting split attributes.

    Delegates to ``BaseMetadata._combine_lenient_attributes``.  The decorator
    converts both inputs to paired-key dictionaries before the call, and converts
    a dictionary result back into a split-attributes dictionary afterwards.
    """
    return BaseMetadata._combine_lenient_attributes(left, right)

@staticmethod
@adjust_for_split_attribute_dictionaries
def _combine_strict_attributes(left, right):
    """
    Strict "combine" of two attribute dictionaries, supporting split attributes.

    Delegates to ``BaseMetadata._combine_strict_attributes``.  The decorator
    converts both inputs to paired-key dictionaries before the call, and converts
    a dictionary result back into a split-attributes dictionary afterwards.
    """
    return BaseMetadata._combine_strict_attributes(left, right)

@staticmethod
@adjust_for_split_attribute_dictionaries
def _compare_lenient_attributes(left, right):
    """
    Lenient comparison of two attribute dictionaries, supporting split attributes.

    Delegates to ``BaseMetadata._compare_lenient_attributes``.  The decorator
    converts both inputs to paired-key dictionaries before the call.  A result
    which is not a dictionary or a pair (expected here to be a boolean) is
    returned unchanged.
    """
    return BaseMetadata._compare_lenient_attributes(left, right)

@staticmethod
@adjust_for_split_attribute_dictionaries
def _compare_strict_attributes(left, right):
    """
    Strict comparison of two attribute dictionaries, supporting split attributes.

    Delegates to ``BaseMetadata._compare_strict_attributes``.  The decorator
    converts both inputs to paired-key dictionaries before the call.  A result
    which is not a dictionary or a pair (expected here to be a boolean) is
    returned unchanged.
    """
    return BaseMetadata._compare_strict_attributes(left, right)

@staticmethod
@adjust_for_split_attribute_dictionaries
def _difference_lenient_attributes(left, right):
    """
    Lenient "difference" of two attribute dictionaries, supporting split attributes.

    Delegates to ``BaseMetadata._difference_lenient_attributes``.  The decorator
    converts both inputs to paired-key dictionaries before the call.  When the
    result is a pair of dictionaries, each one is converted back into a
    split-attributes dictionary; any other result (e.g. None) is returned
    unchanged.
    """
    return BaseMetadata._difference_lenient_attributes(left, right)

@staticmethod
@adjust_for_split_attribute_dictionaries
def _difference_strict_attributes(left, right):
    """
    Strict "difference" of two attribute dictionaries, supporting split attributes.

    Delegates to ``BaseMetadata._difference_strict_attributes``.  The decorator
    converts both inputs to paired-key dictionaries before the call.  When the
    result is a pair of dictionaries, each one is converted back into a
    split-attributes dictionary; any other result (e.g. None) is returned
    unchanged.
    """
    return BaseMetadata._difference_strict_attributes(left, right)


class DimCoordMetadata(CoordMetadata):
"""
Expand Down
Loading