Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions doc/develop/preprocessor_function.rst
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,34 @@ relevant :ref:`iris <iris_docs>` code.
Code that is more involved, e.g. lots of work with Numpy and Dask arrays,
and more broadly applicable, should be implemented in iris instead.

Metadata
========

Preprocessor functions may change the metadata of datasets.
An obvious example is :func:`~esmvalcore.preprocessor.convert_units`, which
changes units.
If cube metadata is changed in a preprocessor function, the :ref:`metadata.yml
<metadata_yml>` file is automatically updated with this information.
The following attributes are taken into account:

+------------------------------------+--------------------------------------------+
| Attribute in ``metadata.yml`` file | Updated from                               |
+====================================+============================================+
| ``standard_name``                  | :attr:`iris.cube.Cube.standard_name`       |
+------------------------------------+--------------------------------------------+
| ``long_name``                      | :attr:`iris.cube.Cube.long_name`           |
+------------------------------------+--------------------------------------------+
| ``short_name``                     | :attr:`iris.cube.Cube.var_name`            |
+------------------------------------+--------------------------------------------+
| ``units``                          | :attr:`iris.cube.Cube.units`               |
+------------------------------------+--------------------------------------------+
| ``frequency``                      | ``iris.cube.Cube.attributes['frequency']`` |
+------------------------------------+--------------------------------------------+

If a given cube property is ``None``, the corresponding attribute is updated
with an empty string (``''``).
If a cube property is not present at all (e.g., the cube has no ``frequency``
entry in its attributes), the corresponding attribute is left unchanged.

Documentation
=============

Expand Down
2 changes: 2 additions & 0 deletions doc/quickstart/output.rst
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ The first item in the settings file will be a list of `Metadata.yml`_ files.
There is a metadata.yml file generated for each field in each diagnostic.


.. _metadata_yml:

Metadata.yml
============

Expand Down
63 changes: 44 additions & 19 deletions esmvalcore/preprocessor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,8 +228,9 @@ def check_preprocessor_settings(settings):
for step in settings:
if step not in DEFAULT_ORDER:
raise ValueError(
"Unknown preprocessor function '{}', choose from: {}".format(
step, ', '.join(DEFAULT_ORDER)))
f"Unknown preprocessor function '{step}', choose from: "
f"{', '.join(DEFAULT_ORDER)}"
)

function = function = globals()[step]
argspec = inspect.getfullargspec(function)
Expand All @@ -239,18 +240,20 @@ def check_preprocessor_settings(settings):
invalid_args = set(settings[step]) - set(args)
if invalid_args:
raise ValueError(
"Invalid argument(s): {} encountered for preprocessor "
"function {}. \nValid arguments are: [{}]".format(
', '.join(invalid_args), step, ', '.join(args)))
f"Invalid argument(s): {', '.join(invalid_args)} "
f"encountered for preprocessor function {step}. \n"
f"Valid arguments are: [{', '.join(args)}]"
)

# Check for missing arguments
defaults = argspec.defaults
end = None if defaults is None else -len(defaults)
missing_args = set(args[:end]) - set(settings[step])
if missing_args:
raise ValueError(
"Missing required argument(s) {} for preprocessor "
"function {}".format(missing_args, step))
f"Missing required argument(s) {missing_args} for "
f"preprocessor function {step}"
)
# Final sanity check in case the above fails to catch a mistake
try:
signature = inspect.Signature.from_callable(function)
Expand All @@ -277,10 +280,9 @@ def _check_multi_model_settings(products):
elif reference.settings[step] != settings:
raise ValueError(
"Unable to combine differing multi-dataset settings for "
"{} and {}, {} and {}".format(reference.filename,
product.filename,
reference.settings[step],
settings))
f"{reference.filename} and {product.filename}, "
f"{reference.settings[step]} and {settings}"
)


def _get_multi_model_settings(products, step):
Expand Down Expand Up @@ -416,8 +418,8 @@ def apply(self, step, debug=False):
"""Apply preprocessor step to product."""
if step not in self.settings:
raise ValueError(
"PreprocessorFile {} has no settings for step {}".format(
self, step))
f"PreprocessorFile {self} has no settings for step {step}"
)
self.cubes = preprocess(self.cubes, step,
input_files=self._input_files,
**self.settings[step])
Expand Down Expand Up @@ -463,10 +465,34 @@ def save(self):
def close(self):
    """Close the file.

    Does nothing when no cubes are held (``self._cubes`` is ``None``).
    Otherwise the product attributes are refreshed from the cubes, the
    product is saved, the reference to the cubes is dropped and the
    provenance is saved, in that order.
    """
    if self._cubes is None:
        return

    # Attributes must be refreshed before saving, while the cubes are
    # still available.
    self._update_attributes()
    self.save()
    self._cubes = None
    self.save_provenance()

def _update_attributes(self):
    """Update product attributes from cube metadata.

    Only the first cube in ``self._cubes`` is consulted. Nothing is
    changed when there are no cubes.
    """
    cubes = self._cubes
    if not cubes:
        return
    first_cube = cubes[0]

    # (cube property, product attribute) pairs; a cube property that is
    # ``None`` is stored as an empty string.
    name_pairs = (
        ('standard_name', 'standard_name'),
        ('long_name', 'long_name'),
        ('var_name', 'short_name'),
    )
    for cube_property, attribute in name_pairs:
        value = getattr(first_cube, cube_property)
        if value is None:
            value = ''
        self.attributes[attribute] = value

    # Units are always stored in their string form.
    self.attributes['units'] = str(first_cube.units)

    # The frequency is only overwritten when the cube provides one.
    if 'frequency' in first_cube.attributes:
        self.attributes['frequency'] = first_cube.attributes['frequency']

@property
def is_closed(self):
"""Check if the file is closed."""
Expand Down Expand Up @@ -625,11 +651,10 @@ def __str__(self):
]
products = '\n\n'.join('\n'.join([str(p), pformat(p.settings)])
for p in self.products)
txt = "{}: {}\norder: {}\n{}\n{}".format(
self.__class__.__name__,
self.name,
order,
products,
txt = "\n".join([
f"{self.__class__.__name__}: {self.name}",
f"order: {order}",
f"{products}",
self.print_ancestors(),
)
])
return txt
142 changes: 142 additions & 0 deletions tests/unit/preprocessor/test_preprocessor_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
"""Unit tests for :class:`esmvalcore.preprocessor.PreprocessorFile`."""

from unittest import mock

import pytest
from iris.cube import Cube, CubeList

from esmvalcore.preprocessor import PreprocessorFile

# Initial product attributes; they deliberately differ from the metadata of
# the cube built in the ``product`` fixture.
ATTRIBUTES = dict(
    filename='file.nc',
    standard_name='precipitation',
    long_name='Precipitation',
    short_name='pr',
    units='kg m-2 s-1',
    frequency='mon',
)


@pytest.fixture
def product():
    """Return a ``PreprocessorFile`` whose cubes are three ``tas`` cubes.

    The product is created with ``ATTRIBUTES``; the cube list holds the same
    scalar cube object three times.
    """
    cube_metadata = {
        'var_name': 'tas',
        'standard_name': 'air_temperature',
        'long_name': 'Near-Surface Air Temperature',
        'units': 'K',
        'attributes': {'frequency': 'day'},
    }
    cube = Cube(0, **cube_metadata)

    preproc_file = PreprocessorFile(attributes=ATTRIBUTES, settings={})
    preproc_file._cubes = CubeList([cube] * 3)
    return preproc_file


def test_update_attributes_empty_cubes(product):
    """Check that an empty cube list leaves the attributes unchanged."""
    no_cubes = CubeList([])
    product._cubes = no_cubes

    product._update_attributes()

    # The cube list is still empty and no attribute has been touched.
    assert not product._cubes
    assert product.attributes == ATTRIBUTES


def test_update_attributes(product):
    """Check that all attributes are taken over from the cube metadata."""
    product._update_attributes()

    expected_attributes = dict(
        filename='file.nc',
        standard_name='air_temperature',
        long_name='Near-Surface Air Temperature',
        short_name='tas',
        units='K',
        frequency='day',
    )
    assert product.attributes == expected_attributes
    # Units must be stored as a plain string.
    assert isinstance(product.attributes['units'], str)


@pytest.mark.parametrize(
    'name,cube_property,expected_name',
    [
        ('standard_name', 'standard_name', ''),
        ('long_name', 'long_name', ''),
        ('short_name', 'var_name', ''),
    ],
)
def test_update_attributes_empty_names(product, name, cube_property,
                                       expected_name):
    """Check the attribute written when a cube name property is ``None``."""
    first_cube = product._cubes[0]
    setattr(first_cube, cube_property, None)

    product._update_attributes()

    # Start from the fully updated attributes and override the one name
    # that was reset on the cube.
    expected_attributes = dict(
        filename='file.nc',
        standard_name='air_temperature',
        long_name='Near-Surface Air Temperature',
        short_name='tas',
        units='K',
        frequency='day',
    )
    expected_attributes.update({name: expected_name})

    assert product.attributes == expected_attributes
    assert isinstance(product.attributes['units'], str)


def test_update_attributes_empty_frequency(product):
    """Check that the old frequency is kept if the cube provides none."""
    del product._cubes[0].attributes['frequency']

    product._update_attributes()

    expected_attributes = dict(
        filename='file.nc',
        standard_name='air_temperature',
        long_name='Near-Surface Air Temperature',
        short_name='tas',
        units='K',
        frequency='mon',  # unchanged value from the original attributes
    )
    assert product.attributes == expected_attributes
    assert isinstance(product.attributes['units'], str)


def test_update_attributes_no_frequency(product):
    """Check that no frequency is added if cube and product have none."""
    del product._cubes[0].attributes['frequency']
    del product.attributes['frequency']

    product._update_attributes()

    expected_attributes = dict(
        filename='file.nc',
        standard_name='air_temperature',
        long_name='Near-Surface Air Temperature',
        short_name='tas',
        units='K',
    )
    assert product.attributes == expected_attributes
    assert isinstance(product.attributes['units'], str)


def test_close_no_cubes():
    """Check that ``close`` does nothing when no cubes are held."""
    file_mock = mock.create_autospec(PreprocessorFile, instance=True)
    file_mock._cubes = None

    # Call the real method on the autospec'd instance.
    PreprocessorFile.close(file_mock)

    for method in (
        file_mock._update_attributes,
        file_mock.save,
        file_mock.save_provenance,
    ):
        method.assert_not_called()
    assert file_mock._cubes is None


def test_close():
    """Check that ``close`` updates, saves and releases held cubes."""
    file_mock = mock.create_autospec(PreprocessorFile, instance=True)
    file_mock._cubes = CubeList([Cube(0)])

    # Call the real method on the autospec'd instance.
    PreprocessorFile.close(file_mock)

    for method in (
        file_mock._update_attributes,
        file_mock.save,
        file_mock.save_provenance,
    ):
        method.assert_called_once_with()
    assert file_mock._cubes is None