diff --git a/doc/changelog.rst b/doc/changelog.rst index c4a2fb90de..0b1170fe2f 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -2,6 +2,28 @@ Changelog ========= +.. _changelog-v2-7-1: + + +v2.7.1 +------ +Highlights +~~~~~~~~~~ + +This is a bugfix release where we unpin `cf-units` to allow for latest `iris=3.4.0` to be installed. + +Installation +~~~~~~~~~~~~ + +- Unpin cf-units (`#1770 `__) `Bouwe Andela `__ + +Bug fixes +~~~~~~~~~ + +- Improve error handling if an esgf index node is offline (`#1834 `__) `Bouwe Andela `__ + + + .. _changelog-v2-7-0: diff --git a/environment.yml b/environment.yml index c219e6a5a4..53253bcb78 100644 --- a/environment.yml +++ b/environment.yml @@ -2,20 +2,19 @@ name: esmvaltool channels: - conda-forge + - nodefaults dependencies: - cartopy - - cf-units>=3.0.0,<3.1.0 # github.com/ESMValGroup/ESMValCore/issues/1655 + - cf-units - cftime - compilers - # 1.8.18/py39, they seem weary to build manylinux wheels - # and pypi ver built with older gdal - fiona - esmpy!=8.1.0 # see github.com/ESMValGroup/ESMValCore/issues/1208 - geopy - iris>=3.2.1 - nested-lookup - - netcdf4!=1.6.1 # github.com/ESMValGroup/ESMValCore/issues/1723 + - netcdf4 - pandas - pillow - pip!=21.3 diff --git a/esmvalcore/cmor/check.py b/esmvalcore/cmor/check.py index 734d401726..5712ecc22b 100644 --- a/esmvalcore/cmor/check.py +++ b/esmvalcore/cmor/check.py @@ -945,7 +945,7 @@ def _simplify_calendar(calendar): calendar_aliases = { 'all_leap': '366_day', 'noleap': '365_day', - 'standard': 'gregorian', + 'gregorian': 'standard', } return calendar_aliases.get(calendar, calendar) diff --git a/esmvalcore/esgf/_search.py b/esmvalcore/esgf/_search.py index 9dc831f992..28198317c9 100644 --- a/esmvalcore/esgf/_search.py +++ b/esmvalcore/esgf/_search.py @@ -110,7 +110,12 @@ def _search_index_nodes(facets): ) FIRST_ONLINE_INDEX_NODE = url return results - except requests.exceptions.Timeout as error: + except ( + requests.exceptions.ConnectionError, + requests.exceptions.HTTPError, + requests.exceptions.Timeout, + ) as error: + logger.debug("Unable to connect to %s due to %s", url, error) errors.append(error) raise FileNotFoundError("Failed to search ESGF, unable to connect:\n" + diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 9c9fa9662e..1b994ecbad 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -96,7 +96,7 @@ def _unify_time_coordinates(cubes): are no mismatches in the time arrays. If cubes have different time units, it will reset the calendar to a - default gregorian calendar with unit "days since 1850-01-01". + the "standard" calendar with unit "days since 1850-01-01". Might not work for (sub)daily data, because different calendars may have different number of days in the year. diff --git a/pyproject.toml b/pyproject.toml index 7688e015c5..a1dce171b6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,6 +3,7 @@ requires = ["setuptools >= 40.6.0", "wheel", "setuptools_scm>=6.2"] build-backend = "setuptools.build_meta" [tool.setuptools_scm] +version_scheme = "release-branch-semver" [tool.pylint.main] jobs = 0 diff --git a/setup.py b/setup.py index 528d34d0d7..0058f0e0ce 100755 --- a/setup.py +++ b/setup.py @@ -28,8 +28,7 @@ # Use with pip install . to install from source 'install': [ 'cartopy', - # see https://github.com/SciTools/cf-units/issues/218 - 'cf-units>=3.0.0,<3.1.0,!=3.0.1.post0', # ESMValCore/issues/1655 + 'cf-units', 'dask[array]', 'esgf-pyclient>=0.3.1', 'esmpy!=8.1.0', # see github.com/ESMValGroup/ESMValCore/issues/1208 @@ -42,7 +41,7 @@ 'jinja2', 'nc-time-axis', # needed by iris.plot 'nested-lookup', - 'netCDF4!=1.6.1', # github.com/ESMValGroup/ESMValCore/issues/1723 + 'netCDF4', 'numpy', 'pandas', 'pillow', @@ -59,7 +58,8 @@ ], # Test dependencies 'test': [ - 'flake8<5.0', # github.com/ESMValGroup/ESMValCore/issues/1696 + 'flake8', + 'mypy<0.990', 'pytest>=3.9,!=6.0.0rc1,!=6.0.0', 'pytest-cov>=2.10.1', 'pytest-env', diff --git a/tests/integration/cmor/_fixes/cmip5/test_access1_0.py b/tests/integration/cmor/_fixes/cmip5/test_access1_0.py index 5a89460a9d..1312762aab 100644 --- a/tests/integration/cmor/_fixes/cmip5/test_access1_0.py +++ b/tests/integration/cmor/_fixes/cmip5/test_access1_0.py @@ -13,45 +13,54 @@ from esmvalcore.iris_helpers import date2num -class TestAllVars(unittest.TestCase): - """Test all vars fixes.""" +@pytest.fixture +def cube(): + """Cube for testing.""" + test_cube = Cube([1.0, 2.0], var_name='co2', units='J') + reference_dates = [ + datetime(300, 1, 16, 12), # e.g. piControl + datetime(1850, 1, 16, 12) # e.g. historical + ] + esgf_time_units = Unit( + 'days since 0001-01-01', + calendar='proleptic_gregorian', + ) + time_points = date2num(reference_dates, esgf_time_units) + test_cube.add_dim_coord( + DimCoord(time_points, 'time', 'time', 'time', esgf_time_units), + data_dim=0, + ) + return test_cube + - def setUp(self): - """Prepare tests.""" - self.cube = Cube([1.0, 2.0], var_name='co2', units='J') - reference_dates = [ - datetime(300, 1, 16, 12), # e.g. piControl - datetime(1850, 1, 16, 12) # e.g. historical - ] - esgf_time_units = Unit('days since 0001-01-01', - calendar='proleptic_gregorian') - time_points = date2num(reference_dates, esgf_time_units) - self.cube.add_dim_coord( - DimCoord(time_points, 'time', 'time', 'time', esgf_time_units), - data_dim=0) - self.fix = AllVars(None) +class TestAllVars: + """Test all vars fixes.""" - def test_get(self): + @staticmethod + def test_get(): """Test getting of fix.""" - self.assertListEqual( - Fix.get_fixes('CMIP5', 'ACCESS1-0', 'Amon', 'tas'), - [AllVars(None)]) + assert (Fix.get_fixes('CMIP5', 'ACCESS1-0', 'Amon', 'tas') + == [AllVars(None)]) - def test_fix_metadata(self): + @staticmethod + def test_fix_metadata(cube): """Test fix for bad calendar.""" - cube = self.fix.fix_metadata([self.cube])[0] + fix = AllVars(None) + cube = fix.fix_metadata([cube])[0] time = cube.coord('time') dates = num2date(time.points, time.units.name, time.units.calendar) - self.assertEqual(time.units.calendar, 'gregorian') - u = Unit('days since 300-01-01 12:00:00', calendar='gregorian') - self.assertEqual(dates[0], u.num2date(15)) - u = Unit('days since 1850-01-01 12:00:00', calendar='gregorian') - self.assertEqual(dates[1], u.num2date(15)) + assert time.units.calendar in ('standard', 'gregorian') + u = Unit('days since 300-01-01 12:00:00', calendar='standard') + assert dates[0] == u.num2date(15) + u = Unit('days since 1850-01-01 12:00:00', calendar='standard') + assert dates[1] == u.num2date(15) - def test_fix_metadata_if_not_time(self): + @staticmethod + def test_fix_metadata_if_not_time(cube): """Test calendar fix do not fail if no time coord present.""" - self.cube.remove_coord('time') - self.fix.fix_metadata([self.cube]) + cube.remove_coord('time') + fix = AllVars(None) + fix.fix_metadata([cube]) def test_get_cl_fix(): diff --git a/tests/integration/cmor/_fixes/cmip5/test_access1_3.py b/tests/integration/cmor/_fixes/cmip5/test_access1_3.py index d28286078f..68d59201de 100644 --- a/tests/integration/cmor/_fixes/cmip5/test_access1_3.py +++ b/tests/integration/cmor/_fixes/cmip5/test_access1_3.py @@ -1,7 +1,7 @@ """Test fixes for ACCESS1-3.""" -import unittest from datetime import datetime +import pytest from cf_units import Unit, num2date from iris.coords import DimCoord from iris.cube import Cube @@ -12,45 +12,54 @@ from esmvalcore.iris_helpers import date2num -class TestAllVars(unittest.TestCase): - """Test fixes for all vars.""" +@pytest.fixture +def cube(): + """Cube for testing.""" + test_cube = Cube([1.0, 2.0], var_name='co2', units='J') + reference_dates = [ + datetime(300, 1, 16, 12), # e.g. piControl + datetime(1850, 1, 16, 12) # e.g. historical + ] + esgf_time_units = Unit( + 'days since 0001-01-01', + calendar='proleptic_gregorian', + ) + time_points = date2num(reference_dates, esgf_time_units) + test_cube.add_dim_coord( + DimCoord(time_points, 'time', 'time', 'time', esgf_time_units), + data_dim=0, + ) + return test_cube + - def setUp(self): - """Prepare tests.""" - self.cube = Cube([1.0, 2.0], var_name='co2', units='J') - reference_dates = [ - datetime(300, 1, 16, 12), # e.g. piControl - datetime(1850, 1, 16, 12) # e.g. historical - ] - esgf_time_units = Unit('days since 0001-01-01', - calendar='proleptic_gregorian') - time_points = date2num(reference_dates, esgf_time_units) - self.cube.add_dim_coord( - DimCoord(time_points, 'time', 'time', 'time', esgf_time_units), - data_dim=0) - self.fix = AllVars(None) +class TestAllVars: + """Test fixes for all vars.""" - def test_get(self): + @staticmethod + def test_get(): """Test getting of fix.""" - self.assertListEqual( - Fix.get_fixes('CMIP5', 'ACCESS1-3', 'Amon', 'tas'), - [AllVars(None)]) + assert (Fix.get_fixes('CMIP5', 'ACCESS1-3', 'Amon', 'tas') + == [AllVars(None)]) - def test_fix_metadata(self): + @staticmethod + def test_fix_metadata(cube): """Test fix for bad calendar.""" - cube = self.fix.fix_metadata([self.cube])[0] + fix = AllVars(None) + cube = fix.fix_metadata([cube])[0] time = cube.coord('time') dates = num2date(time.points, time.units.name, time.units.calendar) - self.assertEqual(time.units.calendar, 'gregorian') - u = Unit('days since 300-01-01 12:00:00', calendar='gregorian') - self.assertEqual(dates[0], u.num2date(15)) - u = Unit('days since 1850-01-01 12:00:00', calendar='gregorian') - self.assertEqual(dates[1], u.num2date(15)) + assert time.units.calendar in ('standard', 'gregorian') + u = Unit('days since 300-01-01 12:00:00', calendar='standard') + assert dates[0] == u.num2date(15) + u = Unit('days since 1850-01-01 12:00:00', calendar='standard') + assert dates[1] == u.num2date(15) - def test_fix_metadata_if_not_time(self): + @staticmethod + def test_fix_metadata_if_not_time(cube): """Test calendar fix do not fail if no time coord present.""" - self.cube.remove_coord('time') - self.fix.fix_metadata([self.cube]) + cube.remove_coord('time') + fix = AllVars(None) + fix.fix_metadata([cube]) def test_get_cl_fix(): diff --git a/tests/integration/cmor/_fixes/cmip5/test_fgoals_g2.py b/tests/integration/cmor/_fixes/cmip5/test_fgoals_g2.py index 2cfffcdb3f..abda3d180b 100644 --- a/tests/integration/cmor/_fixes/cmip5/test_fgoals_g2.py +++ b/tests/integration/cmor/_fixes/cmip5/test_fgoals_g2.py @@ -1,6 +1,5 @@ """Test FGOALS-g2 fixes.""" -import unittest - +import pytest from cf_units import Unit from iris.coords import DimCoord from iris.cube import Cube @@ -9,47 +8,54 @@ from esmvalcore.cmor.fix import Fix -class TestAll(unittest.TestCase): +@pytest.fixture +def cube(): + """Cube for testing.""" + test_cube = Cube([[1.0, 2.0]], var_name='co2', units='J') + test_cube.add_dim_coord( + DimCoord( + [0.0, 1.0], + standard_name='time', + units=Unit('days since 0001-01', calendar='gregorian')), + 1) + test_cube.add_dim_coord( + DimCoord( + [180], + standard_name='longitude', + units=Unit('degrees')), + 0) + return test_cube + + +class TestAll: """Test fixes for all vars.""" - def setUp(self): - """Prepare tests.""" - self.cube = Cube([[1.0, 2.0]], var_name='co2', units='J') - self.cube.add_dim_coord( - DimCoord( - [0.0, 1.0], - standard_name='time', - units=Unit('days since 0001-01', calendar='gregorian')), - 1) - self.cube.add_dim_coord( - DimCoord( - [180], - standard_name='longitude', - units=Unit('degrees')), - 0) - self.fix = AllVars(None) - - def test_get(self): + @staticmethod + def test_get(): """Test fix get.""" - self.assertListEqual( - Fix.get_fixes('CMIP5', 'FGOALS-G2', 'Amon', 'tas'), - [AllVars(None)]) + assert (Fix.get_fixes('CMIP5', 'FGOALS-G2', 'Amon', 'tas') + == [AllVars(None)]) - def test_fix_metadata(self): + @staticmethod + def test_fix_metadata(cube): """Test calendar fix.""" - cube = self.fix.fix_metadata([self.cube])[0] + fix = AllVars(None) + cube = fix.fix_metadata([cube])[0] time = cube.coord('time') - self.assertEqual(time.units.origin, - 'day since 1-01-01 00:00:00.000000') - self.assertEqual(time.units.calendar, 'gregorian') + assert time.units.origin == 'day since 1-01-01 00:00:00.000000' + assert time.units.calendar in ('standard', 'gregorian') - def test_fix_metadata_dont_fail_if_not_longitude(self): + @staticmethod + def test_fix_metadata_dont_fail_if_not_longitude(cube): """Test calendar fix.""" - self.cube.remove_coord('longitude') - self.fix.fix_metadata([self.cube]) + cube.remove_coord('longitude') + fix = AllVars(None) + fix.fix_metadata([cube]) - def test_fix_metadata_dont_fail_if_not_time(self): + @staticmethod + def test_fix_metadata_dont_fail_if_not_time(cube): """Test calendar fix.""" - self.cube.remove_coord('time') - self.fix.fix_metadata([self.cube]) + cube.remove_coord('time') + fix = AllVars(None) + fix.fix_metadata([cube]) diff --git a/tests/integration/preprocessor/_regrid/test_regrid.py b/tests/integration/preprocessor/_regrid/test_regrid.py index a6ba8b95b5..ee0d056686 100644 --- a/tests/integration/preprocessor/_regrid/test_regrid.py +++ b/tests/integration/preprocessor/_regrid/test_regrid.py @@ -241,11 +241,6 @@ def test_regrid__unstructured_nearest_int(self): expected = np.array([[[1]], [[1]], [[1]]]) np.testing.assert_array_equal(result.data, expected) - # Make sure that dtype is not preserved (since conversion from float to - # int would be necessary) - assert np.issubdtype(self.unstructured_grid_cube.dtype, np.integer) - assert result.dtype == np.float64 - # Make sure that output is a masked array with correct fill value # (= maximum int) np.testing.assert_allclose(result.data.fill_value, diff --git a/tests/sample_data/multimodel_statistics/test_multimodel.py b/tests/sample_data/multimodel_statistics/test_multimodel.py index 485328d85b..00ced5a348 100644 --- a/tests/sample_data/multimodel_statistics/test_multimodel.py +++ b/tests/sample_data/multimodel_statistics/test_multimodel.py @@ -1,290 +1,923 @@ -"""Test using sample data for :func:`esmvalcore.preprocessor._multimodel`.""" +"""Unit test for :func:`esmvalcore.preprocessor._multimodel`.""" -import pickle -import platform -from itertools import groupby -from pathlib import Path +from datetime import datetime +from unittest import mock +import cftime +import dask.array as da import iris +import iris.coord_categorisation import numpy as np import pytest +from cf_units import Unit +from iris.coords import AuxCoord +from iris.cube import Cube -from esmvalcore.preprocessor import extract_time -from esmvalcore.preprocessor._multimodel import multi_model_statistics - -esmvaltool_sample_data = pytest.importorskip("esmvaltool_sample_data") - -# Increase this number anytime you change the cached input data to the tests. -TEST_REVISION = 1 - -CALENDAR_PARAMS = ( - pytest.param( - '360_day', - marks=pytest.mark.skip( - reason='Cannot calculate statistics with single cube in list')), - '365_day', - 'gregorian', - pytest.param( - 'proleptic_gregorian', - marks=pytest.mark.xfail( - raises=iris.exceptions.MergeError, - reason='https://github.com/ESMValGroup/ESMValCore/issues/956')), - pytest.param( - 'julian', - marks=pytest.mark.skip( - reason='Cannot calculate statistics with single cube in list')), -) +import esmvalcore.preprocessor._multimodel as mm +from esmvalcore.iris_helpers import date2num +from esmvalcore.preprocessor import multi_model_statistics +from esmvalcore.preprocessor._ancillary_vars import add_ancillary_variable + +SPAN_OPTIONS = ('overlap', 'full') + +FREQUENCY_OPTIONS = ('daily', 'monthly', 'yearly') # hourly -SPAN_PARAMS = ('overlap', 'full') +CALENDAR_OPTIONS = ('360_day', '365_day', 'standard', 'proleptic_gregorian', + 'julian') -def assert_array_almost_equal(this, other): - """Assert that array `this` almost equals array `other`.""" +def assert_array_allclose(this, other): + """Assert that array `this` is close to array `other`.""" if np.ma.isMaskedArray(this) or np.ma.isMaskedArray(other): np.testing.assert_array_equal(this.mask, other.mask) np.testing.assert_allclose(this, other) -def assert_coords_equal(this: list, other: list): - """Assert coords list `this` equals coords list `other`.""" - for this_coord, other_coord in zip(this, other): - np.testing.assert_equal(this_coord.points, other_coord.points) - assert this_coord.var_name == other_coord.var_name - assert this_coord.standard_name == other_coord.standard_name - assert this_coord.units == other_coord.units +def timecoord(frequency, + calendar='standard', + offset='days since 1850-01-01', + num=3): + """Return a time coordinate with the given time points and calendar.""" + + time_points = range(1, num + 1) + + if frequency == 'hourly': + dates = [datetime(1850, 1, 1, i, 0, 0) for i in time_points] + if frequency == 'daily': + dates = [datetime(1850, 1, i, 0, 0, 0) for i in time_points] + elif frequency == 'monthly': + dates = [datetime(1850, i, 15, 0, 0, 0) for i in time_points] + elif frequency == 'yearly': + dates = [datetime(1850 + i - 1, 7, 1, 0, 0, 0) for i in time_points] + + unit = Unit(offset, calendar=calendar) + points = date2num(dates, unit) + return iris.coords.DimCoord(points, standard_name='time', units=unit) + + +def generate_cube_from_dates( + dates, + calendar='standard', + offset='days since 1850-01-01', + fill_val=1, + len_data=3, + var_name=None, + lazy=False, +): + """Generate test cube from list of dates / frequency specification. + + Parameters + ---------- + calendar : str or list + Date frequency: 'hourly' / 'daily' / 'monthly' / 'yearly' or + list of datetimes. + offset : str + Offset to use + fill_val : int + Value to fill the data with + len_data : int + Number of data / time points + var_name : str + Name of the data variable + + Returns + ------- + iris.cube.Cube + """ + if isinstance(dates, str): + time = timecoord(dates, calendar, offset=offset, num=len_data) + else: + len_data = len(dates) + unit = Unit(offset, calendar=calendar) + time = iris.coords.DimCoord(date2num(dates, unit), + standard_name='time', + units=unit) + + data = np.array((fill_val, ) * len_data, dtype=np.float32) + + if lazy: + data = da.from_array(data) + + return Cube(data, dim_coords_and_dims=[(time, 0)], var_name=var_name) + + +def get_cubes_for_validation_test(frequency, lazy=False): + """Set up cubes used for testing multimodel statistics.""" + + # Simple 1d cube with standard time cord + cube1 = generate_cube_from_dates(frequency, lazy=lazy) + + # Cube with masked data + cube2 = cube1.copy() + data2 = np.ma.array([5, 5, 5], mask=[True, False, False], dtype=np.float32) + if lazy: + data2 = da.from_array(data2) + cube2.data = data2 + + # Cube with deviating time coord + cube3 = generate_cube_from_dates(frequency, + calendar='360_day', + offset='days since 1950-01-01', + len_data=2, + fill_val=9, + lazy=lazy) + + return [cube1, cube2, cube3] + + +def get_cube_for_equal_coords_test(num_cubes): + """Setup cubes with equal auxiliary coordinates.""" + cubes = [] + + for num in range(num_cubes): + cube = generate_cube_from_dates('monthly') + cubes.append(cube) + + # Create cubes that have one equal coordinate ('year') and one non-equal + # coordinate ('x') + year_coord = AuxCoord([1, 2, 3], var_name='year', long_name='year', + units='1', attributes={'test': 1}) + x_coord = AuxCoord([1, 2, 3], var_name='x', long_name='x', units='s', + attributes={'test': 2}) + for (idx, cube) in enumerate(cubes): + new_x_coord = x_coord.copy() + new_x_coord.long_name = f'x_{idx}' + cube.add_aux_coord(year_coord.copy(), 0) + cube.add_aux_coord(new_x_coord, 0) + assert cube.coord('year').metadata is not year_coord.metadata + assert cube.coord('year').metadata == year_coord.metadata + assert cube.coord(f'x_{idx}').metadata is not x_coord.metadata + assert cube.coord(f'x_{idx}').metadata != x_coord.metadata + + return cubes -def assert_metadata_equal(this, other): - """Assert metadata `this` are equal to metadata `other`.""" - assert this.standard_name == other.standard_name - assert this.long_name == other.long_name - assert this.var_name == other.var_name - assert this.units == other.units +VALIDATION_DATA_SUCCESS = ( + ('full', 'mean', (5, 5, 3)), + ('full', 'std_dev', (5.656854249492381, 4, 2.8284271247461903)), + ('full', 'std', (5.656854249492381, 4, 2.8284271247461903)), + ('full', 'min', (1, 1, 1)), + ('full', 'max', (9, 9, 5)), + ('full', 'median', (5, 5, 3)), + ('full', 'p50', (5, 5, 3)), + ('full', 'p99.5', (8.96, 8.96, 4.98)), + ('full', 'peak', (9, 9, 5)), + ('overlap', 'mean', (5, 5)), + ('overlap', 'std_dev', (5.656854249492381, 4)), + ('overlap', 'std', (5.656854249492381, 4)), + ('overlap', 'min', (1, 1)), + ('overlap', 'max', (9, 9)), + ('overlap', 'median', (5, 5)), + ('overlap', 'p50', (5, 5)), + ('overlap', 'p99.5', (8.96, 8.96)), + ('overlap', 'peak', (9, 9)), + # test multiple statistics + ('overlap', ('min', 'max'), ((1, 1), (9, 9))), + ('full', ('min', 'max'), ((1, 1, 1), (9, 9, 5))), +) -def fix_metadata(cubes): - """Fix metadata.""" +@pytest.mark.parametrize( + 'length,slices', + [ + (1, [slice(0, 1)]), + (25000, [slice(0, 8334), + slice(8334, 16668), + slice(16668, 25000)]), + ], +) +def test_compute_slices(length, slices): + """Test cube `_compute_slices`.""" + cubes = [ + Cube(da.empty([length, 50, 100], dtype=np.float32)) for _ in range(5) + ] + result = list(mm._compute_slices(cubes)) + assert result == slices + + +def test_compute_slices_exceed_end_index(): + """Test that ``_compute_slices`` terminates when exceeding end index.""" + # The following settings will result in a cube length of 71, 10 slices and + # a slice length of 8. Thus, without early termination, the last slice + # would be slice(72, 71), which would result in an exception. + cube_data = mock.Mock(nbytes=1.1 * 2**30) # roughly 1.1 GiB + cube = mock.Mock(spec=Cube, data=cube_data, shape=(71,)) + cubes = [cube] * 9 + + slices = list(mm._compute_slices(cubes)) + + # Early termination lead to 9 (instead of 10) slices + assert len(slices) == 9 + expected_slices = [ + slice(0, 8, None), + slice(8, 16, None), + slice(16, 24, None), + slice(24, 32, None), + slice(32, 40, None), + slice(40, 48, None), + slice(48, 56, None), + slice(56, 64, None), + slice(64, 71, None), + ] + assert slices == expected_slices + + +def test_compute_slices_equals_end_index(): + """Test that ``_compute_slices`` terminates when reaching end index.""" + # The following settings will result in a cube length of 36, 13 slices and + # a slice length of 3. Thus, without early termination, the last slice + # would be slice(36, 39), which would result in an exception. + cube_data = mock.Mock(nbytes=1.05 * 2**30) # roughly 1.05 GiB + cube = mock.Mock(spec=Cube, data=cube_data, shape=(36,)) + cubes = [cube] * 12 + + slices = list(mm._compute_slices(cubes)) + + # Early termination lead to 12 (instead of 13) slices + assert len(slices) == 12 + expected_slices = [ + slice(0, 3, None), + slice(3, 6, None), + slice(6, 9, None), + slice(9, 12, None), + slice(12, 15, None), + slice(15, 18, None), + slice(18, 21, None), + slice(21, 24, None), + slice(24, 27, None), + slice(27, 30, None), + slice(30, 33, None), + slice(33, 36, None), + ] + assert slices == expected_slices + + +@pytest.mark.parametrize('frequency', FREQUENCY_OPTIONS) +@pytest.mark.parametrize('span, statistics, expected', VALIDATION_DATA_SUCCESS) +def test_multimodel_statistics(frequency, span, statistics, expected): + """High level test for multicube statistics function.""" + cubes = get_cubes_for_validation_test(frequency) + + if isinstance(statistics, str): + statistics = (statistics, ) + expected = (expected, ) + + result = multi_model_statistics(cubes, span, statistics) + + assert isinstance(result, dict) + assert set(result.keys()) == set(statistics) + + for i, statistic in enumerate(statistics): + result_cube = result[statistic] + # make sure that temporary coord has been removed + with pytest.raises(iris.exceptions.CoordinateNotFoundError): + result_cube.coord('multi-model') + # test that real data in => real data out + assert result_cube.has_lazy_data() is False + expected_data = np.ma.array(expected[i], mask=False) + assert_array_allclose(result_cube.data, expected_data) + + +@pytest.mark.xfail(reason='Lazy data not (yet) supported.') +@pytest.mark.parametrize('span', SPAN_OPTIONS) +def test_lazy_data_consistent_times(span): + """Test laziness of multimodel statistics with consistent time axis.""" + cubes = ( + generate_cube_from_dates('monthly', fill_val=1, lazy=True), + generate_cube_from_dates('monthly', fill_val=3, lazy=True), + generate_cube_from_dates('monthly', fill_val=6, lazy=True), + ) + for cube in cubes: - cube.coord('air_pressure').bounds = None + assert cube.has_lazy_data() + statistic = 'sum' + statistics = (statistic, ) -def preprocess_data(cubes, time_slice: dict = None): - """Regrid the data to the first cube and optional time-slicing.""" - # Increase TEST_REVISION anytime you make changes to this function. - if time_slice: - cubes = [extract_time(cube, **time_slice) for cube in cubes] + result = mm._multicube_statistics(cubes, span=span, statistics=statistics) - first_cube = cubes[0] + result_cube = result[statistic] + assert result_cube.has_lazy_data() - # regrid to first cube - regrid_kwargs = { - 'grid': first_cube, - 'scheme': iris.analysis.Nearest(), - } - cubes = [cube.regrid(**regrid_kwargs) for cube in cubes] +@pytest.mark.xfail(reason='Lazy data not (yet) supported.') +@pytest.mark.parametrize('span', SPAN_OPTIONS) +def test_lazy_data_inconsistent_times(span): + """Test laziness of multimodel statistics with inconsistent time axis. - return cubes + This hits `_align`, which adds additional computations which must be + lazy. + """ + cubes = ( + generate_cube_from_dates( + [datetime(1850, i, 15, 0, 0, 0) for i in range(1, 10)], lazy=True), + generate_cube_from_dates( + [datetime(1850, i, 15, 0, 0, 0) for i in range(3, 8)], lazy=True), + generate_cube_from_dates( + [datetime(1850, i, 15, 0, 0, 0) for i in range(2, 9)], lazy=True), + ) + + for cube in cubes: + assert cube.has_lazy_data() + + statistic = 'sum' + statistics = (statistic, ) + + result = mm._multicube_statistics(cubes, span=span, statistics=statistics) -def get_cache_key(value): - """Get a cache key that is hopefully unique enough for unpickling. + result_cube = result[statistic] + assert result_cube.has_lazy_data() + + +VALIDATION_DATA_FAIL = ( + ('percentile', ValueError), + ('wpercentile', ValueError), + ('count', TypeError), + ('proportion', TypeError), +) - If this doesn't avoid problems with unpickling the cached data, - manually clean the pytest cache with the command `pytest --cache-clear`. + +@pytest.mark.parametrize('statistic, error', VALIDATION_DATA_FAIL) +def test_unsupported_statistics_fail(statistic, error): + """Check that unsupported statistics raise an exception.""" + cubes = get_cubes_for_validation_test('monthly') + span = 'overlap' + statistics = (statistic, ) + with pytest.raises(error): + _ = multi_model_statistics(cubes, span, statistics) + + +@pytest.mark.parametrize('calendar1, calendar2, expected', ( + ('360_day', '360_day', ('360_day',)), + ('365_day', '365_day', ('365_day',)), + ('365_day', '360_day', ('standard', 'gregorian')), + ('360_day', '365_day', ('standard', 'gregorian')), + ('standard', '365_day', ('standard', 'gregorian')), + ('proleptic_gregorian', 'julian', ('standard', 'gregorian')), + ('julian', '365_day', ('standard', 'gregorian')), +)) +def test_get_consistent_time_unit(calendar1, calendar2, expected): + """Test same calendar returned or default if calendars differ. + + Expected behaviour: If the calendars are the same, return that one. + If the calendars are not the same, return 'standard'. """ - py_version = platform.python_version() - return (f'{value}_iris-{iris.__version__}_' - f'numpy-{np.__version__}_python-{py_version}' - f'rev-{TEST_REVISION}') + cubes = ( + generate_cube_from_dates('monthly', calendar=calendar1), + generate_cube_from_dates('monthly', calendar=calendar2), + ) + result = mm._get_consistent_time_unit(cubes) + assert result.calendar in expected -@pytest.fixture(scope="module") -def timeseries_cubes_month(request): - """Load representative timeseries data.""" - # cache the cubes to save about 30-60 seconds on repeat use - cache_key = get_cache_key("sample_data/monthly") - data = request.config.cache.get(cache_key, None) - if data: - cubes = pickle.loads(data.encode('latin1')) - else: - # Increase TEST_REVISION anytime you make changes here. - time_slice = { - 'start_year': 1985, - 'end_year': 1987, - 'start_month': 12, - 'end_month': 2, - 'start_day': 1, - 'end_day': 1, - } - cubes = esmvaltool_sample_data.load_timeseries_cubes(mip_table='Amon') - cubes = preprocess_data(cubes, time_slice=time_slice) - - # cubes are not serializable via json, so we must go via pickle - request.config.cache.set(cache_key, - pickle.dumps(cubes).decode('latin1')) - - fix_metadata(cubes) +@pytest.mark.parametrize('span', SPAN_OPTIONS) +def test_align(span): + """Test _align function.""" - return cubes + # TODO --> check that if a cube is extended, + # the extended points are masked (not NaN!) + len_data = 3 -@pytest.fixture(scope="module") -def timeseries_cubes_day(request): - """Load representative timeseries data grouped by calendar.""" - # cache the cubes to save about 30-60 seconds on repeat use - cache_key = get_cache_key("sample_data/daily") - data = request.config.cache.get(cache_key, None) + cubes = [] - if data: - cubes = pickle.loads(data.encode('latin1')) + for calendar in CALENDAR_OPTIONS: + cube = generate_cube_from_dates('monthly', + calendar=calendar, + len_data=3) + cubes.append(cube) - else: - # Increase TEST_REVISION anytime you make changes here. - time_slice = { - 'start_year': 2001, - 'end_year': 2002, - 'start_month': 12, - 'end_month': 2, - 'start_day': 1, - 'end_day': 1, - } - cubes = esmvaltool_sample_data.load_timeseries_cubes(mip_table='day') - cubes = preprocess_data(cubes, time_slice=time_slice) + result_cubes = mm._align(cubes, span) - # cubes are not serializable via json, so we must go via pickle - request.config.cache.set(cache_key, - pickle.dumps(cubes).decode('latin1')) + calendars = set(cube.coord('time').units.calendar for cube in result_cubes) - fix_metadata(cubes) + assert len(calendars) == 1 + assert list(calendars)[0] in ('standard', 'gregorian') - def calendar(cube): - return cube.coord('time').units.calendar + shapes = set(cube.shape for cube in result_cubes) - # groupby requires sorted list - grouped = groupby(sorted(cubes, key=calendar), key=calendar) + assert len(shapes) == 1 + assert tuple(shapes)[0] == (len_data, ) - cube_dict = {key: list(group) for key, group in grouped} - return cube_dict +@pytest.mark.parametrize('span', SPAN_OPTIONS) +def test_combine_same_shape(span): + """Test _combine with same shape of cubes.""" + len_data = 3 + num_cubes = 5 + cubes = [] + for i in range(num_cubes): + cube = generate_cube_from_dates('monthly', + '360_day', + fill_val=i, + len_data=len_data) + cubes.append(cube) -def multimodel_test(cubes, statistic, span): - """Run multimodel test with some simple checks.""" - statistics = [statistic] + result_cube = mm._combine(cubes) + + dim_coord = result_cube.coord(mm.CONCAT_DIM) + assert dim_coord.var_name == mm.CONCAT_DIM + assert result_cube.shape == (num_cubes, len_data) + + desired = np.linspace((0, ) * len_data, + num_cubes - 1, + num=num_cubes, + dtype=int) + np.testing.assert_equal(result_cube.data, desired) - result = multi_model_statistics(products=cubes, - statistics=statistics, - span=span) - assert isinstance(result, dict) - assert statistic in result - return result +def test_combine_different_shape_fail(): + """Test _combine with inconsistent data.""" + num_cubes = 5 + cubes = [] + for num in range(1, num_cubes + 1): + cube = generate_cube_from_dates('monthly', '360_day', len_data=num) + cubes.append(cube) -def multimodel_regression_test(cubes, span, name): - """Run multimodel regression test. + with pytest.raises(iris.exceptions.MergeError): + _ = mm._combine(cubes) - This test will fail if the input data or multimodel code changed. To - update the data for the regression test, remove the corresponding - `.nc` files in this directory and re-run the tests. The tests will - fail the first time with a RuntimeError, because the reference data - are being written. + +def test_combine_inconsistent_var_names_fail(): + """Test _combine with inconsistent var names.""" + num_cubes = 5 + cubes = [] + + for num in range(num_cubes): + cube = generate_cube_from_dates('monthly', + '360_day', + var_name=f'test_var_{num}') + cubes.append(cube) + + with pytest.raises(iris.exceptions.MergeError): + _ = mm._combine(cubes) + + +@pytest.mark.parametrize('scalar_coord', ['p0', 'ptop']) +def test_combine_with_scalar_coords_to_remove(scalar_coord): + """Test _combine with scalar coordinates that should be removed.""" + num_cubes = 5 + cubes = [] + + for num in range(num_cubes): + cube = generate_cube_from_dates('monthly') + cubes.append(cube) + + scalar_coord_0 = AuxCoord(0.0, var_name=scalar_coord) + scalar_coord_1 = AuxCoord(1.0, var_name=scalar_coord) + cubes[0].add_aux_coord(scalar_coord_0, ()) + cubes[1].add_aux_coord(scalar_coord_1, ()) + + merged_cube = mm._combine(cubes) + assert merged_cube.shape == (5, 3) + + +def test_combine_preserve_equal_coordinates(): + """Test ``_combine`` with equal input coordinates.""" + cubes = get_cube_for_equal_coords_test(5) + merged_cube = mm._combine(cubes) + + # The equal coordinate ('year') was not changed; the non-equal one ('x') + # does not have a long_name and attributes anymore + assert merged_cube.coord('year').var_name == 'year' + assert merged_cube.coord('year').standard_name is None + assert merged_cube.coord('year').long_name == 'year' + assert merged_cube.coord('year').attributes == {'test': 1} + assert merged_cube.coord('x').var_name == 'x' + assert merged_cube.coord('x').standard_name is None + assert merged_cube.coord('x').long_name is None + assert merged_cube.coord('x').attributes == {} + + +def test_equalise_coordinates_no_cubes(): + """Test that _equalise_coordinates doesn't fail with empty cubes.""" + mm._equalise_coordinates([]) + + +def test_equalise_coordinates_one_cube(): + """Test that _equalise_coordinates doesn't fail with a single cubes.""" + cube = generate_cube_from_dates('monthly') + new_cube = cube.copy() + mm._equalise_coordinates([new_cube]) + assert new_cube is not cube + assert new_cube == cube + + +@pytest.mark.parametrize('span', SPAN_OPTIONS) +def test_edge_case_different_time_offsets(span): + cubes = ( + generate_cube_from_dates('monthly', + '360_day', + offset='days since 1888-01-01'), + generate_cube_from_dates('monthly', + '360_day', + offset='days since 1899-01-01'), + ) + + statistic = 'min' + statistics = (statistic, ) + + result = multi_model_statistics(cubes, span, statistics) + + result_cube = result[statistic] + + time_coord = result_cube.coord('time') + + assert time_coord.units.calendar in ('standard', 'gregorian') + assert time_coord.units.origin == 'days since 1850-01-01' + + desired = np.array((14., 45., 73.)) + np.testing.assert_array_equal(time_coord.points, desired) + + +def generate_cubes_with_non_overlapping_timecoords(): + """Generate sample data where time coords do not overlap.""" + time_points = range(1, 4) + dates1 = [datetime(1850, i, 15, 0, 0, 0) for i in time_points] + dates2 = [datetime(1950, i, 15, 0, 0, 0) for i in time_points] + + return ( + generate_cube_from_dates(dates1), + generate_cube_from_dates(dates2), + ) + + +@pytest.mark.xfail(reason='Multimodel statistics returns the original cubes.') +def test_edge_case_time_no_overlap_fail(): + """Test case when time coords do not overlap using span='overlap'. + + Expected behaviour: `multi_model_statistics` should fail if time + points are not overlapping. """ - statistic = 'mean' - result = multimodel_test(cubes, statistic=statistic, span=span) + cubes = generate_cubes_with_non_overlapping_timecoords() + + statistic = 'min' + statistics = (statistic, ) + + with pytest.raises(ValueError): + _ = multi_model_statistics(cubes, 'overlap', statistics) + + +def test_edge_case_time_no_overlap_success(): + """Test case when time coords do not overlap using span='full'. + + Expected behaviour: `multi_model_statistics` should use all + available time points. + """ + cubes = generate_cubes_with_non_overlapping_timecoords() + + statistic = 'min' + statistics = (statistic, ) + + result = multi_model_statistics(cubes, 'full', statistics) result_cube = result[statistic] - filename = Path(__file__).with_name(f'{name}-{span}-{statistic}.nc') - if filename.exists(): - reference_cube = iris.load_cube(str(filename)) + assert result_cube.coord('time').shape == (6, ) - assert_array_almost_equal(result_cube.data, reference_cube.data) - assert_metadata_equal(result_cube.metadata, reference_cube.metadata) - assert_coords_equal(result_cube.coords(), reference_cube.coords()) - else: - # The test will fail if no regression data are available. - iris.save(result_cube, filename) - raise RuntimeError(f'Wrote reference data to {filename.absolute()}') - - -@pytest.mark.xfail( - raises=iris.exceptions.MergeError, - reason='https://github.com/ESMValGroup/ESMValCore/issues/956') -@pytest.mark.use_sample_data -@pytest.mark.parametrize('span', SPAN_PARAMS) -def test_multimodel_regression_month(timeseries_cubes_month, span): - """Test statistic.""" - cubes = timeseries_cubes_month - name = 'timeseries_monthly' - multimodel_regression_test( - name=name, - span=span, - cubes=cubes, +@pytest.mark.parametrize('span', SPAN_OPTIONS) +def test_edge_case_time_not_in_middle_of_months(span): + """Test case when time coords are not on 15th for monthly data. + + Expected behaviour: `multi_model_statistics` will set all dates to + the 15th. + """ + time_points = range(1, 4) + dates1 = [datetime(1850, i, 12, 0, 0, 0) for i in time_points] + dates2 = [datetime(1850, i, 25, 0, 0, 0) for i in time_points] + + cubes = ( + generate_cube_from_dates(dates1), + generate_cube_from_dates(dates2), ) + statistic = 'min' + statistics = (statistic, ) + + result = multi_model_statistics(cubes, span, statistics) + result_cube = result[statistic] + + time_coord = result_cube.coord('time') + + desired = np.array((14., 45., 73.)) + np.testing.assert_array_equal(time_coord.points, desired) + + +@pytest.mark.parametrize('span', SPAN_OPTIONS) +def test_edge_case_sub_daily_data_fail(span): + """Test case when cubes with sub-daily time coords are passed.""" + cube = generate_cube_from_dates('hourly') + cubes = (cube, cube) + + statistic = 'min' + statistics = (statistic, ) + + with pytest.raises(ValueError): + _ = multi_model_statistics(cubes, span, statistics) + + +@pytest.mark.parametrize('span', SPAN_OPTIONS) +def test_edge_case_single_cube_fail(span): + """Test that an error is raised when a single cube is passed.""" + cube = generate_cube_from_dates('monthly') + cubes = (cube, ) + + statistic = 'min' + statistics = (statistic, ) + + with pytest.raises(ValueError): + _ = multi_model_statistics(cubes, span, statistics) + + +def test_unify_time_coordinates(): + """Test set common calendar.""" + cube1 = generate_cube_from_dates('monthly', + calendar='360_day', + offset='days since 1850-01-01') + cube2 = generate_cube_from_dates('monthly', + calendar='standard', + offset='days since 1943-05-16') + + mm._unify_time_coordinates([cube1, cube2]) + + assert cube1.coord('time') == cube2.coord('time') + + +class PreprocessorFile: + """Mockup to test output of multimodel.""" + + def __init__(self, cube=None, attributes=None): + if cube: + self.cubes = [cube] + if attributes: + self.attributes = attributes + + def wasderivedfrom(self, product): + pass + + def group(self, keys: list) -> str: + """Generate group keyword. + + Returns a string that identifies a group. Concatenates a list of + values from .attributes + """ + if not keys: + return '' + + if isinstance(keys, str): + keys = [keys] + + identifier = [] + for key in keys: + attribute = self.attributes.get(key) + if attribute: + if isinstance(attribute, (list, tuple)): + attribute = '-'.join(attribute) + identifier.append(attribute) + + return '_'.join(identifier) -@pytest.mark.use_sample_data -@pytest.mark.parametrize('calendar', CALENDAR_PARAMS) -@pytest.mark.parametrize('span', SPAN_PARAMS) -def test_multimodel_regression_day(timeseries_cubes_day, span, calendar): - """Test statistic.""" - cubes = timeseries_cubes_day[calendar] - name = f'timeseries_daily_{calendar}' - multimodel_regression_test( - name=name, - span=span, - cubes=cubes, + +def test_return_products(): + """Check that the right product set is returned.""" + cube1 = generate_cube_from_dates('monthly', fill_val=1) + cube2 = generate_cube_from_dates('monthly', fill_val=9) + + input1 = PreprocessorFile(cube1) + input2 = PreprocessorFile(cube2) + + products = set([input1, input2]) + + output = PreprocessorFile() + output_products = {'': {'mean': output}} + + kwargs = { + 'statistics': ['mean'], + 'span': 'full', + 'output_products': output_products[''] + } + + result1 = mm._multiproduct_statistics(products, + keep_input_datasets=True, + **kwargs) + + result2 = mm._multiproduct_statistics(products, + keep_input_datasets=False, + **kwargs) + + assert result1 == set([input1, input2, output]) + assert result2 == set([output]) + + kwargs['output_products'] = output_products + result3 = mm.multi_model_statistics(products, **kwargs) + result4 = mm.multi_model_statistics(products, + keep_input_datasets=False, + **kwargs) + + assert result3 == result1 + assert result4 == result2 + + +def test_ensemble_products(): + cube1 = generate_cube_from_dates('monthly', fill_val=1) + cube2 = generate_cube_from_dates('monthly', fill_val=9) + + attributes1 = { + 'project': 'project', 'dataset': 'dataset', + 'exp': 'exp', 'ensemble': '1'} + input1 = PreprocessorFile(cube1, attributes=attributes1) + + attributes2 = { + 'project': 'project', 'dataset': 'dataset', + 'exp': 'exp', 'ensemble': '2'} + input2 = PreprocessorFile(cube2, attributes=attributes2) + + attributes3 = { + 'project': 'project', 'dataset': 'dataset2', + 'exp': 'exp', 'ensemble': '1'} + input3 = PreprocessorFile(cube1, attributes=attributes3) + + attributes4 = { + 'project': 'project', 'dataset': 'dataset2', + 'exp': 'exp', 'ensemble': '2'} + + input4 = PreprocessorFile(cube1, attributes=attributes4) + products = set([input1, input2, input3, input4]) + + output1 = PreprocessorFile() + output2 = PreprocessorFile() + output_products = { + 'project_dataset_exp': {'mean': output1}, + 'project_dataset2_exp': {'mean': output2}} + + kwargs = { + 'statistics': ['mean'], + 'output_products': output_products, + } + + result = mm.ensemble_statistics( + products, **kwargs) + assert len(result) == 2 + + +def test_ignore_tas_scalar_height_coord(): + """Ignore conflicting aux_coords for height in tas.""" + tas_2m = generate_cube_from_dates("monthly") + tas_1p5m = generate_cube_from_dates("monthly") + + for cube, height in zip([tas_2m, tas_1p5m], [2., 1.5]): + cube.rename("air_temperature") + cube.attributes["short_name"] = "tas" + cube.add_aux_coord( + iris.coords.AuxCoord([height], var_name="height", units="m")) + + result = mm.multi_model_statistics( + [tas_2m, tas_2m.copy(), tas_1p5m], statistics=['mean'], span='full') + + # iris automatically averages the value of the scalar coordinate. + assert len(result['mean'].coords("height")) == 1 + assert result["mean"].coord("height").points == 1.75 + + +def test_daily_inconsistent_calendars(): + """Determine behaviour for inconsistent calendars. + + Deviating calendars should be converted to standard. Missing data + inside original bounds is filled with nearest neighbour Missing data + outside original bounds is masked. + """ + ref_standard = Unit("days since 1850-01-01", calendar="standard") + ref_noleap = Unit("days since 1850-01-01", calendar="noleap") + start = date2num(datetime(1852, 1, 1), ref_standard) + + # 1852 is a leap year, and include 1 extra day at the end + leapdates = cftime.num2date(start + np.arange(367), + ref_standard.name, ref_standard.calendar) + + noleapdates = cftime.num2date(start + np.arange(365), + ref_noleap.name, ref_noleap.calendar) + + leapcube = generate_cube_from_dates( + leapdates, + calendar='standard', + offset='days since 1850-01-01', + fill_val=1, ) + noleapcube = generate_cube_from_dates( + noleapdates, + calendar='noleap', + offset='days since 1850-01-01', + fill_val=3, + ) -@pytest.mark.use_sample_data -def test_multimodel_no_vertical_dimension(timeseries_cubes_month): - """Test statistic without vertical dimension using monthly data.""" - span = 'full' - cubes = timeseries_cubes_month - cubes = [cube[:, 0] for cube in cubes] - multimodel_test(cubes, span=span, statistic='mean') - - -@pytest.mark.use_sample_data -@pytest.mark.xfail( - raises=iris.exceptions.MergeError, - reason='https://github.com/ESMValGroup/ESMValCore/issues/956') -# @pytest.mark.xfail( -# raises=iris.exceptions.CoordinateNotFoundError, -# reason='https://github.com/ESMValGroup/ESMValCore/issues/891') -def test_multimodel_no_horizontal_dimension(timeseries_cubes_month): - """Test statistic without horizontal dimension using monthly data.""" - span = 'full' - cubes = timeseries_cubes_month - cubes = [cube[:, :, 0, 0] for cube in cubes] - # Coordinate not found error - # iris.exceptions.CoordinateNotFoundError: - # 'Expected to find exactly 1 depth coordinate, but found none.' - multimodel_test(cubes, span=span, statistic='mean') - - -@pytest.mark.use_sample_data -def test_multimodel_only_time_dimension(timeseries_cubes_month): - """Test statistic without only the time dimension using monthly data.""" - cubes = timeseries_cubes_month - span = 'full' - cubes = [cube[:, 0, 0, 0] for cube in cubes] - multimodel_test(cubes, span=span, statistic='mean') - - -@pytest.mark.use_sample_data -@pytest.mark.xfail( - raises=ValueError, - reason='https://github.com/ESMValGroup/ESMValCore/issues/890') -def test_multimodel_no_time_dimension(timeseries_cubes_month): - """Test statistic without time dimension using monthly data.""" - span = 'full' - cubes = timeseries_cubes_month - cubes = [cube[0] for cube in cubes] - # ValueError: Cannot guess bounds for a coordinate of length 1. - multimodel_test(cubes, span=span, statistic='mean') + cubes = [leapcube, noleapcube] + + # span=full + aligned_cubes = mm._align(cubes, span='full') + for cube in aligned_cubes: + assert cube.coord('time').units.calendar in ("standard", "gregorian") + assert cube.shape == (367, ) + assert cube[59].coord('time').points == 789 # 29 Feb 1852 + np.ma.is_masked(aligned_cubes[1][366].data) # outside original range + + result = multi_model_statistics(cubes, span="full", statistics=['mean']) + result_cube = result['mean'] + assert result_cube[59].data == 2 # looked up nearest neighbour + assert result_cube[366].data == 1 # outside original range + + # span=overlap + aligned_cubes = mm._align(cubes, span='overlap') + for cube in aligned_cubes: + assert cube.coord('time').units.calendar in ("standard", "gregorian") + assert cube.shape == (365, ) + assert cube[59].coord('time').points == 790 # 1 March 1852 + + result = multi_model_statistics(cubes, span="overlap", statistics=['mean']) + result_cube = result['mean'] + assert result_cube[59].data == 2 + + +def test_remove_fx_variables(): + """Test fx variables are removed from cubes.""" + cube1 = generate_cube_from_dates("monthly") + fx_cube = generate_cube_from_dates("monthly") + fx_cube.standard_name = "land_area_fraction" + add_ancillary_variable(cube1, fx_cube) + + cube2 = generate_cube_from_dates("monthly", fill_val=9) + result = mm.multi_model_statistics([cube1, cube2], + statistics=['mean'], + span='full') + assert result['mean'].ancillary_variables() == [] + + +def test_no_warn_model_dim_non_contiguous(recwarn): + """Test that now warning is raised that model dim is non-contiguous.""" + coord = iris.coords.DimCoord( + [0.5, 1.5], + bounds=[[0, 1.], [1., 2.]], + standard_name='time', + units='days since 1850-01-01', + ) + cube1 = iris.cube.Cube([1, 1], dim_coords_and_dims=[(coord, 0)]) + cube2 = iris.cube.Cube([2, 2], dim_coords_and_dims=[(coord, 0)]) + cubes = [cube1, cube2] + + multi_model_statistics(cubes, span="overlap", statistics=['mean']) + msg = ("Collapsing a non-contiguous coordinate. " + "Metadata may not be fully descriptive for 'multi-model'.") + for warning in recwarn: + assert str(warning.message) != msg + + +def test_map_to_new_time_int_coords(): + """Test ``_map_to_new_time`` with integer time coords.""" + cube = generate_cube_from_dates('yearly') + iris.coord_categorisation.add_year(cube, 'time') + decade_coord = AuxCoord([1850, 1850, 1850], bounds=[[1845, 1855]] * 3, + long_name='decade') + cube.add_aux_coord(decade_coord, 0) + target_points = [200.0, 500.0, 1000.0] + + out_cube = mm._map_to_new_time(cube, target_points) + + assert_array_allclose(out_cube.data, + np.ma.masked_invalid([1.0, 1.0, np.nan])) + assert_array_allclose(out_cube.coord('time').points, target_points) + assert_array_allclose(out_cube.coord('year').points, + np.ma.masked_invalid([1850, 1851, np.nan])) + assert_array_allclose(out_cube.coord('decade').points, + np.ma.masked_invalid([1850, 1850, np.nan])) + assert out_cube.coord('year').bounds is None + assert out_cube.coord('decade').bounds is None + assert np.issubdtype(out_cube.coord('year').dtype, np.integer) + assert np.issubdtype(out_cube.coord('decade').dtype, np.integer) + + +def test_preserve_equal_coordinates(): + """Test ``multi_model_statistics`` with equal input coordinates.""" + cubes = get_cube_for_equal_coords_test(5) + stat_cubes = multi_model_statistics(cubes, span='overlap', + statistics=['sum']) + + assert len(stat_cubes) == 1 + assert 'sum' in stat_cubes + stat_cube = stat_cubes['sum'] + assert_array_allclose(stat_cube.data, np.ma.array([5.0, 5.0, 5.0])) + + # The equal coordinate ('year') was not changed; the non-equal one ('x') + # does not have a long_name and attributes anymore + assert stat_cube.coord('year').var_name == 'year' + assert stat_cube.coord('year').standard_name is None + assert stat_cube.coord('year').long_name == 'year' + assert stat_cube.coord('year').attributes == {'test': 1} + assert stat_cube.coord('x').var_name == 'x' + assert stat_cube.coord('x').standard_name is None + assert stat_cube.coord('x').long_name is None + assert stat_cube.coord('x').attributes == {} diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py index 7d1f6bc2bc..00ced5a348 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ -22,7 +22,7 @@ FREQUENCY_OPTIONS = ('daily', 'monthly', 'yearly') # hourly -CALENDAR_OPTIONS = ('360_day', '365_day', 'gregorian', 'proleptic_gregorian', +CALENDAR_OPTIONS = ('360_day', '365_day', 'standard', 'proleptic_gregorian', 'julian') @@ -35,7 +35,7 @@ def assert_array_allclose(this, other): def timecoord(frequency, - calendar='gregorian', + calendar='standard', offset='days since 1850-01-01', num=3): """Return a time coordinate with the given time points and calendar.""" @@ -58,7 +58,7 @@ def timecoord(frequency, def generate_cube_from_dates( dates, - calendar='gregorian', + calendar='standard', offset='days since 1850-01-01', fill_val=1, len_data=3, @@ -350,19 +350,19 @@ def test_unsupported_statistics_fail(statistic, error): @pytest.mark.parametrize('calendar1, calendar2, expected', ( - ('360_day', '360_day', '360_day'), - ('365_day', '365_day', '365_day'), - ('365_day', '360_day', 'gregorian'), - ('360_day', '365_day', 'gregorian'), - ('gregorian', '365_day', 'gregorian'), - ('proleptic_gregorian', 'julian', 'gregorian'), - ('julian', '365_day', 'gregorian'), + ('360_day', '360_day', ('360_day',)), + ('365_day', '365_day', ('365_day',)), + ('365_day', '360_day', ('standard', 'gregorian')), + ('360_day', '365_day', ('standard', 'gregorian')), + ('standard', '365_day', ('standard', 'gregorian')), + ('proleptic_gregorian', 'julian', ('standard', 'gregorian')), + ('julian', '365_day', ('standard', 'gregorian')), )) def test_get_consistent_time_unit(calendar1, calendar2, expected): """Test same calendar returned or default if calendars differ. Expected behaviour: If the calendars are the same, return that one. - If the calendars are not the same, return 'gregorian'. + If the calendars are not the same, return 'standard'. """ cubes = ( generate_cube_from_dates('monthly', calendar=calendar1), @@ -370,7 +370,7 @@ def test_get_consistent_time_unit(calendar1, calendar2, expected): ) result = mm._get_consistent_time_unit(cubes) - assert result.calendar == expected + assert result.calendar in expected @pytest.mark.parametrize('span', SPAN_OPTIONS) @@ -395,7 +395,7 @@ def test_align(span): calendars = set(cube.coord('time').units.calendar for cube in result_cubes) assert len(calendars) == 1 - assert list(calendars)[0] == 'gregorian' + assert list(calendars)[0] in ('standard', 'gregorian') shapes = set(cube.shape for cube in result_cubes) @@ -528,7 +528,7 @@ def test_edge_case_different_time_offsets(span): time_coord = result_cube.coord('time') - assert time_coord.units.calendar == 'gregorian' + assert time_coord.units.calendar in ('standard', 'gregorian') assert time_coord.units.origin == 'days since 1850-01-01' desired = np.array((14., 45., 73.)) @@ -640,7 +640,7 @@ def test_unify_time_coordinates(): calendar='360_day', offset='days since 1850-01-01') cube2 = generate_cube_from_dates('monthly', - calendar='gregorian', + calendar='standard', offset='days since 1943-05-16') mm._unify_time_coordinates([cube1, cube2]) @@ -787,7 +787,7 @@ def test_ignore_tas_scalar_height_coord(): def test_daily_inconsistent_calendars(): """Determine behaviour for inconsistent calendars. - Deviating calendars should be converted to gregorian. Missing data + Deviating calendars should be converted to standard. Missing data inside original bounds is filled with nearest neighbour Missing data outside original bounds is masked. """ @@ -804,7 +804,7 @@ def test_daily_inconsistent_calendars(): leapcube = generate_cube_from_dates( leapdates, - calendar='gregorian', + calendar='standard', offset='days since 1850-01-01', fill_val=1, ) @@ -821,7 +821,7 @@ def test_daily_inconsistent_calendars(): # span=full aligned_cubes = mm._align(cubes, span='full') for cube in aligned_cubes: - assert cube.coord('time').units.calendar == "gregorian" + assert cube.coord('time').units.calendar in ("standard", "gregorian") assert cube.shape == (367, ) assert cube[59].coord('time').points == 789 # 29 Feb 1852 np.ma.is_masked(aligned_cubes[1][366].data) # outside original range @@ -834,7 +834,7 @@ def test_daily_inconsistent_calendars(): # span=overlap aligned_cubes = mm._align(cubes, span='overlap') for cube in aligned_cubes: - assert cube.coord('time').units.calendar == "gregorian" + assert cube.coord('time').units.calendar in ("standard", "gregorian") assert cube.shape == (365, ) assert cube[59].coord('time').points == 790 # 1 March 1852