diff --git a/doc/changelog.rst b/doc/changelog.rst
index c4a2fb90de..0b1170fe2f 100644
--- a/doc/changelog.rst
+++ b/doc/changelog.rst
@@ -2,6 +2,28 @@ Changelog
 =========
 
 
+.. _changelog-v2-7-1:
+
+
+v2.7.1
+------
+Highlights
+~~~~~~~~~~
+
+This is a bugfix release where we unpin ``cf-units`` to allow the latest ``iris=3.4.0`` to be installed.
+
+Installation
+~~~~~~~~~~~~
+
+- Unpin cf-units (`#1770 <https://github.com/ESMValGroup/ESMValCore/pull/1770>`__) `Bouwe Andela <https://github.com/bouweandela>`__
+
+Bug fixes
+~~~~~~~~~
+
+- Improve error handling if an ESGF index node is offline (`#1834 <https://github.com/ESMValGroup/ESMValCore/pull/1834>`__) `Bouwe Andela <https://github.com/bouweandela>`__
+
+
+
 .. _changelog-v2-7-0:
 
 
diff --git a/environment.yml b/environment.yml
index c219e6a5a4..53253bcb78 100644
--- a/environment.yml
+++ b/environment.yml
@@ -2,20 +2,19 @@
 name: esmvaltool
 channels:
   - conda-forge
+  - nodefaults
 
 dependencies:
   - cartopy
-  - cf-units>=3.0.0,<3.1.0  # github.com/ESMValGroup/ESMValCore/issues/1655
+  - cf-units
   - cftime
   - compilers
-  # 1.8.18/py39, they seem weary to build manylinux wheels
-  # and pypi ver built with older gdal
   - fiona
   - esmpy!=8.1.0  # see github.com/ESMValGroup/ESMValCore/issues/1208
   - geopy
   - iris>=3.2.1
   - nested-lookup
-  - netcdf4!=1.6.1  # github.com/ESMValGroup/ESMValCore/issues/1723
+  - netcdf4
   - pandas
   - pillow
   - pip!=21.3
diff --git a/esmvalcore/cmor/check.py b/esmvalcore/cmor/check.py
index 734d401726..5712ecc22b 100644
--- a/esmvalcore/cmor/check.py
+++ b/esmvalcore/cmor/check.py
@@ -945,7 +945,7 @@ def _simplify_calendar(calendar):
         calendar_aliases = {
             'all_leap': '366_day',
             'noleap': '365_day',
-            'standard': 'gregorian',
+            'gregorian': 'standard',
         }
         return calendar_aliases.get(calendar, calendar)
 
diff --git a/esmvalcore/esgf/_search.py b/esmvalcore/esgf/_search.py
index 9dc831f992..28198317c9 100644
--- a/esmvalcore/esgf/_search.py
+++ b/esmvalcore/esgf/_search.py
@@ -110,7 +110,12 @@ def _search_index_nodes(facets):
             )
             FIRST_ONLINE_INDEX_NODE = url
             return results
-        except requests.exceptions.Timeout as error:
+        except (
+            requests.exceptions.ConnectionError,
+            requests.exceptions.HTTPError,
+            requests.exceptions.Timeout,
+        ) as error:
+            logger.debug("Unable to connect to %s due to %s", url, error)
             errors.append(error)
 
     raise FileNotFoundError("Failed to search ESGF, unable to connect:\n" +
diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py
index 9c9fa9662e..1b994ecbad 100644
--- a/esmvalcore/preprocessor/_multimodel.py
+++ b/esmvalcore/preprocessor/_multimodel.py
@@ -96,7 +96,7 @@ def _unify_time_coordinates(cubes):
     are no mismatches in the time arrays.
 
     If cubes have different time units, it will reset the calendar to a
-    default gregorian calendar with unit "days since 1850-01-01".
+    "standard" calendar with unit "days since 1850-01-01".
 
     Might not work for (sub)daily data, because different calendars may have
     different number of days in the year.
diff --git a/pyproject.toml b/pyproject.toml
index 7688e015c5..a1dce171b6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,6 +3,7 @@ requires = ["setuptools >= 40.6.0", "wheel", "setuptools_scm>=6.2"]
 build-backend = "setuptools.build_meta"
 
 [tool.setuptools_scm]
+version_scheme = "release-branch-semver"
 
 [tool.pylint.main]
 jobs = 0
diff --git a/setup.py b/setup.py
index 528d34d0d7..0058f0e0ce 100755
--- a/setup.py
+++ b/setup.py
@@ -28,8 +28,7 @@
     # Use with pip install . to install from source
     'install': [
         'cartopy',
-        # see https://github.com/SciTools/cf-units/issues/218
-        'cf-units>=3.0.0,<3.1.0,!=3.0.1.post0',  # ESMValCore/issues/1655
+        'cf-units',
         'dask[array]',
         'esgf-pyclient>=0.3.1',
         'esmpy!=8.1.0',  # see github.com/ESMValGroup/ESMValCore/issues/1208
@@ -42,7 +41,7 @@
         'jinja2',
         'nc-time-axis',  # needed by iris.plot
         'nested-lookup',
-        'netCDF4!=1.6.1',  # github.com/ESMValGroup/ESMValCore/issues/1723
+        'netCDF4',
         'numpy',
         'pandas',
         'pillow',
@@ -59,7 +58,8 @@
     ],
     # Test dependencies
     'test': [
-        'flake8<5.0',  # github.com/ESMValGroup/ESMValCore/issues/1696
+        'flake8',
+        'mypy<0.990',
         'pytest>=3.9,!=6.0.0rc1,!=6.0.0',
         'pytest-cov>=2.10.1',
         'pytest-env',
diff --git a/tests/integration/cmor/_fixes/cmip5/test_access1_0.py b/tests/integration/cmor/_fixes/cmip5/test_access1_0.py
index 5a89460a9d..1312762aab 100644
--- a/tests/integration/cmor/_fixes/cmip5/test_access1_0.py
+++ b/tests/integration/cmor/_fixes/cmip5/test_access1_0.py
@@ -13,45 +13,54 @@
 from esmvalcore.iris_helpers import date2num
 
 
-class TestAllVars(unittest.TestCase):
-    """Test all vars fixes."""
+@pytest.fixture
+def cube():
+    """Cube for testing."""
+    test_cube = Cube([1.0, 2.0], var_name='co2', units='J')
+    reference_dates = [
+        datetime(300, 1, 16, 12),  # e.g. piControl
+        datetime(1850, 1, 16, 12)  # e.g. historical
+    ]
+    esgf_time_units = Unit(
+        'days since 0001-01-01',
+        calendar='proleptic_gregorian',
+    )
+    time_points = date2num(reference_dates, esgf_time_units)
+    test_cube.add_dim_coord(
+        DimCoord(time_points, 'time', 'time', 'time', esgf_time_units),
+        data_dim=0,
+    )
+    return test_cube
+
 
-    def setUp(self):
-        """Prepare tests."""
-        self.cube = Cube([1.0, 2.0], var_name='co2', units='J')
-        reference_dates = [
-            datetime(300, 1, 16, 12),  # e.g. piControl
-            datetime(1850, 1, 16, 12)  # e.g. historical
-        ]
-        esgf_time_units = Unit('days since 0001-01-01',
-                               calendar='proleptic_gregorian')
-        time_points = date2num(reference_dates, esgf_time_units)
-        self.cube.add_dim_coord(
-            DimCoord(time_points, 'time', 'time', 'time', esgf_time_units),
-            data_dim=0)
-        self.fix = AllVars(None)
+class TestAllVars:
+    """Test all vars fixes."""
 
-    def test_get(self):
+    @staticmethod
+    def test_get():
         """Test getting of fix."""
-        self.assertListEqual(
-            Fix.get_fixes('CMIP5', 'ACCESS1-0', 'Amon', 'tas'),
-            [AllVars(None)])
+        assert (Fix.get_fixes('CMIP5', 'ACCESS1-0', 'Amon', 'tas')
+                == [AllVars(None)])
 
-    def test_fix_metadata(self):
+    @staticmethod
+    def test_fix_metadata(cube):
         """Test fix for bad calendar."""
-        cube = self.fix.fix_metadata([self.cube])[0]
+        fix = AllVars(None)
+        cube = fix.fix_metadata([cube])[0]
         time = cube.coord('time')
         dates = num2date(time.points, time.units.name, time.units.calendar)
-        self.assertEqual(time.units.calendar, 'gregorian')
-        u = Unit('days since 300-01-01 12:00:00', calendar='gregorian')
-        self.assertEqual(dates[0], u.num2date(15))
-        u = Unit('days since 1850-01-01 12:00:00', calendar='gregorian')
-        self.assertEqual(dates[1], u.num2date(15))
+        assert time.units.calendar in ('standard', 'gregorian')
+        u = Unit('days since 300-01-01 12:00:00', calendar='standard')
+        assert dates[0] == u.num2date(15)
+        u = Unit('days since 1850-01-01 12:00:00', calendar='standard')
+        assert dates[1] == u.num2date(15)
 
-    def test_fix_metadata_if_not_time(self):
+    @staticmethod
+    def test_fix_metadata_if_not_time(cube):
         """Test calendar fix do not fail if no time coord present."""
-        self.cube.remove_coord('time')
-        self.fix.fix_metadata([self.cube])
+        cube.remove_coord('time')
+        fix = AllVars(None)
+        fix.fix_metadata([cube])
 
 
 def test_get_cl_fix():
diff --git a/tests/integration/cmor/_fixes/cmip5/test_access1_3.py b/tests/integration/cmor/_fixes/cmip5/test_access1_3.py
index d28286078f..68d59201de 100644
--- a/tests/integration/cmor/_fixes/cmip5/test_access1_3.py
+++ b/tests/integration/cmor/_fixes/cmip5/test_access1_3.py
@@ -1,7 +1,7 @@
 """Test fixes for ACCESS1-3."""
-import unittest
 from datetime import datetime
 
+import pytest
 from cf_units import Unit, num2date
 from iris.coords import DimCoord
 from iris.cube import Cube
@@ -12,45 +12,54 @@
 from esmvalcore.iris_helpers import date2num
 
 
-class TestAllVars(unittest.TestCase):
-    """Test fixes for all vars."""
+@pytest.fixture
+def cube():
+    """Cube for testing."""
+    test_cube = Cube([1.0, 2.0], var_name='co2', units='J')
+    reference_dates = [
+        datetime(300, 1, 16, 12),  # e.g. piControl
+        datetime(1850, 1, 16, 12)  # e.g. historical
+    ]
+    esgf_time_units = Unit(
+        'days since 0001-01-01',
+        calendar='proleptic_gregorian',
+    )
+    time_points = date2num(reference_dates, esgf_time_units)
+    test_cube.add_dim_coord(
+        DimCoord(time_points, 'time', 'time', 'time', esgf_time_units),
+        data_dim=0,
+    )
+    return test_cube
+
 
-    def setUp(self):
-        """Prepare tests."""
-        self.cube = Cube([1.0, 2.0], var_name='co2', units='J')
-        reference_dates = [
-            datetime(300, 1, 16, 12),  # e.g. piControl
-            datetime(1850, 1, 16, 12)  # e.g. historical
-        ]
-        esgf_time_units = Unit('days since 0001-01-01',
-                               calendar='proleptic_gregorian')
-        time_points = date2num(reference_dates, esgf_time_units)
-        self.cube.add_dim_coord(
-            DimCoord(time_points, 'time', 'time', 'time', esgf_time_units),
-            data_dim=0)
-        self.fix = AllVars(None)
+class TestAllVars:
+    """Test fixes for all vars."""
 
-    def test_get(self):
+    @staticmethod
+    def test_get():
         """Test getting of fix."""
-        self.assertListEqual(
-            Fix.get_fixes('CMIP5', 'ACCESS1-3', 'Amon', 'tas'),
-            [AllVars(None)])
+        assert (Fix.get_fixes('CMIP5', 'ACCESS1-3', 'Amon', 'tas')
+                == [AllVars(None)])
 
-    def test_fix_metadata(self):
+    @staticmethod
+    def test_fix_metadata(cube):
         """Test fix for bad calendar."""
-        cube = self.fix.fix_metadata([self.cube])[0]
+        fix = AllVars(None)
+        cube = fix.fix_metadata([cube])[0]
         time = cube.coord('time')
         dates = num2date(time.points, time.units.name, time.units.calendar)
-        self.assertEqual(time.units.calendar, 'gregorian')
-        u = Unit('days since 300-01-01 12:00:00', calendar='gregorian')
-        self.assertEqual(dates[0], u.num2date(15))
-        u = Unit('days since 1850-01-01 12:00:00', calendar='gregorian')
-        self.assertEqual(dates[1], u.num2date(15))
+        assert time.units.calendar in ('standard', 'gregorian')
+        u = Unit('days since 300-01-01 12:00:00', calendar='standard')
+        assert dates[0] == u.num2date(15)
+        u = Unit('days since 1850-01-01 12:00:00', calendar='standard')
+        assert dates[1] == u.num2date(15)
 
-    def test_fix_metadata_if_not_time(self):
+    @staticmethod
+    def test_fix_metadata_if_not_time(cube):
         """Test calendar fix do not fail if no time coord present."""
-        self.cube.remove_coord('time')
-        self.fix.fix_metadata([self.cube])
+        cube.remove_coord('time')
+        fix = AllVars(None)
+        fix.fix_metadata([cube])
 
 
 def test_get_cl_fix():
diff --git a/tests/integration/cmor/_fixes/cmip5/test_fgoals_g2.py b/tests/integration/cmor/_fixes/cmip5/test_fgoals_g2.py
index 2cfffcdb3f..abda3d180b 100644
--- a/tests/integration/cmor/_fixes/cmip5/test_fgoals_g2.py
+++ b/tests/integration/cmor/_fixes/cmip5/test_fgoals_g2.py
@@ -1,6 +1,5 @@
 """Test FGOALS-g2 fixes."""
-import unittest
-
+import pytest
 from cf_units import Unit
 from iris.coords import DimCoord
 from iris.cube import Cube
@@ -9,47 +8,54 @@
 from esmvalcore.cmor.fix import Fix
 
 
-class TestAll(unittest.TestCase):
+@pytest.fixture
+def cube():
+    """Cube for testing."""
+    test_cube = Cube([[1.0, 2.0]], var_name='co2', units='J')
+    test_cube.add_dim_coord(
+        DimCoord(
+            [0.0, 1.0],
+            standard_name='time',
+            units=Unit('days since 0001-01', calendar='gregorian')),
+        1)
+    test_cube.add_dim_coord(
+        DimCoord(
+            [180],
+            standard_name='longitude',
+            units=Unit('degrees')),
+        0)
+    return test_cube
+
+
+class TestAll:
     """Test fixes for all vars."""
 
-    def setUp(self):
-        """Prepare tests."""
-        self.cube = Cube([[1.0, 2.0]], var_name='co2', units='J')
-        self.cube.add_dim_coord(
-            DimCoord(
-                [0.0, 1.0],
-                standard_name='time',
-                units=Unit('days since 0001-01', calendar='gregorian')),
-            1)
-        self.cube.add_dim_coord(
-            DimCoord(
-                [180],
-                standard_name='longitude',
-                units=Unit('degrees')),
-            0)
-        self.fix = AllVars(None)
-
-    def test_get(self):
+    @staticmethod
+    def test_get():
         """Test fix get."""
-        self.assertListEqual(
-            Fix.get_fixes('CMIP5', 'FGOALS-G2', 'Amon', 'tas'),
-            [AllVars(None)])
+        assert (Fix.get_fixes('CMIP5', 'FGOALS-G2', 'Amon', 'tas')
+                == [AllVars(None)])
 
-    def test_fix_metadata(self):
+    @staticmethod
+    def test_fix_metadata(cube):
         """Test calendar fix."""
-        cube = self.fix.fix_metadata([self.cube])[0]
+        fix = AllVars(None)
+        cube = fix.fix_metadata([cube])[0]
 
         time = cube.coord('time')
-        self.assertEqual(time.units.origin,
-                         'day since 1-01-01 00:00:00.000000')
-        self.assertEqual(time.units.calendar, 'gregorian')
+        assert time.units.origin == 'day since 1-01-01 00:00:00.000000'
+        assert time.units.calendar in ('standard', 'gregorian')
 
-    def test_fix_metadata_dont_fail_if_not_longitude(self):
+    @staticmethod
+    def test_fix_metadata_dont_fail_if_not_longitude(cube):
         """Test calendar fix."""
-        self.cube.remove_coord('longitude')
-        self.fix.fix_metadata([self.cube])
+        cube.remove_coord('longitude')
+        fix = AllVars(None)
+        fix.fix_metadata([cube])
 
-    def test_fix_metadata_dont_fail_if_not_time(self):
+    @staticmethod
+    def test_fix_metadata_dont_fail_if_not_time(cube):
         """Test calendar fix."""
-        self.cube.remove_coord('time')
-        self.fix.fix_metadata([self.cube])
+        cube.remove_coord('time')
+        fix = AllVars(None)
+        fix.fix_metadata([cube])
diff --git a/tests/integration/preprocessor/_regrid/test_regrid.py b/tests/integration/preprocessor/_regrid/test_regrid.py
index a6ba8b95b5..ee0d056686 100644
--- a/tests/integration/preprocessor/_regrid/test_regrid.py
+++ b/tests/integration/preprocessor/_regrid/test_regrid.py
@@ -241,11 +241,6 @@ def test_regrid__unstructured_nearest_int(self):
         expected = np.array([[[1]], [[1]], [[1]]])
         np.testing.assert_array_equal(result.data, expected)
 
-        # Make sure that dtype is not preserved (since conversion from float to
-        # int would be necessary)
-        assert np.issubdtype(self.unstructured_grid_cube.dtype, np.integer)
-        assert result.dtype == np.float64
-
         # Make sure that output is a masked array with correct fill value
         # (= maximum int)
         np.testing.assert_allclose(result.data.fill_value,
diff --git a/tests/sample_data/multimodel_statistics/test_multimodel.py b/tests/sample_data/multimodel_statistics/test_multimodel.py
index 485328d85b..00ced5a348 100644
--- a/tests/sample_data/multimodel_statistics/test_multimodel.py
+++ b/tests/sample_data/multimodel_statistics/test_multimodel.py
@@ -1,290 +1,923 @@
-"""Test using sample data for :func:`esmvalcore.preprocessor._multimodel`."""
+"""Unit test for :func:`esmvalcore.preprocessor._multimodel`."""
 
-import pickle
-import platform
-from itertools import groupby
-from pathlib import Path
+from datetime import datetime
+from unittest import mock
 
+import cftime
+import dask.array as da
 import iris
+import iris.coord_categorisation
 import numpy as np
 import pytest
+from cf_units import Unit
+from iris.coords import AuxCoord
+from iris.cube import Cube
 
-from esmvalcore.preprocessor import extract_time
-from esmvalcore.preprocessor._multimodel import multi_model_statistics
-
-esmvaltool_sample_data = pytest.importorskip("esmvaltool_sample_data")
-
-# Increase this number anytime you change the cached input data to the tests.
-TEST_REVISION = 1
-
-CALENDAR_PARAMS = (
-    pytest.param(
-        '360_day',
-        marks=pytest.mark.skip(
-            reason='Cannot calculate statistics with single cube in list')),
-    '365_day',
-    'gregorian',
-    pytest.param(
-        'proleptic_gregorian',
-        marks=pytest.mark.xfail(
-            raises=iris.exceptions.MergeError,
-            reason='https://github.com/ESMValGroup/ESMValCore/issues/956')),
-    pytest.param(
-        'julian',
-        marks=pytest.mark.skip(
-            reason='Cannot calculate statistics with single cube in list')),
-)
+import esmvalcore.preprocessor._multimodel as mm
+from esmvalcore.iris_helpers import date2num
+from esmvalcore.preprocessor import multi_model_statistics
+from esmvalcore.preprocessor._ancillary_vars import add_ancillary_variable
+
+SPAN_OPTIONS = ('overlap', 'full')
+
+FREQUENCY_OPTIONS = ('daily', 'monthly', 'yearly')  # hourly
 
-SPAN_PARAMS = ('overlap', 'full')
+CALENDAR_OPTIONS = ('360_day', '365_day', 'standard', 'proleptic_gregorian',
+                    'julian')
 
 
-def assert_array_almost_equal(this, other):
-    """Assert that array `this` almost equals array `other`."""
+def assert_array_allclose(this, other):
+    """Assert that array `this` is close to array `other`."""
     if np.ma.isMaskedArray(this) or np.ma.isMaskedArray(other):
         np.testing.assert_array_equal(this.mask, other.mask)
 
     np.testing.assert_allclose(this, other)
 
 
-def assert_coords_equal(this: list, other: list):
-    """Assert coords list `this` equals coords list `other`."""
-    for this_coord, other_coord in zip(this, other):
-        np.testing.assert_equal(this_coord.points, other_coord.points)
-        assert this_coord.var_name == other_coord.var_name
-        assert this_coord.standard_name == other_coord.standard_name
-        assert this_coord.units == other_coord.units
+def timecoord(frequency, calendar='standard',
+              offset='days since 1850-01-01', num=3):
+    """Return a time coordinate with ``num`` points in the given calendar.
+
+    ``frequency`` must be 'hourly', 'daily', 'monthly' or 'yearly'.
+    """
+    time_points = range(1, num + 1)
+    if frequency == 'hourly':
+        dates = [datetime(1850, 1, 1, i, 0, 0) for i in time_points]
+    elif frequency == 'daily':
+        dates = [datetime(1850, 1, i, 0, 0, 0) for i in time_points]
+    elif frequency == 'monthly':
+        dates = [datetime(1850, i, 15, 0, 0, 0) for i in time_points]
+    elif frequency == 'yearly':
+        dates = [datetime(1850 + i - 1, 7, 1, 0, 0, 0) for i in time_points]
+    else:  # fail loudly instead of hitting an unbound ``dates`` below
+        raise ValueError(f'Unsupported frequency: {frequency!r}')
+    unit = Unit(offset, calendar=calendar)
+    points = date2num(dates, unit)
+    return iris.coords.DimCoord(points, standard_name='time', units=unit)
+
+
+def generate_cube_from_dates(
+    dates,
+    calendar='standard',
+    offset='days since 1850-01-01',
+    fill_val=1,
+    len_data=3,
+    var_name=None,
+    lazy=False,
+):
+    """Generate test cube from list of dates / frequency specification.
+
+    Parameters
+    ----------
+    dates : str or list
+        Date frequency: 'hourly' / 'daily' / 'monthly' / 'yearly' or
+        list of datetimes.
+    offset : str
+        Offset to use
+    fill_val : int
+        Value to fill the data with
+    len_data : int
+        Number of data / time points
+    var_name : str
+        Name of the data variable
+
+    Returns
+    -------
+    iris.cube.Cube
+    """
+    if isinstance(dates, str):
+        time = timecoord(dates, calendar, offset=offset, num=len_data)
+    else:
+        len_data = len(dates)
+        unit = Unit(offset, calendar=calendar)
+        time = iris.coords.DimCoord(date2num(dates, unit),
+                                    standard_name='time',
+                                    units=unit)
+
+    data = np.array((fill_val, ) * len_data, dtype=np.float32)
+
+    if lazy:
+        data = da.from_array(data)
+
+    return Cube(data, dim_coords_and_dims=[(time, 0)], var_name=var_name)
+
+
+def get_cubes_for_validation_test(frequency, lazy=False):
+    """Set up cubes used for testing multimodel statistics."""
+
+    # Simple 1d cube with standard time coord
+    cube1 = generate_cube_from_dates(frequency, lazy=lazy)
+
+    # Cube with masked data
+    cube2 = cube1.copy()
+    data2 = np.ma.array([5, 5, 5], mask=[True, False, False], dtype=np.float32)
+    if lazy:
+        data2 = da.from_array(data2)
+    cube2.data = data2
+
+    # Cube with deviating time coord
+    cube3 = generate_cube_from_dates(frequency,
+                                     calendar='360_day',
+                                     offset='days since 1950-01-01',
+                                     len_data=2,
+                                     fill_val=9,
+                                     lazy=lazy)
+
+    return [cube1, cube2, cube3]
+
+
+def get_cube_for_equal_coords_test(num_cubes):
+    """Setup cubes with equal auxiliary coordinates."""
+    cubes = []
+
+    for num in range(num_cubes):
+        cube = generate_cube_from_dates('monthly')
+        cubes.append(cube)
+
+    # Create cubes that have one equal coordinate ('year') and one non-equal
+    # coordinate ('x')
+    year_coord = AuxCoord([1, 2, 3], var_name='year', long_name='year',
+                          units='1', attributes={'test': 1})
+    x_coord = AuxCoord([1, 2, 3], var_name='x', long_name='x', units='s',
+                       attributes={'test': 2})
+    for (idx, cube) in enumerate(cubes):
+        new_x_coord = x_coord.copy()
+        new_x_coord.long_name = f'x_{idx}'
+        cube.add_aux_coord(year_coord.copy(), 0)
+        cube.add_aux_coord(new_x_coord, 0)
+        assert cube.coord('year').metadata is not year_coord.metadata
+        assert cube.coord('year').metadata == year_coord.metadata
+        assert cube.coord(f'x_{idx}').metadata is not x_coord.metadata
+        assert cube.coord(f'x_{idx}').metadata != x_coord.metadata
+
+    return cubes
 
 
-def assert_metadata_equal(this, other):
-    """Assert metadata `this` are equal to metadata `other`."""
-    assert this.standard_name == other.standard_name
-    assert this.long_name == other.long_name
-    assert this.var_name == other.var_name
-    assert this.units == other.units
+VALIDATION_DATA_SUCCESS = (
+    ('full', 'mean', (5, 5, 3)),
+    ('full', 'std_dev', (5.656854249492381, 4, 2.8284271247461903)),
+    ('full', 'std', (5.656854249492381, 4, 2.8284271247461903)),
+    ('full', 'min', (1, 1, 1)),
+    ('full', 'max', (9, 9, 5)),
+    ('full', 'median', (5, 5, 3)),
+    ('full', 'p50', (5, 5, 3)),
+    ('full', 'p99.5', (8.96, 8.96, 4.98)),
+    ('full', 'peak', (9, 9, 5)),
+    ('overlap', 'mean', (5, 5)),
+    ('overlap', 'std_dev', (5.656854249492381, 4)),
+    ('overlap', 'std', (5.656854249492381, 4)),
+    ('overlap', 'min', (1, 1)),
+    ('overlap', 'max', (9, 9)),
+    ('overlap', 'median', (5, 5)),
+    ('overlap', 'p50', (5, 5)),
+    ('overlap', 'p99.5', (8.96, 8.96)),
+    ('overlap', 'peak', (9, 9)),
+    # test multiple statistics
+    ('overlap', ('min', 'max'), ((1, 1), (9, 9))),
+    ('full', ('min', 'max'), ((1, 1, 1), (9, 9, 5))),
+)
 
 
-def fix_metadata(cubes):
-    """Fix metadata."""
+@pytest.mark.parametrize(
+    'length,slices',
+    [
+        (1, [slice(0, 1)]),
+        (25000, [slice(0, 8334),
+                 slice(8334, 16668),
+                 slice(16668, 25000)]),
+    ],
+)
+def test_compute_slices(length, slices):
+    """Test cube `_compute_slices`."""
+    cubes = [
+        Cube(da.empty([length, 50, 100], dtype=np.float32)) for _ in range(5)
+    ]
+    result = list(mm._compute_slices(cubes))
+    assert result == slices
+
+
+def test_compute_slices_exceed_end_index():
+    """Test that ``_compute_slices`` terminates when exceeding end index."""
+    # The following settings will result in a cube length of 71, 10 slices and
+    # a slice length of 8. Thus, without early termination, the last slice
+    # would be slice(72, 71), which would result in an exception.
+    cube_data = mock.Mock(nbytes=1.1 * 2**30)  # roughly 1.1 GiB
+    cube = mock.Mock(spec=Cube, data=cube_data, shape=(71,))
+    cubes = [cube] * 9
+
+    slices = list(mm._compute_slices(cubes))
+
+    # Early termination leads to 9 (instead of 10) slices
+    assert len(slices) == 9
+    expected_slices = [
+        slice(0, 8, None),
+        slice(8, 16, None),
+        slice(16, 24, None),
+        slice(24, 32, None),
+        slice(32, 40, None),
+        slice(40, 48, None),
+        slice(48, 56, None),
+        slice(56, 64, None),
+        slice(64, 71, None),
+    ]
+    assert slices == expected_slices
+
+
+def test_compute_slices_equals_end_index():
+    """Test that ``_compute_slices`` terminates when reaching end index."""
+    # The following settings will result in a cube length of 36, 13 slices and
+    # a slice length of 3. Thus, without early termination, the last slice
+    # would be slice(36, 39), which would result in an exception.
+    cube_data = mock.Mock(nbytes=1.05 * 2**30)  # roughly 1.05 GiB
+    cube = mock.Mock(spec=Cube, data=cube_data, shape=(36,))
+    cubes = [cube] * 12
+
+    slices = list(mm._compute_slices(cubes))
+
+    # Early termination leads to 12 (instead of 13) slices
+    assert len(slices) == 12
+    expected_slices = [
+        slice(0, 3, None),
+        slice(3, 6, None),
+        slice(6, 9, None),
+        slice(9, 12, None),
+        slice(12, 15, None),
+        slice(15, 18, None),
+        slice(18, 21, None),
+        slice(21, 24, None),
+        slice(24, 27, None),
+        slice(27, 30, None),
+        slice(30, 33, None),
+        slice(33, 36, None),
+    ]
+    assert slices == expected_slices
+
+
+@pytest.mark.parametrize('frequency', FREQUENCY_OPTIONS)
+@pytest.mark.parametrize('span, statistics, expected', VALIDATION_DATA_SUCCESS)
+def test_multimodel_statistics(frequency, span, statistics, expected):
+    """High level test for multicube statistics function."""
+    cubes = get_cubes_for_validation_test(frequency)
+
+    if isinstance(statistics, str):
+        statistics = (statistics, )
+        expected = (expected, )
+
+    result = multi_model_statistics(cubes, span, statistics)
+
+    assert isinstance(result, dict)
+    assert set(result.keys()) == set(statistics)
+
+    for i, statistic in enumerate(statistics):
+        result_cube = result[statistic]
+        # make sure that temporary coord has been removed
+        with pytest.raises(iris.exceptions.CoordinateNotFoundError):
+            result_cube.coord('multi-model')
+        # test that real data in => real data out
+        assert result_cube.has_lazy_data() is False
+        expected_data = np.ma.array(expected[i], mask=False)
+        assert_array_allclose(result_cube.data, expected_data)
+
+
+@pytest.mark.xfail(reason='Lazy data not (yet) supported.')
+@pytest.mark.parametrize('span', SPAN_OPTIONS)
+def test_lazy_data_consistent_times(span):
+    """Test laziness of multimodel statistics with consistent time axis."""
+    cubes = (
+        generate_cube_from_dates('monthly', fill_val=1, lazy=True),
+        generate_cube_from_dates('monthly', fill_val=3, lazy=True),
+        generate_cube_from_dates('monthly', fill_val=6, lazy=True),
+    )
+
     for cube in cubes:
-        cube.coord('air_pressure').bounds = None
+        assert cube.has_lazy_data()
 
+    statistic = 'sum'
+    statistics = (statistic, )
 
-def preprocess_data(cubes, time_slice: dict = None):
-    """Regrid the data to the first cube and optional time-slicing."""
-    # Increase TEST_REVISION anytime you make changes to this function.
-    if time_slice:
-        cubes = [extract_time(cube, **time_slice) for cube in cubes]
+    result = mm._multicube_statistics(cubes, span=span, statistics=statistics)
 
-    first_cube = cubes[0]
+    result_cube = result[statistic]
+    assert result_cube.has_lazy_data()
 
-    # regrid to first cube
-    regrid_kwargs = {
-        'grid': first_cube,
-        'scheme': iris.analysis.Nearest(),
-    }
 
-    cubes = [cube.regrid(**regrid_kwargs) for cube in cubes]
+@pytest.mark.xfail(reason='Lazy data not (yet) supported.')
+@pytest.mark.parametrize('span', SPAN_OPTIONS)
+def test_lazy_data_inconsistent_times(span):
+    """Test laziness of multimodel statistics with inconsistent time axis.
 
-    return cubes
+    This hits `_align`, which adds additional computations which must be
+    lazy.
+    """
 
+    cubes = (
+        generate_cube_from_dates(
+            [datetime(1850, i, 15, 0, 0, 0) for i in range(1, 10)], lazy=True),
+        generate_cube_from_dates(
+            [datetime(1850, i, 15, 0, 0, 0) for i in range(3, 8)], lazy=True),
+        generate_cube_from_dates(
+            [datetime(1850, i, 15, 0, 0, 0) for i in range(2, 9)], lazy=True),
+    )
+
+    for cube in cubes:
+        assert cube.has_lazy_data()
+
+    statistic = 'sum'
+    statistics = (statistic, )
+
+    result = mm._multicube_statistics(cubes, span=span, statistics=statistics)
 
-def get_cache_key(value):
-    """Get a cache key that is hopefully unique enough for unpickling.
+    result_cube = result[statistic]
+    assert result_cube.has_lazy_data()
+
+
+VALIDATION_DATA_FAIL = (
+    ('percentile', ValueError),
+    ('wpercentile', ValueError),
+    ('count', TypeError),
+    ('proportion', TypeError),
+)
 
-    If this doesn't avoid problems with unpickling the cached data,
-    manually clean the pytest cache with the command `pytest --cache-clear`.
+
+@pytest.mark.parametrize('statistic, error', VALIDATION_DATA_FAIL)
+def test_unsupported_statistics_fail(statistic, error):
+    """Check that unsupported statistics raise an exception."""
+    cubes = get_cubes_for_validation_test('monthly')
+    span = 'overlap'
+    statistics = (statistic, )
+    with pytest.raises(error):
+        _ = multi_model_statistics(cubes, span, statistics)
+
+
+@pytest.mark.parametrize('calendar1, calendar2, expected', (
+    ('360_day', '360_day', ('360_day',)),
+    ('365_day', '365_day', ('365_day',)),
+    ('365_day', '360_day', ('standard', 'gregorian')),
+    ('360_day', '365_day', ('standard', 'gregorian')),
+    ('standard', '365_day', ('standard', 'gregorian')),
+    ('proleptic_gregorian', 'julian', ('standard', 'gregorian')),
+    ('julian', '365_day', ('standard', 'gregorian')),
+))
+def test_get_consistent_time_unit(calendar1, calendar2, expected):
+    """Test same calendar returned or default if calendars differ.
+
+    Expected behaviour: If the calendars are the same, return that one.
+    If the calendars are not the same, return 'standard'.
     """
-    py_version = platform.python_version()
-    return (f'{value}_iris-{iris.__version__}_'
-            f'numpy-{np.__version__}_python-{py_version}'
-            f'rev-{TEST_REVISION}')
+    cubes = (
+        generate_cube_from_dates('monthly', calendar=calendar1),
+        generate_cube_from_dates('monthly', calendar=calendar2),
+    )
 
+    result = mm._get_consistent_time_unit(cubes)
+    assert result.calendar in expected
 
-@pytest.fixture(scope="module")
-def timeseries_cubes_month(request):
-    """Load representative timeseries data."""
-    # cache the cubes to save about 30-60 seconds on repeat use
-    cache_key = get_cache_key("sample_data/monthly")
-    data = request.config.cache.get(cache_key, None)
 
-    if data:
-        cubes = pickle.loads(data.encode('latin1'))
-    else:
-        # Increase TEST_REVISION anytime you make changes here.
-        time_slice = {
-            'start_year': 1985,
-            'end_year': 1987,
-            'start_month': 12,
-            'end_month': 2,
-            'start_day': 1,
-            'end_day': 1,
-        }
-        cubes = esmvaltool_sample_data.load_timeseries_cubes(mip_table='Amon')
-        cubes = preprocess_data(cubes, time_slice=time_slice)
-
-        # cubes are not serializable via json, so we must go via pickle
-        request.config.cache.set(cache_key,
-                                 pickle.dumps(cubes).decode('latin1'))
-
-    fix_metadata(cubes)
+@pytest.mark.parametrize('span', SPAN_OPTIONS)
+def test_align(span):
+    """Test _align function."""
 
-    return cubes
+    # TODO --> check that if a cube is extended,
+    #          the extended points are masked (not NaN!)
 
+    len_data = 3
 
-@pytest.fixture(scope="module")
-def timeseries_cubes_day(request):
-    """Load representative timeseries data grouped by calendar."""
-    # cache the cubes to save about 30-60 seconds on repeat use
-    cache_key = get_cache_key("sample_data/daily")
-    data = request.config.cache.get(cache_key, None)
+    cubes = []
 
-    if data:
-        cubes = pickle.loads(data.encode('latin1'))
+    for calendar in CALENDAR_OPTIONS:
+        cube = generate_cube_from_dates('monthly',
+                                        calendar=calendar,
+                                        len_data=3)
+        cubes.append(cube)
 
-    else:
-        # Increase TEST_REVISION anytime you make changes here.
-        time_slice = {
-            'start_year': 2001,
-            'end_year': 2002,
-            'start_month': 12,
-            'end_month': 2,
-            'start_day': 1,
-            'end_day': 1,
-        }
-        cubes = esmvaltool_sample_data.load_timeseries_cubes(mip_table='day')
-        cubes = preprocess_data(cubes, time_slice=time_slice)
+    result_cubes = mm._align(cubes, span)
 
-        # cubes are not serializable via json, so we must go via pickle
-        request.config.cache.set(cache_key,
-                                 pickle.dumps(cubes).decode('latin1'))
+    calendars = set(cube.coord('time').units.calendar for cube in result_cubes)
 
-    fix_metadata(cubes)
+    assert len(calendars) == 1
+    assert list(calendars)[0] in ('standard', 'gregorian')
 
-    def calendar(cube):
-        return cube.coord('time').units.calendar
+    shapes = set(cube.shape for cube in result_cubes)
 
-    # groupby requires sorted list
-    grouped = groupby(sorted(cubes, key=calendar), key=calendar)
+    assert len(shapes) == 1
+    assert tuple(shapes)[0] == (len_data, )
 
-    cube_dict = {key: list(group) for key, group in grouped}
 
-    return cube_dict
+@pytest.mark.parametrize('span', SPAN_OPTIONS)
+def test_combine_same_shape(span):
+    """Test _combine with same shape of cubes."""
+    len_data = 3
+    num_cubes = 5
+    cubes = []
 
+    for i in range(num_cubes):
+        cube = generate_cube_from_dates('monthly',
+                                        '360_day',
+                                        fill_val=i,
+                                        len_data=len_data)
+        cubes.append(cube)
 
-def multimodel_test(cubes, statistic, span):
-    """Run multimodel test with some simple checks."""
-    statistics = [statistic]
+    result_cube = mm._combine(cubes)
+
+    dim_coord = result_cube.coord(mm.CONCAT_DIM)
+    assert dim_coord.var_name == mm.CONCAT_DIM
+    assert result_cube.shape == (num_cubes, len_data)
+
+    desired = np.linspace((0, ) * len_data,
+                          num_cubes - 1,
+                          num=num_cubes,
+                          dtype=int)
+    np.testing.assert_equal(result_cube.data, desired)
 
-    result = multi_model_statistics(products=cubes,
-                                    statistics=statistics,
-                                    span=span)
-    assert isinstance(result, dict)
-    assert statistic in result
 
-    return result
+def test_combine_different_shape_fail():
+    """Test _combine with inconsistent data."""
+    num_cubes = 5
+    cubes = []
 
+    for num in range(1, num_cubes + 1):
+        cube = generate_cube_from_dates('monthly', '360_day', len_data=num)
+        cubes.append(cube)
 
-def multimodel_regression_test(cubes, span, name):
-    """Run multimodel regression test.
+    with pytest.raises(iris.exceptions.MergeError):
+        _ = mm._combine(cubes)
 
-    This test will fail if the input data or multimodel code changed. To
-    update the data for the regression test, remove the corresponding
-    `.nc` files in this directory and re-run the tests. The tests will
-    fail the first time with a RuntimeError, because the reference data
-    are being written.
+
+def test_combine_inconsistent_var_names_fail():
+    """Test _combine with inconsistent var names."""
+    num_cubes = 5
+    cubes = []
+
+    for num in range(num_cubes):
+        cube = generate_cube_from_dates('monthly',
+                                        '360_day',
+                                        var_name=f'test_var_{num}')
+        cubes.append(cube)
+
+    with pytest.raises(iris.exceptions.MergeError):
+        _ = mm._combine(cubes)
+
+
+@pytest.mark.parametrize('scalar_coord', ['p0', 'ptop'])
+def test_combine_with_scalar_coords_to_remove(scalar_coord):
+    """Test _combine with scalar coordinates that should be removed."""
+    num_cubes = 5
+    cubes = []
+
+    for num in range(num_cubes):
+        cube = generate_cube_from_dates('monthly')
+        cubes.append(cube)
+
+    scalar_coord_0 = AuxCoord(0.0, var_name=scalar_coord)
+    scalar_coord_1 = AuxCoord(1.0, var_name=scalar_coord)
+    cubes[0].add_aux_coord(scalar_coord_0, ())
+    cubes[1].add_aux_coord(scalar_coord_1, ())
+
+    merged_cube = mm._combine(cubes)
+    assert merged_cube.shape == (5, 3)
+
+
+def test_combine_preserve_equal_coordinates():
+    """Test ``_combine`` with equal input coordinates."""
+    cubes = get_cube_for_equal_coords_test(5)
+    merged_cube = mm._combine(cubes)
+
+    # The equal coordinate ('year') was not changed; the non-equal one ('x')
+    # does not have a long_name and attributes anymore
+    assert merged_cube.coord('year').var_name == 'year'
+    assert merged_cube.coord('year').standard_name is None
+    assert merged_cube.coord('year').long_name == 'year'
+    assert merged_cube.coord('year').attributes == {'test': 1}
+    assert merged_cube.coord('x').var_name == 'x'
+    assert merged_cube.coord('x').standard_name is None
+    assert merged_cube.coord('x').long_name is None
+    assert merged_cube.coord('x').attributes == {}
+
+
+def test_equalise_coordinates_no_cubes():
+    """Test that _equalise_coordinates doesn't fail with an empty list."""
+    mm._equalise_coordinates([])
+
+
+def test_equalise_coordinates_one_cube():
+    """Test that _equalise_coordinates doesn't fail with a single cube."""
+    cube = generate_cube_from_dates('monthly')
+    new_cube = cube.copy()
+    mm._equalise_coordinates([new_cube])
+    assert new_cube is not cube
+    assert new_cube == cube
+
+
+@pytest.mark.parametrize('span', SPAN_OPTIONS)
+def test_edge_case_different_time_offsets(span):
+    cubes = (
+        generate_cube_from_dates('monthly',
+                                 '360_day',
+                                 offset='days since 1888-01-01'),
+        generate_cube_from_dates('monthly',
+                                 '360_day',
+                                 offset='days since 1899-01-01'),
+    )
+
+    statistic = 'min'
+    statistics = (statistic, )
+
+    result = multi_model_statistics(cubes, span, statistics)
+
+    result_cube = result[statistic]
+
+    time_coord = result_cube.coord('time')
+
+    assert time_coord.units.calendar in ('standard', 'gregorian')
+    assert time_coord.units.origin == 'days since 1850-01-01'
+
+    desired = np.array((14., 45., 73.))
+    np.testing.assert_array_equal(time_coord.points, desired)
+
+
+def generate_cubes_with_non_overlapping_timecoords():
+    """Generate sample data where time coords do not overlap."""
+    time_points = range(1, 4)
+    dates1 = [datetime(1850, i, 15, 0, 0, 0) for i in time_points]
+    dates2 = [datetime(1950, i, 15, 0, 0, 0) for i in time_points]
+
+    return (
+        generate_cube_from_dates(dates1),
+        generate_cube_from_dates(dates2),
+    )
+
+
+@pytest.mark.xfail(reason='Multimodel statistics returns the original cubes.')
+def test_edge_case_time_no_overlap_fail():
+    """Test case when time coords do not overlap using span='overlap'.
+
+    Expected behaviour: `multi_model_statistics` should fail if time
+    points are not overlapping.
     """
-    statistic = 'mean'
-    result = multimodel_test(cubes, statistic=statistic, span=span)
+    cubes = generate_cubes_with_non_overlapping_timecoords()
+
+    statistic = 'min'
+    statistics = (statistic, )
+
+    with pytest.raises(ValueError):
+        _ = multi_model_statistics(cubes, 'overlap', statistics)
+
+
+def test_edge_case_time_no_overlap_success():
+    """Test case when time coords do not overlap using span='full'.
+
+    Expected behaviour: `multi_model_statistics` should use all
+    available time points.
+    """
+    cubes = generate_cubes_with_non_overlapping_timecoords()
+
+    statistic = 'min'
+    statistics = (statistic, )
+
+    result = multi_model_statistics(cubes, 'full', statistics)
     result_cube = result[statistic]
 
-    filename = Path(__file__).with_name(f'{name}-{span}-{statistic}.nc')
-    if filename.exists():
-        reference_cube = iris.load_cube(str(filename))
+    assert result_cube.coord('time').shape == (6, )
 
-        assert_array_almost_equal(result_cube.data, reference_cube.data)
-        assert_metadata_equal(result_cube.metadata, reference_cube.metadata)
-        assert_coords_equal(result_cube.coords(), reference_cube.coords())
 
-    else:
-        # The test will fail if no regression data are available.
-        iris.save(result_cube, filename)
-        raise RuntimeError(f'Wrote reference data to {filename.absolute()}')
-
-
-@pytest.mark.xfail(
-    raises=iris.exceptions.MergeError,
-    reason='https://github.com/ESMValGroup/ESMValCore/issues/956')
-@pytest.mark.use_sample_data
-@pytest.mark.parametrize('span', SPAN_PARAMS)
-def test_multimodel_regression_month(timeseries_cubes_month, span):
-    """Test statistic."""
-    cubes = timeseries_cubes_month
-    name = 'timeseries_monthly'
-    multimodel_regression_test(
-        name=name,
-        span=span,
-        cubes=cubes,
+@pytest.mark.parametrize('span', SPAN_OPTIONS)
+def test_edge_case_time_not_in_middle_of_months(span):
+    """Test case when time coords are not on 15th for monthly data.
+
+    Expected behaviour: `multi_model_statistics` will set all dates to
+    the 15th.
+    """
+    time_points = range(1, 4)
+    dates1 = [datetime(1850, i, 12, 0, 0, 0) for i in time_points]
+    dates2 = [datetime(1850, i, 25, 0, 0, 0) for i in time_points]
+
+    cubes = (
+        generate_cube_from_dates(dates1),
+        generate_cube_from_dates(dates2),
     )
 
+    statistic = 'min'
+    statistics = (statistic, )
+
+    result = multi_model_statistics(cubes, span, statistics)
+    result_cube = result[statistic]
+
+    time_coord = result_cube.coord('time')
+
+    desired = np.array((14., 45., 73.))
+    np.testing.assert_array_equal(time_coord.points, desired)
+
+
+@pytest.mark.parametrize('span', SPAN_OPTIONS)
+def test_edge_case_sub_daily_data_fail(span):
+    """Test case when cubes with sub-daily time coords are passed."""
+    cube = generate_cube_from_dates('hourly')
+    cubes = (cube, cube)
+
+    statistic = 'min'
+    statistics = (statistic, )
+
+    with pytest.raises(ValueError):
+        _ = multi_model_statistics(cubes, span, statistics)
+
+
+@pytest.mark.parametrize('span', SPAN_OPTIONS)
+def test_edge_case_single_cube_fail(span):
+    """Test that an error is raised when a single cube is passed."""
+    cube = generate_cube_from_dates('monthly')
+    cubes = (cube, )
+
+    statistic = 'min'
+    statistics = (statistic, )
+
+    with pytest.raises(ValueError):
+        _ = multi_model_statistics(cubes, span, statistics)
+
+
+def test_unify_time_coordinates():
+    """Test set common calendar."""
+    cube1 = generate_cube_from_dates('monthly',
+                                     calendar='360_day',
+                                     offset='days since 1850-01-01')
+    cube2 = generate_cube_from_dates('monthly',
+                                     calendar='standard',
+                                     offset='days since 1943-05-16')
+
+    mm._unify_time_coordinates([cube1, cube2])
+
+    assert cube1.coord('time') == cube2.coord('time')
+
+
+class PreprocessorFile:
+    """Mockup to test output of multimodel."""
+
+    def __init__(self, cube=None, attributes=None):
+        if cube:
+            self.cubes = [cube]
+        if attributes:
+            self.attributes = attributes
+
+    def wasderivedfrom(self, product):
+        pass
+
+    def group(self, keys: list) -> str:
+        """Generate group keyword.
+
+        Returns a string that identifies a group. Concatenates a list of
+        values from .attributes
+        """
+        if not keys:
+            return ''
+
+        if isinstance(keys, str):
+            keys = [keys]
+
+        identifier = []
+        for key in keys:
+            attribute = self.attributes.get(key)
+            if attribute:
+                if isinstance(attribute, (list, tuple)):
+                    attribute = '-'.join(attribute)
+                identifier.append(attribute)
+
+        return '_'.join(identifier)
 
-@pytest.mark.use_sample_data
-@pytest.mark.parametrize('calendar', CALENDAR_PARAMS)
-@pytest.mark.parametrize('span', SPAN_PARAMS)
-def test_multimodel_regression_day(timeseries_cubes_day, span, calendar):
-    """Test statistic."""
-    cubes = timeseries_cubes_day[calendar]
-    name = f'timeseries_daily_{calendar}'
-    multimodel_regression_test(
-        name=name,
-        span=span,
-        cubes=cubes,
+
+def test_return_products():
+    """Check that the right product set is returned."""
+    cube1 = generate_cube_from_dates('monthly', fill_val=1)
+    cube2 = generate_cube_from_dates('monthly', fill_val=9)
+
+    input1 = PreprocessorFile(cube1)
+    input2 = PreprocessorFile(cube2)
+
+    products = set([input1, input2])
+
+    output = PreprocessorFile()
+    output_products = {'': {'mean': output}}
+
+    kwargs = {
+        'statistics': ['mean'],
+        'span': 'full',
+        'output_products': output_products['']
+    }
+
+    result1 = mm._multiproduct_statistics(products,
+                                          keep_input_datasets=True,
+                                          **kwargs)
+
+    result2 = mm._multiproduct_statistics(products,
+                                          keep_input_datasets=False,
+                                          **kwargs)
+
+    assert result1 == set([input1, input2, output])
+    assert result2 == set([output])
+
+    kwargs['output_products'] = output_products
+    result3 = mm.multi_model_statistics(products, **kwargs)
+    result4 = mm.multi_model_statistics(products,
+                                        keep_input_datasets=False,
+                                        **kwargs)
+
+    assert result3 == result1
+    assert result4 == result2
+
+
+def test_ensemble_products():
+    cube1 = generate_cube_from_dates('monthly', fill_val=1)
+    cube2 = generate_cube_from_dates('monthly', fill_val=9)
+
+    attributes1 = {
+        'project': 'project', 'dataset': 'dataset',
+        'exp': 'exp', 'ensemble': '1'}
+    input1 = PreprocessorFile(cube1, attributes=attributes1)
+
+    attributes2 = {
+        'project': 'project', 'dataset': 'dataset',
+        'exp': 'exp', 'ensemble': '2'}
+    input2 = PreprocessorFile(cube2, attributes=attributes2)
+
+    attributes3 = {
+        'project': 'project', 'dataset': 'dataset2',
+        'exp': 'exp', 'ensemble': '1'}
+    input3 = PreprocessorFile(cube1, attributes=attributes3)
+
+    attributes4 = {
+        'project': 'project', 'dataset': 'dataset2',
+        'exp': 'exp', 'ensemble': '2'}
+
+    input4 = PreprocessorFile(cube1, attributes=attributes4)
+    products = set([input1, input2, input3, input4])
+
+    output1 = PreprocessorFile()
+    output2 = PreprocessorFile()
+    output_products = {
+        'project_dataset_exp': {'mean': output1},
+        'project_dataset2_exp': {'mean': output2}}
+
+    kwargs = {
+        'statistics': ['mean'],
+        'output_products': output_products,
+    }
+
+    result = mm.ensemble_statistics(
+        products, **kwargs)
+    assert len(result) == 2
+
+
+def test_ignore_tas_scalar_height_coord():
+    """Ignore conflicting aux_coords for height in tas."""
+    tas_2m = generate_cube_from_dates("monthly")
+    tas_1p5m = generate_cube_from_dates("monthly")
+
+    for cube, height in zip([tas_2m, tas_1p5m], [2., 1.5]):
+        cube.rename("air_temperature")
+        cube.attributes["short_name"] = "tas"
+        cube.add_aux_coord(
+            iris.coords.AuxCoord([height], var_name="height", units="m"))
+
+    result = mm.multi_model_statistics(
+        [tas_2m, tas_2m.copy(), tas_1p5m], statistics=['mean'], span='full')
+
+    # iris automatically averages the value of the scalar coordinate.
+    assert len(result['mean'].coords("height")) == 1
+    assert result["mean"].coord("height").points == 1.75
+
+
+def test_daily_inconsistent_calendars():
+    """Determine behaviour for inconsistent calendars.
+
+    Deviating calendars should be converted to standard. Missing data
+    inside original bounds is filled with nearest neighbour. Missing data
+    outside original bounds is masked.
+    """
+    ref_standard = Unit("days since 1850-01-01", calendar="standard")
+    ref_noleap = Unit("days since 1850-01-01", calendar="noleap")
+    start = date2num(datetime(1852, 1, 1), ref_standard)
+
+    # 1852 is a leap year; include 1 extra day at the end
+    leapdates = cftime.num2date(start + np.arange(367),
+                                ref_standard.name, ref_standard.calendar)
+
+    noleapdates = cftime.num2date(start + np.arange(365),
+                                  ref_noleap.name, ref_noleap.calendar)
+
+    leapcube = generate_cube_from_dates(
+        leapdates,
+        calendar='standard',
+        offset='days since 1850-01-01',
+        fill_val=1,
     )
 
+    noleapcube = generate_cube_from_dates(
+        noleapdates,
+        calendar='noleap',
+        offset='days since 1850-01-01',
+        fill_val=3,
+    )
 
-@pytest.mark.use_sample_data
-def test_multimodel_no_vertical_dimension(timeseries_cubes_month):
-    """Test statistic without vertical dimension using monthly data."""
-    span = 'full'
-    cubes = timeseries_cubes_month
-    cubes = [cube[:, 0] for cube in cubes]
-    multimodel_test(cubes, span=span, statistic='mean')
-
-
-@pytest.mark.use_sample_data
-@pytest.mark.xfail(
-    raises=iris.exceptions.MergeError,
-    reason='https://github.com/ESMValGroup/ESMValCore/issues/956')
-# @pytest.mark.xfail(
-#     raises=iris.exceptions.CoordinateNotFoundError,
-#     reason='https://github.com/ESMValGroup/ESMValCore/issues/891')
-def test_multimodel_no_horizontal_dimension(timeseries_cubes_month):
-    """Test statistic without horizontal dimension using monthly data."""
-    span = 'full'
-    cubes = timeseries_cubes_month
-    cubes = [cube[:, :, 0, 0] for cube in cubes]
-    # Coordinate not found error
-    # iris.exceptions.CoordinateNotFoundError:
-    # 'Expected to find exactly 1 depth coordinate, but found none.'
-    multimodel_test(cubes, span=span, statistic='mean')
-
-
-@pytest.mark.use_sample_data
-def test_multimodel_only_time_dimension(timeseries_cubes_month):
-    """Test statistic without only the time dimension using monthly data."""
-    cubes = timeseries_cubes_month
-    span = 'full'
-    cubes = [cube[:, 0, 0, 0] for cube in cubes]
-    multimodel_test(cubes, span=span, statistic='mean')
-
-
-@pytest.mark.use_sample_data
-@pytest.mark.xfail(
-    raises=ValueError,
-    reason='https://github.com/ESMValGroup/ESMValCore/issues/890')
-def test_multimodel_no_time_dimension(timeseries_cubes_month):
-    """Test statistic without time dimension using monthly data."""
-    span = 'full'
-    cubes = timeseries_cubes_month
-    cubes = [cube[0] for cube in cubes]
-    # ValueError: Cannot guess bounds for a coordinate of length 1.
-    multimodel_test(cubes, span=span, statistic='mean')
+    cubes = [leapcube, noleapcube]
+
+    # span=full
+    aligned_cubes = mm._align(cubes, span='full')
+    for cube in aligned_cubes:
+        assert cube.coord('time').units.calendar in ("standard", "gregorian")
+        assert cube.shape == (367, )
+        assert cube[59].coord('time').points == 789  # 29 Feb 1852
+    np.ma.is_masked(aligned_cubes[1][366].data)  # outside original range
+
+    result = multi_model_statistics(cubes, span="full", statistics=['mean'])
+    result_cube = result['mean']
+    assert result_cube[59].data == 2  # looked up nearest neighbour
+    assert result_cube[366].data == 1  # outside original range
+
+    # span=overlap
+    aligned_cubes = mm._align(cubes, span='overlap')
+    for cube in aligned_cubes:
+        assert cube.coord('time').units.calendar in ("standard", "gregorian")
+        assert cube.shape == (365, )
+        assert cube[59].coord('time').points == 790  # 1 March 1852
+
+    result = multi_model_statistics(cubes, span="overlap", statistics=['mean'])
+    result_cube = result['mean']
+    assert result_cube[59].data == 2
+
+
+def test_remove_fx_variables():
+    """Test fx variables are removed from cubes."""
+    cube1 = generate_cube_from_dates("monthly")
+    fx_cube = generate_cube_from_dates("monthly")
+    fx_cube.standard_name = "land_area_fraction"
+    add_ancillary_variable(cube1, fx_cube)
+
+    cube2 = generate_cube_from_dates("monthly", fill_val=9)
+    result = mm.multi_model_statistics([cube1, cube2],
+                                       statistics=['mean'],
+                                       span='full')
+    assert result['mean'].ancillary_variables() == []
+
+
+def test_no_warn_model_dim_non_contiguous(recwarn):
+    """Test that no warning is raised that model dim is non-contiguous."""
+    coord = iris.coords.DimCoord(
+        [0.5, 1.5],
+        bounds=[[0, 1.], [1., 2.]],
+        standard_name='time',
+        units='days since 1850-01-01',
+    )
+    cube1 = iris.cube.Cube([1, 1], dim_coords_and_dims=[(coord, 0)])
+    cube2 = iris.cube.Cube([2, 2], dim_coords_and_dims=[(coord, 0)])
+    cubes = [cube1, cube2]
+
+    multi_model_statistics(cubes, span="overlap", statistics=['mean'])
+    msg = ("Collapsing a non-contiguous coordinate. "
+           "Metadata may not be fully descriptive for 'multi-model'.")
+    for warning in recwarn:
+        assert str(warning.message) != msg
+
+
+def test_map_to_new_time_int_coords():
+    """Test ``_map_to_new_time`` with integer time coords."""
+    cube = generate_cube_from_dates('yearly')
+    iris.coord_categorisation.add_year(cube, 'time')
+    decade_coord = AuxCoord([1850, 1850, 1850], bounds=[[1845, 1855]] * 3,
+                            long_name='decade')
+    cube.add_aux_coord(decade_coord, 0)
+    target_points = [200.0, 500.0, 1000.0]
+
+    out_cube = mm._map_to_new_time(cube, target_points)
+
+    assert_array_allclose(out_cube.data,
+                          np.ma.masked_invalid([1.0, 1.0, np.nan]))
+    assert_array_allclose(out_cube.coord('time').points, target_points)
+    assert_array_allclose(out_cube.coord('year').points,
+                          np.ma.masked_invalid([1850, 1851, np.nan]))
+    assert_array_allclose(out_cube.coord('decade').points,
+                          np.ma.masked_invalid([1850, 1850, np.nan]))
+    assert out_cube.coord('year').bounds is None
+    assert out_cube.coord('decade').bounds is None
+    assert np.issubdtype(out_cube.coord('year').dtype, np.integer)
+    assert np.issubdtype(out_cube.coord('decade').dtype, np.integer)
+
+
+def test_preserve_equal_coordinates():
+    """Test ``multi_model_statistics`` with equal input coordinates."""
+    cubes = get_cube_for_equal_coords_test(5)
+    stat_cubes = multi_model_statistics(cubes, span='overlap',
+                                        statistics=['sum'])
+
+    assert len(stat_cubes) == 1
+    assert 'sum' in stat_cubes
+    stat_cube = stat_cubes['sum']
+    assert_array_allclose(stat_cube.data, np.ma.array([5.0, 5.0, 5.0]))
+
+    # The equal coordinate ('year') was not changed; the non-equal one ('x')
+    # does not have a long_name and attributes anymore
+    assert stat_cube.coord('year').var_name == 'year'
+    assert stat_cube.coord('year').standard_name is None
+    assert stat_cube.coord('year').long_name == 'year'
+    assert stat_cube.coord('year').attributes == {'test': 1}
+    assert stat_cube.coord('x').var_name == 'x'
+    assert stat_cube.coord('x').standard_name is None
+    assert stat_cube.coord('x').long_name is None
+    assert stat_cube.coord('x').attributes == {}
diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py
index 7d1f6bc2bc..00ced5a348 100644
--- a/tests/unit/preprocessor/_multimodel/test_multimodel.py
+++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py
@@ -22,7 +22,7 @@
 
 FREQUENCY_OPTIONS = ('daily', 'monthly', 'yearly')  # hourly
 
-CALENDAR_OPTIONS = ('360_day', '365_day', 'gregorian', 'proleptic_gregorian',
+CALENDAR_OPTIONS = ('360_day', '365_day', 'standard', 'proleptic_gregorian',
                     'julian')
 
 
@@ -35,7 +35,7 @@ def assert_array_allclose(this, other):
 
 
 def timecoord(frequency,
-              calendar='gregorian',
+              calendar='standard',
               offset='days since 1850-01-01',
               num=3):
     """Return a time coordinate with the given time points and calendar."""
@@ -58,7 +58,7 @@ def timecoord(frequency,
 
 def generate_cube_from_dates(
     dates,
-    calendar='gregorian',
+    calendar='standard',
     offset='days since 1850-01-01',
     fill_val=1,
     len_data=3,
@@ -350,19 +350,19 @@ def test_unsupported_statistics_fail(statistic, error):
 
 
 @pytest.mark.parametrize('calendar1, calendar2, expected', (
-    ('360_day', '360_day', '360_day'),
-    ('365_day', '365_day', '365_day'),
-    ('365_day', '360_day', 'gregorian'),
-    ('360_day', '365_day', 'gregorian'),
-    ('gregorian', '365_day', 'gregorian'),
-    ('proleptic_gregorian', 'julian', 'gregorian'),
-    ('julian', '365_day', 'gregorian'),
+    ('360_day', '360_day', ('360_day',)),
+    ('365_day', '365_day', ('365_day',)),
+    ('365_day', '360_day', ('standard', 'gregorian')),
+    ('360_day', '365_day', ('standard', 'gregorian')),
+    ('standard', '365_day', ('standard', 'gregorian')),
+    ('proleptic_gregorian', 'julian', ('standard', 'gregorian')),
+    ('julian', '365_day', ('standard', 'gregorian')),
 ))
 def test_get_consistent_time_unit(calendar1, calendar2, expected):
     """Test same calendar returned or default if calendars differ.
 
     Expected behaviour: If the calendars are the same, return that one.
-    If the calendars are not the same, return 'gregorian'.
+    If the calendars are not the same, return 'standard'.
     """
     cubes = (
         generate_cube_from_dates('monthly', calendar=calendar1),
@@ -370,7 +370,7 @@ def test_get_consistent_time_unit(calendar1, calendar2, expected):
     )
 
     result = mm._get_consistent_time_unit(cubes)
-    assert result.calendar == expected
+    assert result.calendar in expected
 
 
 @pytest.mark.parametrize('span', SPAN_OPTIONS)
@@ -395,7 +395,7 @@ def test_align(span):
     calendars = set(cube.coord('time').units.calendar for cube in result_cubes)
 
     assert len(calendars) == 1
-    assert list(calendars)[0] == 'gregorian'
+    assert list(calendars)[0] in ('standard', 'gregorian')
 
     shapes = set(cube.shape for cube in result_cubes)
 
@@ -528,7 +528,7 @@ def test_edge_case_different_time_offsets(span):
 
     time_coord = result_cube.coord('time')
 
-    assert time_coord.units.calendar == 'gregorian'
+    assert time_coord.units.calendar in ('standard', 'gregorian')
     assert time_coord.units.origin == 'days since 1850-01-01'
 
     desired = np.array((14., 45., 73.))
@@ -640,7 +640,7 @@ def test_unify_time_coordinates():
                                      calendar='360_day',
                                      offset='days since 1850-01-01')
     cube2 = generate_cube_from_dates('monthly',
-                                     calendar='gregorian',
+                                     calendar='standard',
                                      offset='days since 1943-05-16')
 
     mm._unify_time_coordinates([cube1, cube2])
@@ -787,7 +787,7 @@ def test_ignore_tas_scalar_height_coord():
 def test_daily_inconsistent_calendars():
     """Determine behaviour for inconsistent calendars.
 
-    Deviating calendars should be converted to gregorian. Missing data
+    Deviating calendars should be converted to standard. Missing data
     inside original bounds is filled with nearest neighbour Missing data
     outside original bounds is masked.
     """
@@ -804,7 +804,7 @@ def test_daily_inconsistent_calendars():
 
     leapcube = generate_cube_from_dates(
         leapdates,
-        calendar='gregorian',
+        calendar='standard',
         offset='days since 1850-01-01',
         fill_val=1,
     )
@@ -821,7 +821,7 @@ def test_daily_inconsistent_calendars():
     # span=full
     aligned_cubes = mm._align(cubes, span='full')
     for cube in aligned_cubes:
-        assert cube.coord('time').units.calendar == "gregorian"
+        assert cube.coord('time').units.calendar in ("standard", "gregorian")
         assert cube.shape == (367, )
         assert cube[59].coord('time').points == 789  # 29 Feb 1852
     np.ma.is_masked(aligned_cubes[1][366].data)  # outside original range
@@ -834,7 +834,7 @@ def test_daily_inconsistent_calendars():
     # span=overlap
     aligned_cubes = mm._align(cubes, span='overlap')
     for cube in aligned_cubes:
-        assert cube.coord('time').units.calendar == "gregorian"
+        assert cube.coord('time').units.calendar in ("standard", "gregorian")
         assert cube.shape == (365, )
         assert cube[59].coord('time').points == 790  # 1 March 1852