diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index 5c4b14e351..7ad4bdbcec 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -1219,6 +1219,7 @@ The ``_time.py`` module contains the following preprocessor functions: * regrid_time_: Aligns the time axis of each dataset to have common time points and calendars. * timeseries_filter_: Allows application of a filter to the time-series data. +* local_solar_time_: Convert cube with UTC time to local solar time. Statistics functions are applied by default in the order they appear in the list. For example, the following example applied to hourly data will retrieve @@ -1653,6 +1654,55 @@ Examples: See also :func:`esmvalcore.preprocessor.timeseries_filter`. +.. _local_solar_time: + +``local_solar_time`` +-------------------- + +Many variables in the Earth system show a strong diurnal cycle. +The reason for that is of course Earth's rotation around its own axis, which +leads to a diurnal cycle of the incoming solar radiation. +While UTC time is a very good absolute time measure, it is not really suited to +analyze diurnal cycles over larger regions. +For example, diurnal cycles over Russia and the USA are phase-shifted by ~180° += 12 hr in UTC time. + +This is where the `local solar time (LST) +`__ comes into play: +For a given location, 12:00 noon LST is defined as the moment when the sun +reaches its highest point in the sky. +By using this definition based on the origin of the diurnal cycle (the sun), we +can directly compare diurnal cycles across the globe. +LST is mainly determined by the longitude of a location, but due to the +eccentricity of Earth's orbit, it also depends on the day of year (see +`equation of time `__). +However, this correction is at most ~15 min, which is usually smaller than the +highest frequency output of CMIP6 models (1 hr) and smaller than the time scale +for diurnal evolution of meteorological phenomena (which is in the order of +hours, not minutes). +Thus, instead, we use the **mean** LST, which solely depends on longitude: + +.. math:: + + LST = UTC + 12 \cdot \frac{lon}{180°} + +where the times are given in hours and `lon` in degrees in the interval [-180, +180]. +To transform data from UTC to LST, this preprocessor shifts data along the time +axis based on the longitude. + +This preprocessor does not need any additional parameters. + +Example: + +.. code-block:: yaml + + calculate_local_solar_time: + local_solar_time: + +See also :func:`esmvalcore.preprocessor.local_solar_time`. + + .. _area operations: Area manipulation diff --git a/esmvalcore/iris_helpers.py b/esmvalcore/iris_helpers.py index e5bc3dbeea..8fbd794b3e 100644 --- a/esmvalcore/iris_helpers.py +++ b/esmvalcore/iris_helpers.py @@ -1,11 +1,14 @@ """Auxiliary functions for :mod:`iris`.""" -from typing import Dict, List, Sequence +from __future__ import annotations + +from typing import Dict, Iterable, List, Literal, Sequence import dask.array as da import iris import iris.cube import iris.util import numpy as np +from iris.coords import Coord from iris.cube import Cube from iris.exceptions import CoordinateMultiDimError @@ -157,3 +160,111 @@ def merge_cube_attributes( # Step 3: modify the cubes in-place for cube in cubes: cube.attributes = final_attributes + + +def _rechunk( + array: da.core.Array, + complete_dims: list[int], + remaining_dims: int | Literal['auto'], +) -> da.core.Array: + """Rechunk a given array so that it is not chunked along given dims.""" + new_chunks: list[str | int] = [remaining_dims] * array.ndim + for dim in complete_dims: + new_chunks[dim] = -1 + return array.rechunk(new_chunks) + + +def _rechunk_dim_metadata( + cube: Cube, + complete_dims: Iterable[int], + remaining_dims: int | Literal['auto'] = 'auto', +) -> None: + """Rechunk dimensional metadata of a cube (in-place).""" + # Non-dimensional coords that span complete_dims + # Note: dimensional coords are always realized (i.e., numpy arrays), so no + # chunking is necessary + for coord in cube.coords(dim_coords=False): + dims = cube.coord_dims(coord) + complete_dims_ = [dims.index(d) for d in complete_dims if d in dims] + if complete_dims_: + if coord.has_lazy_points(): + coord.points = _rechunk( + coord.lazy_points(), complete_dims_, remaining_dims + ) + if coord.has_bounds() and coord.has_lazy_bounds(): + coord.bounds = _rechunk( + coord.lazy_bounds(), complete_dims_, remaining_dims + ) + + # Rechunk cell measures that span complete_dims + for measure in cube.cell_measures(): + dims = cube.cell_measure_dims(measure) + complete_dims_ = [dims.index(d) for d in complete_dims if d in dims] + if complete_dims_ and measure.has_lazy_data(): + measure.data = _rechunk( + measure.lazy_data(), complete_dims_, remaining_dims + ) + + # Rechunk ancillary variables that span complete_dims + for anc_var in cube.ancillary_variables(): + dims = cube.ancillary_variable_dims(anc_var) + complete_dims_ = [dims.index(d) for d in complete_dims if d in dims] + if complete_dims_ and anc_var.has_lazy_data(): + anc_var.data = _rechunk( + anc_var.lazy_data(), complete_dims_, remaining_dims + ) + + +def rechunk_cube( + cube: Cube, + complete_coords: Iterable[Coord | str], + remaining_dims: int | Literal['auto'] = 'auto', +) -> Cube: + """Rechunk cube so that it is not chunked along given dimensions. + + This will rechunk the cube's data, but also all non-dimensional + coordinates, cell measures, and ancillary variables that span at least one + of the given dimensions. + + Note + ---- + This will only rechunk `dask` arrays. `numpy` arrays are not changed. + + Parameters + ---------- + cube: + Input cube. + complete_coords: + (Names of) coordinates along which the output cubes should not be + chunked. The given coordinates must span exactly 1 dimension. + remaining_dims: + Chunksize of the remaining dimensions. + + Returns + ------- + Cube + Rechunked cube. This will always be a copy of the input cube. + + """ + cube = cube.copy() # do not modify input cube + + # Make sure that complete_coords span exactly 1 dimension + complete_dims = [] + for coord in complete_coords: + coord = cube.coord(coord) + dims = cube.coord_dims(coord) + if len(dims) != 1: + raise CoordinateMultiDimError( + f"Complete coordinates must be 1D coordinates, got " + f"{len(dims):d}D coordinate '{coord.name()}'" + ) + complete_dims.append(dims[0]) + + # Rechunk data + if cube.has_lazy_data(): + cube.data = _rechunk(cube.lazy_data(), complete_dims, remaining_dims) + + # Rechunk dimensional metadata + _rechunk_dim_metadata(cube, complete_dims, remaining_dims=remaining_dims) + + return cube diff --git a/esmvalcore/preprocessor/__init__.py b/esmvalcore/preprocessor/__init__.py index 4a5027bce5..2144e5edce 100644 --- a/esmvalcore/preprocessor/__init__.py +++ b/esmvalcore/preprocessor/__init__.py @@ -70,6 +70,7 @@ extract_season, extract_time, hourly_statistics, + local_solar_time, monthly_statistics, regrid_time, resample_hours, @@ -148,8 +149,6 @@ 'extract_volume', 'extract_trajectory', 'extract_transect', - # 'average_zone': average_zone, - # 'cross_section': cross_section, 'detrend', 'extract_named_regions', 'axis_statistics', @@ -157,8 +156,7 @@ 'area_statistics', 'volume_statistics', # Time operations - # 'annual_cycle': annual_cycle, - # 'diurnal_cycle': diurnal_cycle, + 'local_solar_time', 'amplitude', 'zonal_statistics', 'meridional_statistics', diff --git a/esmvalcore/preprocessor/_time.py b/esmvalcore/preprocessor/_time.py index 7d5e7ed603..ca4e95ce56 100644 --- a/esmvalcore/preprocessor/_time.py +++ b/esmvalcore/preprocessor/_time.py @@ -9,6 +9,7 @@ import datetime import logging import warnings +from functools import partial from typing import Iterable, Optional from warnings import filterwarnings @@ -16,18 +17,23 @@ import dask.config import iris import iris.coord_categorisation -import iris.exceptions import iris.util import isodate import numpy as np -from iris.coords import AuxCoord +from cf_units import Unit +from iris.coords import AuxCoord, Coord, DimCoord from iris.cube import Cube, CubeList +from iris.exceptions import CoordinateMultiDimError, CoordinateNotFoundError from iris.time import PartialDateTime +from iris.util import broadcast_to_shape +from numpy.typing import DTypeLike from esmvalcore.cmor.fixes import get_next_month, get_time_bounds -from esmvalcore.iris_helpers import date2num - -from ._shared import get_iris_aggregator, update_weights_kwargs +from esmvalcore.iris_helpers import date2num, rechunk_cube +from esmvalcore.preprocessor._shared import ( + get_iris_aggregator, + update_weights_kwargs, +) logger = logging.getLogger(__name__) @@ -1150,7 +1156,7 @@ def timeseries_filter( """ try: cube.coord('time') - except iris.exceptions.CoordinateNotFoundError: + except CoordinateNotFoundError: logger.error("Cube %s does not have time coordinate", cube) raise @@ -1292,3 +1298,426 @@ def resample_time( raise ValueError( f"Time coordinate {dates} does not contain {requested} for {cube}") return cube + + +def _lin_pad(array: np.ndarray, delta: float, pad_with: int) -> np.ndarray: + """Linearly pad an array on both sides with constant difference.""" + end_values = (array[0] - pad_with * delta, array[-1] + pad_with * delta) + new_array = np.pad(array, pad_with, 'linear_ramp', end_values=end_values) + return new_array + + +def _guess_time_bounds(time_coord: DimCoord) -> None: + """Guess bounds of time coordinate in-place.""" + if time_coord.has_bounds(): + return + try: + time_coord.guess_bounds() + except ValueError: # coordinate has only 1 point + point = time_coord.points[0] + time_coord.bounds = [[point - 0.5, point + 0.5]] + + +def _get_lst_offset(lon_coord: Coord) -> np.ndarray: + """Get offsets to shift UTC time to local solar time (LST). + + Note + ---- + This function expects longitude in degrees. Can be in [0, 360] or [-180, + 180] format. + + """ + # Make sure that longitude is in degrees and shift it to [-180, 180] first + # (do NOT overwrite input coordinate) + lon_coord = lon_coord.copy() + lon_coord.convert_units('degrees') + shifted_lon = (lon_coord.points + 180.0) % 360 - 180.0 + return 12.0 * (shifted_lon / 180.0) + + +def _get_lsts(time_coord: DimCoord, lon_coord: Coord) -> np.ndarray: + """Get array of binned local solar times (LSTs) of shape (lon, time). + + Note + ---- + LSTs outside of the time bins given be the time coordinate bounds are put + into a bin below/above the time coordinate. + + """ + # Pad time coordinate with 1 time step at both sides for the bins for LSTs + # outside of the time coordinate + dtime = np.abs( + time_coord.bounds[0, 1] - time_coord.bounds[0, 0] + ) + new_points = _lin_pad(time_coord.points, dtime, 1) + bnds = time_coord.bounds + new_bounds = np.stack( + (_lin_pad(bnds[:, 0], dtime, 1), _lin_pad(bnds[:, 1], dtime, 1)), + axis=-1, + ) + time_coord = time_coord.copy(new_points, bounds=new_bounds) + + n_time = time_coord.shape[0] + n_lon = lon_coord.shape[0] + + # Calculate LST + time_array = np.broadcast_to(time_coord.points, (n_lon, n_time)) + time_offsets = _get_lst_offset(lon_coord).reshape(-1, 1) + exact_lst_array = time_array + time_offsets # (lon, time) + + # Put LST into bins given be the time coordinate bounds + bins = np.concatenate(([time_coord.bounds[0, 0]], time_coord.bounds[:, 1])) + idx = np.digitize(exact_lst_array, bins) - 1 # (lon, time); idx for time + idx[idx < 0] = 0 # values outside the time coordinate + idx[idx >= n_time] = - 1 # values outside the time coordinate + lst_array = time_coord.points[idx] # (lon, time) + + # Remove time steps again that have been added previously + lst_array = lst_array[:, 1:-1] + + return lst_array + + +def _get_time_index_and_mask( + time_coord: DimCoord, + lon_coord: Coord, +) -> tuple[np.ndarray, np.ndarray]: + """Get advanced index and mask for time dimension of shape (time, lon). + + Note + ---- + The mask considers the fact that not all values for all local solar times + (LSTs) are given. E.g., for hourly data with first time point 01:00:00 + UTC, LST in Berlin is already 02:00:00 (assuming no daylight saving time). + Thus, for 01:00:00 LST on this day, there is no value for Berlin. + + """ + # Make sure that time coordinate has bounds (these are necessary for the + # binning) and uses 'hours' as reference units + time_coord.convert_units( + Unit('hours since 1850-01-01', calendar=time_coord.units.calendar) + ) + _guess_time_bounds(time_coord) + + lsts = _get_lsts(time_coord, lon_coord) # (lon, time) + n_time = time_coord.points.shape[0] + + # We use np.searchsorted to calculate the indices necessary to put the UTC + # times into their corresponding (binned) LSTs. These incides are 2D since + # they depend on time and longitude. + searchsorted_l = partial(np.searchsorted, side='left') + _get_indices_l = np.vectorize(searchsorted_l, signature='(i),(i)->(i)') + time_index_l = _get_indices_l(lsts, time_coord.points) # (lon, time) + + # To calculate the mask, we need to detect which LSTs are outside of the + # time coordinate. Unfortunately, searchsorted can only detect outliers on + # one side of the array. This side is determined by the `side` keyword + # argument. To consistently detect outliers on both sides, we use + # searchsorted again, this time with `side='right'` (the default is + # 'left'). Indices that are the same in both arrays need to be masked, as + # these are the ones outside of the time coordinate. All others will + # change. + searchsorted_r = partial(np.searchsorted, side='right') + _get_indices_r = np.vectorize(searchsorted_r, signature='(i),(i)->(i)') + time_index_r = _get_indices_r(lsts, time_coord.points) # (lon, time) + mask = time_index_l == time_index_r # (lon, time) + + # The index is given by the left indices (these are identical to the right + # indices minus 1) + time_index_l[time_index_l < 0] = 0 # will be masked + time_index_l[time_index_l >= n_time] = -1 # will be masked + + return (time_index_l.T, mask.T) # (time, lon) + + +def _transform_to_lst_eager( + data: np.ndarray, + *, + time_index: np.ndarray, + mask: np.ndarray, + time_dim: int, + lon_dim: int, + **__, +) -> np.ndarray: + """Transform array with UTC coord to local solar time (LST) coord. + + Note + ---- + This function is the eager version of `_transform_to_lst_lazy`. + + `data` needs to be at least 2D. `time_dim` and `lon_dim` correspond to the + dimensions that describe time and longitude dimensions in `data`, + respectively. + + `time_index` is an `advanced index + `__ + for the time dimension of `data` with shape (time, lon). It is used to + reorder the data along the time axis based on the longitude axis. + + `mask` is 2D with shape (time, lon) that will be applied to the final data. + + """ + # Apart from the time index, all other dimensions will stay the same; this + # is ensured with np.ogrid + idx = np.ogrid[tuple(slice(0, d) for d in data.shape)] + time_index = broadcast_to_shape( + time_index, data.shape, (time_dim, lon_dim) + ) + idx[time_dim] = time_index + new_data = data[tuple(idx)] + + # Apply properly broadcasted mask + mask = broadcast_to_shape(mask, new_data.shape, (time_dim, lon_dim)) + new_mask = mask | np.ma.getmaskarray(new_data) + + return np.ma.masked_array(new_data, mask=new_mask) + + +def _transform_to_lst_lazy( + data: da.core.Array, + *, + time_index: np.ndarray, + mask: np.ndarray, + time_dim: int, + lon_dim: int, + output_dtypes: DTypeLike, +) -> da.core.Array: + """Transform array with UTC coord to local solar time (LST) coord. + + Note + ---- + This function is the lazy version of `_transform_to_lst_eager` using + dask's :func:`dask.array.apply_gufunc`. + + `data` needs to be at least 2D. `time_dim` and `lon_dim` correspond to the + dimensions that describe time and longitude dimensions in `data`, + respectively. + + `time_index` is an `advanced index + `__ + for the time dimension of `data` with shape (time, lon). It is used to + reorder the data along the time axis based on the longitude axis. + + `mask` is 2D with shape (time, lon) that will be applied to the final data. + + """ + _transform_chunk_to_lst = partial( + _transform_to_lst_eager, + time_index=time_index, + mask=mask, + time_dim=-2, # this is ensured by da.apply_gufunc + lon_dim=-1, # this is ensured by da.apply_gufunc + ) + new_data = da.apply_gufunc( + _transform_chunk_to_lst, + '(t,y)->(t,y)', + data, + axes=[(time_dim, lon_dim), (time_dim, lon_dim)], + output_dtypes=output_dtypes, + ) + return new_data + + +def _transform_arr_to_lst( + data: np.ndarray | da.core.Array, + *, + time_index: np.ndarray, + mask: np.ndarray, + time_dim: int, + lon_dim: int, + output_dtypes: DTypeLike, +) -> np.ndarray | da.core.Array: + """Transform array with UTC coord to local solar time (LST) coord. + + Note + ---- + This function either calls `_transform_to_lst_eager` or + `_transform_to_lst_lazy` depending on the type of input data. + + """ + if isinstance(data, np.ndarray): + func = _transform_to_lst_eager # type: ignore + else: + func = _transform_to_lst_lazy # type: ignore + new_data = func( + data, # type: ignore + time_index=time_index, + mask=mask, + time_dim=time_dim, + lon_dim=lon_dim, + output_dtypes=output_dtypes, + ) + return new_data + + +def _transform_cube_to_lst(cube: Cube) -> Cube: + """Transform cube to local solar time (LST) coordinate (lazy; in-place).""" + # Rechunk cube properly (it must not be chunked along time and longitude + # dimension); this also creates a new cube so the original input cube is + # not overwritten + complete_coords = [ + cube.coord('time', dim_coords=True), cube.coord('longitude'), + ] + cube = rechunk_cube(cube, complete_coords) + + time_coord = cube.coord('time', dim_coords=True) + lon_coord = cube.coord('longitude') + time_dim = cube.coord_dims(time_coord)[0] + lon_dim = cube.coord_dims(lon_coord)[0] + + # Transform cube data + (time_index, mask) = _get_time_index_and_mask(time_coord, lon_coord) + _transform_arr = partial( + _transform_arr_to_lst, + time_index=time_index, + mask=mask, + ) + cube.data = _transform_arr( + cube.core_data(), + time_dim=time_dim, + lon_dim=lon_dim, + output_dtypes=cube.dtype, + ) + + # Transform aux coords that span time and longitude dimensions + for coord in cube.coords(dim_coords=False): + dims = cube.coord_dims(coord) + if time_dim in dims and lon_dim in dims: + time_dim_ = dims.index(time_dim) + lon_dim_ = dims.index(lon_dim) + coord.points = _transform_arr( + coord.core_points(), + time_dim=time_dim_, + lon_dim=lon_dim_, + output_dtypes=coord.dtype, + ) + if coord.has_bounds(): + coord.bounds = _transform_arr( + coord.core_bounds(), + time_dim=time_dim_, + lon_dim=lon_dim_, + output_dtypes=coord.bounds_dtype, + ) + + # Transform cell measures that span time and longitude dimensions + for cell_measure in cube.cell_measures(): + dims = cube.cell_measure_dims(cell_measure) + if time_dim in dims and lon_dim in dims: + time_dim_ = dims.index(time_dim) + lon_dim_ = dims.index(lon_dim) + cell_measure.data = _transform_arr( + cell_measure.core_data(), + time_dim=time_dim_, + lon_dim=lon_dim_, + output_dtypes=cell_measure.dtype, + ) + + # Transform ancillary variables that span time and longitude dimensions + for anc_var in cube.ancillary_variables(): + dims = cube.ancillary_variable_dims(anc_var) + if time_dim in dims and lon_dim in dims: + time_dim_ = dims.index(time_dim) + lon_dim_ = dims.index(lon_dim) + anc_var.data = _transform_arr( + anc_var.core_data(), + time_dim=time_dim_, + lon_dim=lon_dim_, + output_dtypes=anc_var.dtype, + ) + + return cube + + +def _check_cube_coords(cube): + if not cube.coords('time', dim_coords=True): + raise CoordinateNotFoundError( + f"Input cube {cube.summary(shorten=True)} needs a dimensional " + f"coordinate `time`" + ) + time_coord = cube.coord('time', dim_coords=True) + # The following works since DimCoords are always 1D and monotonic + if time_coord.points[0] > time_coord.points[-1]: + raise ValueError("`time` coordinate must be monotonically increasing") + + if not cube.coords('longitude'): + raise CoordinateNotFoundError( + f"Input cube {cube.summary(shorten=True)} needs a coordinate " + f"`longitude`" + ) + lon_ndim = len(cube.coord_dims('longitude')) + if lon_ndim != 1: + raise CoordinateMultiDimError( + f"Input cube {cube.summary(shorten=True)} needs a 1D coordinate " + f"`longitude`, got {lon_ndim:d}D" + ) + + +def local_solar_time(cube: Cube) -> Cube: + """Convert UTC time coordinate to local solar time (LST). + + This preprocessor transforms input data with a UTC-based time coordinate to + a `local solar time (LST) `__ + coordinate. In LST, 12:00 noon is defined as the moment when the sun + reaches its highest point in the sky. Thus, LST is mainly determined by + longitude of a location. LST is particularly suited to analyze diurnal + cycles across larger regions of the globe, which would be phase-shifted + against each other when using UTC time. + + To transform data from UTC to LST, this function shifts data along the time + axis based on the longitude. In addition to the `cube`'s data, this + function also considers auxiliary coordinates, cell measures and ancillary + variables that span both the time and longitude dimension. + + Note + ---- + This preprocessor preserves the temporal frequency of the input data. For + example, hourly input data will be transformed into hourly output data. For + this, a location's exact LST will be put into corresponding bins defined + by the bounds of the input time coordinate (in this example, the bin size + is 1 hour). If time bounds are not given or cannot be approximated (only + one time step is given), a bin size of 1 hour is assumed. + + LST is approximated as `UTC_time + 12*longitude/180`, where `longitude` is + assumed to be in [-180, 180] (this function automatically calculates the + correct format for the longitude). This is only an approximation since the + exact LST also depends on the day of year due to the eccentricity of + Earth's orbit (see `equation of time + `__). However, since the + corresponding error is ~15 min at most, this is ignored here, as most + climate model data has a courser temporal resolution and the time scale for + diurnal evolution of meteorological phenomena is usually in the order of + hours, not minutes. + + Parameters + ---------- + cube: + Input cube. Needs a 1D monotonically increasing dimensional coordinate + `time` (assumed to refer to UTC time) and a 1D coordinate `longitude`. + + Returns + ------- + Cube + Transformed cube of same shape as input cube with an LST coordinate + instead of UTC time. + + Raises + ------ + iris.exceptions.CoordinateNotFoundError + Input cube does not contain valid `time` and/or `longitude` coordinate. + iris.exceptions.CoordinateMultiDimError + Input cube has multidimensional `longitude` coordinate. + ValueError + `time` coordinate of input cube is not monotonically increasing. + + """ + # Make sure that cube has valid time and longitude coordinates + _check_cube_coords(cube) + + # Transform cube data and all dimensional metadata that spans time AND + # longitude dimensions + cube = _transform_cube_to_lst(cube) + + # Adapt metadata of time coordinate + cube.coord('time', dim_coords=True).long_name = 'Local Solar Time' + + return cube diff --git a/tests/integration/preprocessor/_time/__init__.py b/tests/integration/preprocessor/_time/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/integration/preprocessor/_time/test_time.py b/tests/integration/preprocessor/_time/test_time.py new file mode 100644 index 0000000000..f1da8d45e7 --- /dev/null +++ b/tests/integration/preprocessor/_time/test_time.py @@ -0,0 +1,561 @@ +"""Unit tests for the :func:`esmvalcore.preprocessor._time` module.""" + +import dask.array as da +import numpy as np +import pytest +from cf_units import Unit +from iris.coords import ( + AncillaryVariable, + AuxCoord, + CellMeasure, + CellMethod, + DimCoord, +) +from iris.cube import Cube +from iris.exceptions import CoordinateMultiDimError, CoordinateNotFoundError + +from esmvalcore.preprocessor._time import climate_statistics, local_solar_time +from tests import assert_array_equal + + +@pytest.fixture +def easy_2d_cube(): + """Create easy 2D cube to test statistical operators.""" + time = DimCoord( + [2.0, 3.0], + bounds=[[-0.5, 2.5], [2.5, 3.5]], + standard_name='time', + units='days since 2000-01-01', + ) + lat = DimCoord( + [0.0, 1.0], standard_name='latitude', units='degrees' + ) + cube = Cube( + np.arange(4, dtype=np.float32).reshape(2, 2), + standard_name='air_temperature', + units='K', + dim_coords_and_dims=[(time, 0), (lat, 1)], + ) + return cube + + +@pytest.mark.parametrize( + 'operator,kwargs,expected_data,expected_units', + [ + ('gmean', {}, [0.0, 1.7320509], 'K'), + ('hmean', {}, [0.0, 1.5], 'K'), + ('max', {}, [2.0, 3.0], 'K'), + ('mean', {}, [0.5, 1.5], 'K'), + ('mean', {'weights': False}, [1.0, 2.0], 'K'), + ('median', {}, [1.0, 2.0], 'K'), + ('min', {}, [0.0, 1.0], 'K'), + ('peak', {}, [2.0, 3.0], 'K'), + ('percentile', {'percent': 0.0}, [0.0, 1.0], 'K'), + ('rms', {}, [1.0, 1.7320509], 'K'), + ('rms', {'weights': False}, [1.414214, 2.236068], 'K'), + ('std_dev', {}, [1.414214, 1.414214], 'K'), + ('std_dev', {'ddof': 0}, [1.0, 1.0], 'K'), + ('sum', {}, [2.0, 6.0], 'K day'), + ('sum', {'weights': False}, [2.0, 4.0], 'K'), + ('variance', {}, [2.0, 2.0], 'K2'), + ('variance', {'ddof': 0}, [1.0, 1.0], 'K2'), + ('wpercentile', {'percent': 50.0}, [0.5, 1.5], 'K'), + ] +) +def test_statistical_operators( + operator, kwargs, expected_data, expected_units, easy_2d_cube +): + """Test ``climate_statistics`` with different operators.""" + res = climate_statistics(easy_2d_cube, operator, **kwargs) + + assert res.var_name == easy_2d_cube.var_name + assert res.long_name == easy_2d_cube.long_name + assert res.standard_name == easy_2d_cube.standard_name + assert res.attributes == easy_2d_cube.attributes + assert res.units == expected_units + assert res.coord('latitude') == easy_2d_cube.coord('latitude') + assert res.coord('time').shape == (1, ) + np.testing.assert_allclose(res.data, expected_data, atol=1e-6, rtol=1e-6) + + +@pytest.fixture +def realistic_4d_cube(): + """Create realistic 4D cube.""" + time = DimCoord( + [11.0, 12.0], + standard_name='time', + units=Unit('hours since 1851-01-01', calendar='360_day'), + ) + plev = DimCoord([50000], standard_name='air_pressure', units='Pa') + lat = DimCoord([0.0, 1.0], standard_name='latitude', units='degrees') + lon = DimCoord( + [0.0, 20.0, 345.0], standard_name='longitude', units='degrees' + ) + + aux_2d_data = np.arange(2 * 3).reshape(2, 3) + aux_2d_bounds = np.stack( + (aux_2d_data - 1, aux_2d_data, aux_2d_data + 1), axis=-1 + ) + aux_2d = AuxCoord(aux_2d_data, var_name='aux_2d') + aux_2d_with_bnds = AuxCoord( + aux_2d_data, bounds=aux_2d_bounds, var_name='aux_2d_with_bnds' + ) + aux_time = AuxCoord(['Jan', 'Jan'], var_name='aux_time') + aux_lon = AuxCoord([0, 1, 2], var_name='aux_lon') + + cell_area = CellMeasure( + np.arange(2 * 2 * 3).reshape(2, 2, 3) + 10, + standard_name='cell_area', + units='m2', + measure='area', + ) + type_var = AncillaryVariable( + [['sea', 'land', 'lake'], ['lake', 'sea', 'land']], + var_name='type', + units='no_unit', + ) + + cube = Cube( + np.ma.masked_inside( + np.arange(2 * 1 * 2 * 3).reshape(2, 1, 2, 3), 1, 3 + ), + var_name='ta', + standard_name='air_temperature', + long_name='Air Temperature', + units='K', + cell_methods=[CellMethod('mean', 'time')], + dim_coords_and_dims=[(time, 0), (plev, 1), (lat, 2), (lon, 3)], + aux_coords_and_dims=[ + (aux_2d, (0, 3)), + (aux_2d_with_bnds, (0, 3)), + (aux_time, 0), + (aux_lon, 3), + ], + cell_measures_and_dims=[(cell_area, (0, 2, 3))], + ancillary_variables_and_dims=[(type_var, (0, 3))], + attributes={'test': 1}, + ) + return cube + + +def test_local_solar_time_regular(realistic_4d_cube): + """Test ``local_solar_time``.""" + input_cube = realistic_4d_cube.copy() + + result = local_solar_time(input_cube) + + assert input_cube == realistic_4d_cube + + assert result.metadata == input_cube.metadata + assert result.shape == input_cube.shape + assert result.coord('time') != input_cube.coord('time') + assert result.coord('air_pressure') == input_cube.coord('air_pressure') + assert result.coord('latitude') == input_cube.coord('latitude') + assert result.coord('longitude') == input_cube.coord('longitude') + + assert result.coord('time').standard_name == 'time' + assert result.coord('time').var_name is None + assert result.coord('time').long_name == 'Local Solar Time' + assert result.coord('time').units == Unit( + 'hours since 1850-01-01', calendar='360_day' + ) + assert result.coord('time').attributes == {} + np.testing.assert_allclose( + result.coord('time').points, [8651.0, 8652.0] + ) + np.testing.assert_allclose( + result.coord('time').bounds, [[8650.5, 8651.5], [8651.5, 8652.5]] + ) + + assert result.coord('aux_time') == input_cube.coord('aux_time') + assert result.coord('aux_lon') == input_cube.coord('aux_lon') + assert ( + result.coord('aux_2d').metadata == input_cube.coord('aux_2d').metadata + ) + assert not result.coord('aux_2d').has_lazy_points() + assert_array_equal( + result.coord('aux_2d').points, + np.ma.masked_equal([[0, 99, 5], [3, 1, 99]], 99), + ) + assert not result.coord('aux_2d').has_bounds() + assert ( + result.coord('aux_2d_with_bnds').metadata == + input_cube.coord('aux_2d_with_bnds').metadata + ) + assert not result.coord('aux_2d_with_bnds').has_lazy_points() + assert_array_equal( + result.coord('aux_2d_with_bnds').points, + np.ma.masked_equal([[0, 99, 5], [3, 1, 99]], 99), + ) + assert not result.coord('aux_2d_with_bnds').has_lazy_bounds() + assert_array_equal( + result.coord('aux_2d_with_bnds').bounds, + np.ma.masked_equal( + [ + [[-1, 0, 1], [99, 99, 99], [4, 5, 6]], + [[2, 3, 4], [0, 1, 2], [99, 99, 99]], + ], + 99, + ), + ) + + assert ( + result.cell_measure('cell_area').metadata == + input_cube.cell_measure('cell_area').metadata + ) + assert not result.cell_measure('cell_area').has_lazy_data() + assert_array_equal( + result.cell_measure('cell_area').data, + np.ma.masked_equal( + [ + [[10, 99, 18], [13, 99, 21]], + [[16, 11, 99], [19, 14, 99]], + ], + 99, + ), + ) + assert ( + result.ancillary_variable('type').metadata == + input_cube.ancillary_variable('type').metadata + ) + assert not result.ancillary_variable('type').has_lazy_data() + assert_array_equal( + result.ancillary_variable('type').data, + np.ma.masked_equal( + [['sea', 'miss', 'land'], ['lake', 'land', 'miss']], 'miss' + ), + ) + + assert not result.has_lazy_data() + assert_array_equal( + result.data, + np.ma.masked_equal( + [ + [[[0, 99, 8], [99, 99, 11]]], + [[[6, 99, 99], [9, 4, 99]]], + ], + 99, + ), + ) + + +def test_local_solar_time_1_time_step(realistic_4d_cube): + """Test ``local_solar_time``.""" + input_cube = realistic_4d_cube[[0]] + + result = local_solar_time(input_cube) + + assert input_cube == realistic_4d_cube[[0]] + + assert result.metadata == input_cube.metadata + assert result.shape == input_cube.shape + assert result.coord('time') != input_cube.coord('time') + assert result.coord('air_pressure') == input_cube.coord('air_pressure') + assert result.coord('latitude') == input_cube.coord('latitude') + assert result.coord('longitude') == input_cube.coord('longitude') + + assert result.coord('time').standard_name == 'time' + assert result.coord('time').var_name is None + assert result.coord('time').long_name == 'Local Solar Time' + assert result.coord('time').units == Unit( + 'hours since 1850-01-01', calendar='360_day' + ) + assert result.coord('time').attributes == {} + np.testing.assert_allclose(result.coord('time').points, [8651.0]) + np.testing.assert_allclose(result.coord('time').bounds, [[8650.5, 8651.5]]) + + assert result.coord('aux_time') == input_cube.coord('aux_time') + assert result.coord('aux_lon') == input_cube.coord('aux_lon') + assert ( + result.coord('aux_2d').metadata == input_cube.coord('aux_2d').metadata + ) + assert not result.coord('aux_2d').has_lazy_points() + assert_array_equal( + result.coord('aux_2d').points, np.ma.masked_equal([[0, 99, 99]], 99) + ) + assert not result.coord('aux_2d').has_bounds() + assert ( + result.coord('aux_2d_with_bnds').metadata == + input_cube.coord('aux_2d_with_bnds').metadata + ) + assert not result.coord('aux_2d_with_bnds').has_lazy_points() + assert_array_equal( + result.coord('aux_2d_with_bnds').points, + np.ma.masked_equal([[0, 99, 99]], 99), + ) + assert not result.coord('aux_2d_with_bnds').has_lazy_bounds() + assert_array_equal( + result.coord('aux_2d_with_bnds').bounds, + np.ma.masked_equal([[[-1, 0, 1], [99, 99, 99], [99, 99, 99]]], 99), + ) + + assert ( + result.cell_measure('cell_area').metadata == + input_cube.cell_measure('cell_area').metadata + ) + assert not result.cell_measure('cell_area').has_lazy_data() + assert_array_equal( + result.cell_measure('cell_area').data, + np.ma.masked_equal([[[10, 99, 99], [13, 99, 99]]], 99), + ) + assert ( + result.ancillary_variable('type').metadata == + input_cube.ancillary_variable('type').metadata + ) + assert not result.ancillary_variable('type').has_lazy_data() + assert_array_equal( + result.ancillary_variable('type').data, + np.ma.masked_equal([['sea', 'miss', 'miss']], 'miss'), + ) + + assert not result.has_lazy_data() + assert_array_equal( + result.data, + np.ma.masked_equal([[[[0, 99, 99], [99, 99, 99]]]], 99), + ) + + +@pytest.fixture +def realistic_unstructured_cube(): + """Create realistic unstructured cube.""" + time = DimCoord( + [0.0, 6.0, 12.0, 18.0, 24.0], + bounds=[ + [-3.0, 3.0], [3.0, 9.0], [9.0, 15.0], [15.0, 21.0], [21.0, 27.0] + ], + var_name='time', + standard_name='time', + long_name='time', + units=Unit('hours since 1851-01-01'), + ) + + lat = AuxCoord( + [0.0, 0.0, 0.0, 0.0], + var_name='lat', + standard_name='latitude', + long_name='latitude', + units='degrees_north', + ) + lon = AuxCoord( + [0.0, 80 * np.pi / 180.0, -120 * np.pi / 180.0, 160 * np.pi / 180.0], + var_name='lon', + standard_name='longitude', + long_name='longitude', + units='rad', + ) + aux_2d_data = da.ma.masked_inside(da.arange(4 * 5).reshape(4, 5), 3, 10) + aux_2d_bounds = da.stack((aux_2d_data - 1, aux_2d_data + 1), axis=-1) + aux_2d = AuxCoord(aux_2d_data, var_name='aux_2d') + aux_2d_with_bnds = AuxCoord( + aux_2d_data, bounds=aux_2d_bounds, var_name='aux_2d_with_bnds' + ) + aux_0d = AuxCoord([0], var_name='aux_0d') + + cell_measure_2d = CellMeasure( + da.ma.masked_inside(da.arange(4 * 5).reshape(4, 5), 3, 10), + var_name='cell_measure', + ) + anc_var_2d = AncillaryVariable( + da.ma.masked_inside(da.arange(4 * 5).reshape(4, 5), 3, 10), + var_name='anc_var', + ) + + cube = Cube( + da.arange(4 * 5).reshape(4, 5), + var_name='ta', + standard_name='air_temperature', + long_name='Air Temperature', + units='K', + dim_coords_and_dims=[(time, 1)], + aux_coords_and_dims=[ + (lat, 0), + (lon, 0), + (aux_2d, (0, 1)), + (aux_2d_with_bnds, (0, 1)), + (aux_0d, ()), + ], + cell_measures_and_dims=[(cell_measure_2d, (0, 1))], + ancillary_variables_and_dims=[(anc_var_2d, (0, 1))], + ) + return cube + + +def test_local_solar_time_unstructured(realistic_unstructured_cube): + """Test ``local_solar_time``.""" + input_cube = realistic_unstructured_cube.copy() + + result = local_solar_time(input_cube) + + assert input_cube == realistic_unstructured_cube + + assert result.metadata == input_cube.metadata + assert result.shape == input_cube.shape + assert result.coord('time') != input_cube.coord('time') + assert result.coord('latitude') == input_cube.coord('latitude') + assert result.coord('longitude') == input_cube.coord('longitude') + + assert result.coord('time').standard_name == 'time' + assert result.coord('time').var_name == 'time' + assert result.coord('time').long_name == 'Local Solar Time' + assert result.coord('time').units == 'hours since 1850-01-01' + assert result.coord('time').attributes == {} + np.testing.assert_allclose( + result.coord('time').points, [8760.0, 8766.0, 8772.0, 8778.0, 8784.0] + ) + np.testing.assert_allclose( + result.coord('time').bounds, + [ + [8757.0, 8763.0], + [8763.0, 8769.0], + [8769.0, 8775.0], + [8775.0, 8781.0], + [8781.0, 8787.0], + ], + ) + + assert result.coord('aux_0d') == input_cube.coord('aux_0d') + assert ( + result.coord('aux_2d').metadata == input_cube.coord('aux_2d').metadata + ) + assert result.coord('aux_2d').has_lazy_points() + assert_array_equal( + result.coord('aux_2d').points, + np.ma.masked_equal( + [ + [0, 1, 2, 99, 99], + [99, 99, 99, 99, 99], + [11, 12, 13, 14, 99], + [99, 99, 15, 16, 17], + ], + 99, + ), + ) + assert not result.coord('aux_2d').has_bounds() + assert ( + result.coord('aux_2d_with_bnds').metadata == + input_cube.coord('aux_2d_with_bnds').metadata + ) + assert result.coord('aux_2d_with_bnds').has_lazy_points() + assert_array_equal( + result.coord('aux_2d_with_bnds').points, + np.ma.masked_equal( + [ + [0, 1, 2, 99, 99], + [99, 99, 99, 99, 99], + [11, 12, 13, 14, 99], + [99, 99, 15, 16, 17], + ], + 99, + ), + ) + assert result.coord('aux_2d_with_bnds').has_lazy_bounds() + assert_array_equal( + result.coord('aux_2d_with_bnds').bounds, + np.ma.masked_equal( + [ + [[-1, 1], [0, 2], [1, 3], [99, 99], [99, 99]], + [[99, 99], [99, 99], [99, 99], [99, 99], [99, 99]], + [[10, 12], [11, 13], [12, 14], [13, 15], [99, 99]], + [[99, 99], [99, 99], [14, 16], [15, 17], [16, 18]], + ], + 99, + ), + ) + + assert ( + result.cell_measure('cell_measure').metadata == + input_cube.cell_measure('cell_measure').metadata + ) + assert result.cell_measure('cell_measure').has_lazy_data() + assert_array_equal( + result.cell_measure('cell_measure').data, + np.ma.masked_equal( + [ + [0, 1, 2, 99, 99], + [99, 99, 99, 99, 99], + [11, 12, 13, 14, 99], + [99, 99, 15, 16, 17], + ], + 99, + ), + ) + assert ( + result.ancillary_variable('anc_var').metadata == + input_cube.ancillary_variable('anc_var').metadata + ) + assert result.ancillary_variable('anc_var').has_lazy_data() + assert_array_equal( + result.ancillary_variable('anc_var').data, + np.ma.masked_equal( + [ + [0, 1, 2, 99, 99], + [99, 99, 99, 99, 99], + [11, 12, 13, 14, 99], + [99, 99, 15, 16, 17], + ], + 99, + ), + ) + + assert result.has_lazy_data() + assert_array_equal( + result.data, + np.ma.masked_equal( + [ + [0, 1, 2, 3, 4], + [99, 5, 6, 7, 8], + [11, 12, 13, 14, 99], + [99, 99, 15, 16, 17], + ], + 99, + ), + ) + + +def test_local_solar_time_no_time_fail(realistic_4d_cube): + """Test ``local_solar_time``.""" + realistic_4d_cube.remove_coord('time') + msg = 'needs a dimensional coordinate `time`' + with pytest.raises(CoordinateNotFoundError, match=msg): + local_solar_time(realistic_4d_cube) + + +def test_local_solar_time_scalar_time_fail(realistic_4d_cube): + """Test ``local_solar_time``.""" + input_cube = realistic_4d_cube[0] + msg = 'needs a dimensional coordinate `time`' + with pytest.raises(CoordinateNotFoundError, match=msg): + local_solar_time(input_cube) + + +def test_local_solar_time_time_decreasing_fail(realistic_4d_cube): + """Test ``local_solar_time``.""" + input_cube = realistic_4d_cube[::-1] + msg = '`time` coordinate must be monotonically increasing' + with pytest.raises(ValueError, match=msg): + local_solar_time(input_cube) + + +def test_local_solar_time_no_lon_fail(realistic_4d_cube): + """Test ``local_solar_time``.""" + realistic_4d_cube.remove_coord('longitude') + msg = 'needs a coordinate `longitude`' + with pytest.raises(CoordinateNotFoundError, match=msg): + local_solar_time(realistic_4d_cube) + + +def test_local_solar_time_scalar_lon_fail(realistic_4d_cube): + """Test ``local_solar_time``.""" + input_cube = realistic_4d_cube[..., 0] + msg = 'needs a 1D coordinate `longitude`, got 0D' + with pytest.raises(CoordinateMultiDimError, match=msg): + local_solar_time(input_cube) + + +def test_local_solar_time_2d_lon_fail(easy_2d_cube): + """Test ``local_solar_time``.""" + lon_coord = AuxCoord(easy_2d_cube.data, standard_name='longitude') + easy_2d_cube.add_aux_coord(lon_coord, (0, 1)) + msg = 'needs a 1D coordinate `longitude`, got 2D' + with pytest.raises(CoordinateMultiDimError, match=msg): + local_solar_time(easy_2d_cube) diff --git a/tests/unit/preprocessor/_time/test_time.py b/tests/unit/preprocessor/_time/test_time.py index f9d207ed49..a89c0ba760 100644 --- a/tests/unit/preprocessor/_time/test_time.py +++ b/tests/unit/preprocessor/_time/test_time.py @@ -1518,7 +1518,7 @@ def test_timeseries_filter_timecoord(self): filter_stats='sum') def test_timeseries_filter_implemented(self): - """Test a not implemnted filter.""" + """Test a not implemented filter.""" with self.assertRaises(NotImplementedError): timeseries_filter(self.cube, 7, @@ -2160,65 +2160,5 @@ def test_resample_fails_scalar(self): resample_time(cube, day=16) -@pytest.fixture -def easy_2d_cube(): - """Create easy 2D cube to test statistical operators.""" - time = iris.coords.DimCoord( - [2.0, 3.0], - bounds=[[-0.5, 2.5], [2.5, 3.5]], - standard_name='time', - units='days since 2000-01-01', - ) - lat = iris.coords.DimCoord( - [0.0, 1.0], standard_name='latitude', units='degrees' - ) - cube = Cube( - np.arange(4, dtype=np.float32).reshape(2, 2), - standard_name='air_temperature', - units='K', - dim_coords_and_dims=[(time, 0), (lat, 1)], - ) - return cube - - -@pytest.mark.parametrize( - 'operator,kwargs,expected_data,expected_units', - [ - ('gmean', {}, [0.0, 1.7320509], 'K'), - ('hmean', {}, [0.0, 1.5], 'K'), - ('max', {}, [2.0, 3.0], 'K'), - ('mean', {}, [0.5, 1.5], 'K'), - ('mean', {'weights': False}, [1.0, 2.0], 'K'), - ('median', {}, [1.0, 2.0], 'K'), - ('min', {}, [0.0, 1.0], 'K'), - ('peak', {}, [2.0, 3.0], 'K'), - ('percentile', {'percent': 0.0}, [0.0, 1.0], 'K'), - ('rms', {}, [1.0, 1.7320509], 'K'), - ('rms', {'weights': False}, [1.414214, 2.236068], 'K'), - ('std_dev', {}, [1.414214, 1.414214], 'K'), - ('std_dev', {'ddof': 0}, [1.0, 1.0], 'K'), - ('sum', {}, [2.0, 6.0], 'K day'), - ('sum', {'weights': False}, [2.0, 4.0], 'K'), - ('variance', {}, [2.0, 2.0], 'K2'), - ('variance', {'ddof': 0}, [1.0, 1.0], 'K2'), - ('wpercentile', {'percent': 50.0}, [0.5, 1.5], 'K'), - ] -) -def test_statistical_operators( - operator, kwargs, expected_data, expected_units, easy_2d_cube -): - """Test ``climate_statistics`` with different operators.""" - res = climate_statistics(easy_2d_cube, operator, **kwargs) - - assert res.var_name == easy_2d_cube.var_name - assert res.long_name == easy_2d_cube.long_name - assert res.standard_name == easy_2d_cube.standard_name - assert res.attributes == easy_2d_cube.attributes - assert res.units == expected_units - assert res.coord('latitude') == easy_2d_cube.coord('latitude') - assert res.coord('time').shape == (1, ) - np.testing.assert_allclose(res.data, expected_data, atol=1e-6, rtol=1e-6) - - if __name__ == '__main__': unittest.main() diff --git a/tests/unit/test_iris_helpers.py b/tests/unit/test_iris_helpers.py index 07e81608e8..e91f2f70a1 100644 --- a/tests/unit/test_iris_helpers.py +++ b/tests/unit/test_iris_helpers.py @@ -4,6 +4,7 @@ from itertools import permutations from unittest import mock +import dask.array as da import numpy as np import pytest from cf_units import Unit @@ -21,6 +22,7 @@ add_leading_dim_to_cube, date2num, merge_cube_attributes, + rechunk_cube, ) @@ -220,3 +222,119 @@ def test_merge_cube_attributes_1_cube(): merge_cube_attributes(cubes) assert len(cubes) == 1 assert_attribues_equal(cubes[0].attributes, expected_attributes) + + +@pytest.fixture +def cube_3d(): + """3D sample cube.""" + # DimCoords + x = DimCoord([0, 1, 2], var_name='x') + y = DimCoord([0, 1, 2], var_name='y') + z = DimCoord([0, 1, 2, 3], var_name='z') + + # AuxCoords + aux_x = AuxCoord( + da.ones(3, chunks=1), + bounds=da.ones((3, 3), chunks=(1, 1)), + var_name='aux_x', + ) + aux_z = AuxCoord(da.ones(4, chunks=1), var_name='aux_z') + aux_xy = AuxCoord(da.ones((3, 3), chunks=(1, 1)), var_name='xy') + aux_xz = AuxCoord(da.ones((3, 4), chunks=(1, 1)), var_name='xz') + aux_yz = AuxCoord(da.ones((3, 4), chunks=(1, 1)), var_name='yz') + aux_xyz = AuxCoord( + da.ones((3, 3, 4), chunks=(1, 1, 1)), + bounds=da.ones((3, 3, 4, 3), chunks=(1, 1, 1, 1)), + var_name='xyz', + ) + aux_coords_and_dims = [ + (aux_x, 0), + (aux_z, 2), + (aux_xy, (0, 1)), + (aux_xz, (0, 2)), + (aux_yz, (1, 2)), + (aux_xyz, (0, 1, 2)), + ] + + # CellMeasures and AncillaryVariables + cell_measure = CellMeasure( + da.ones((3, 4), chunks=(1, 1)), var_name='cell_measure' + ) + anc_var = AncillaryVariable( + da.ones((3, 4), chunks=(1, 1)), var_name='anc_var' + ) + + return Cube( + da.ones((3, 3, 4), chunks=(1, 1, 1)), + var_name='cube', + dim_coords_and_dims=[(x, 0), (y, 1), (z, 2)], + aux_coords_and_dims=aux_coords_and_dims, + cell_measures_and_dims=[(cell_measure, (1, 2))], + ancillary_variables_and_dims=[(anc_var, (0, 2))], + ) + + +def test_rechunk_cube_fully_lazy(cube_3d): + """Test ``rechunk_cube``.""" + input_cube = cube_3d.copy() + + x_coord = input_cube.coord('x') + result = rechunk_cube(input_cube, [x_coord, 'y'], remaining_dims=2) + + assert input_cube == cube_3d + assert result == cube_3d + assert result.core_data().chunksize == (3, 3, 2) + assert result.coord('aux_x').core_points().chunksize == (3,) + assert result.coord('aux_z').core_points().chunksize == (1,) + assert result.coord('xy').core_points().chunksize == (3, 3) + assert result.coord('xz').core_points().chunksize == (3, 2) + assert result.coord('yz').core_points().chunksize == (3, 2) + assert result.coord('xyz').core_points().chunksize == (3, 3, 2) + assert result.coord('aux_x').core_bounds().chunksize == (3, 2) + assert result.coord('aux_z').core_bounds() is None + assert result.coord('xy').core_bounds() is None + assert result.coord('xz').core_bounds() is None + assert result.coord('yz').core_bounds() is None + assert result.coord('xyz').core_bounds().chunksize == (3, 3, 2, 2) + assert result.cell_measure('cell_measure').core_data().chunksize == (3, 2) + assert result.ancillary_variable('anc_var').core_data().chunksize == (3, 2) + + +def test_rechunk_cube_partly_lazy(cube_3d): + """Test ``rechunk_cube``.""" + input_cube = cube_3d.copy() + + # Realize some arrays + input_cube.data + input_cube.coord('xyz').points + input_cube.coord('xyz').bounds + input_cube.cell_measure('cell_measure').data + + result = rechunk_cube(input_cube, ['x', 'y'], remaining_dims=2) + + assert input_cube == cube_3d + assert result == cube_3d + assert not result.has_lazy_data() + assert result.coord('aux_x').core_points().chunksize == (3,) + assert result.coord('aux_z').core_points().chunksize == (1,) + assert result.coord('xy').core_points().chunksize == (3, 3) + assert result.coord('xz').core_points().chunksize == (3, 2) + assert result.coord('yz').core_points().chunksize == (3, 2) + assert not result.coord('xyz').has_lazy_points() + assert result.coord('aux_x').core_bounds().chunksize == (3, 2) + assert result.coord('aux_z').core_bounds() is None + assert result.coord('xy').core_bounds() is None + assert result.coord('xz').core_bounds() is None + assert result.coord('yz').core_bounds() is None + assert not result.coord('xyz').has_lazy_bounds() + assert not result.cell_measure('cell_measure').has_lazy_data() + assert result.ancillary_variable('anc_var').core_data().chunksize == (3, 2) + + +def test_rechunk_cube_invalid_coord_fail(cube_3d): + """Test ``rechunk_cube``.""" + msg = ( + "Complete coordinates must be 1D coordinates, got 2D coordinate 'xy'" + ) + with pytest.raises(CoordinateMultiDimError, match=msg): + rechunk_cube(cube_3d, ['xy'])