3 changes: 3 additions & 0 deletions docs/src/whatsnew/latest.rst
@@ -92,6 +92,9 @@ This document explains the changes made to Iris for this release
#. `@wjbenfold`_ added caching to the calculation of the points array in a
   :class:`~iris.coords.DimCoord` created using
   :meth:`~iris.coords.DimCoord.from_regular`. (:pull:`4698`)
#. `@wjbenfold`_ introduced caching in :func:`iris._lazy_data._optimum_chunksize`
   and :func:`iris.fileformats.pp_load_rules._epoch_date_hours` to reduce time
   spent on repeated calculations. (:pull:`4716`)


🔥 Deprecations
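A note on the mechanism behind the caching described above: functools.lru_cache memoises a function, so repeated calls with identical arguments return a stored result instead of recomputing it. A minimal, self-contained sketch of the idea (illustrative only, not Iris code):

    from functools import lru_cache

    @lru_cache
    def expensive(x):
        # The body runs only on a cache miss.
        print(f"computing for {x!r}")
        return x * 2

    expensive(3)  # prints "computing for 3" and returns 6
    expensive(3)  # cache hit: returns 6 without recomputing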
30 changes: 27 additions & 3 deletions lib/iris/_lazy_data.py
@@ -10,7 +10,7 @@

"""

from functools import wraps
from functools import lru_cache, wraps

import dask
import dask.array as da
@@ -47,7 +47,14 @@ def is_lazy_data(data):
    return result


def _optimum_chunksize(chunks, shape, limit=None, dtype=np.dtype("f4")):
@lru_cache
def _optimum_chunksize_internals(
    chunks,
    shape,
    limit=None,
    dtype=np.dtype("f4"),
    dask_array_chunksize=dask.config.get("array.chunk-size"),
):
"""
Reduce or increase an initial chunk shape to get close to a chosen ideal
size, while prioritising the splitting of the earlier (outer) dimensions
@@ -86,7 +93,7 @@ def _optimum_chunksize(chunks, shape, limit=None, dtype=np.dtype("f4")):
    # Set the chunksize limit.
    if limit is None:
        # Fetch the default 'optimal' chunksize from the dask config.
        limit = dask.config.get("array.chunk-size")
        limit = dask_array_chunksize
    # Convert to bytes
    limit = dask.utils.parse_bytes(limit)

@@ -146,6 +153,23 @@ def _optimum_chunksize(chunks, shape, limit=None, dtype=np.dtype("f4")):
    return tuple(result)


@wraps(_optimum_chunksize_internals)
def _optimum_chunksize(
    chunks,
    shape,
    limit=None,
    dtype=np.dtype("f4"),
):

    return _optimum_chunksize_internals(
        tuple(chunks),
        tuple(shape),
        limit=limit,
        dtype=dtype,
        dask_array_chunksize=dask.config.get("array.chunk-size"),
    )


def as_lazy_data(data, chunks=None, asarray=False):
"""
Convert the input array `data` to a dask array.
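Two details of the _optimum_chunksize split above are easy to miss. First, functools.lru_cache needs hashable arguments, so the thin public wrapper converts chunks and shape (which may arrive as lists) to tuples before calling the cached internals. Second, the current dask array.chunk-size setting is passed in explicitly, making it part of the cache key so that changing the config cannot yield a stale cached answer; the def-time default on dask_array_chunksize is therefore always overridden in practice. A generic sketch of the same pattern (hypothetical names, not Iris code):

    from functools import lru_cache, wraps

    CONFIG = {"chunk-size": "128MiB"}  # stands in for mutable global config

    @lru_cache
    def _compute_internals(chunks, shape, chunk_size):
        # chunks and shape arrive as hashable tuples; chunk_size is part of
        # the cache key, so a config change invalidates stale entries.
        return ("result for", chunks, shape, chunk_size)

    @wraps(_compute_internals)
    def compute(chunks, shape):
        # Normalise possibly-unhashable inputs and read the config at call
        # time rather than relying on a def-time default.
        return _compute_internals(
            tuple(chunks), tuple(shape), chunk_size=CONFIG["chunk-size"]
        )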
21 changes: 20 additions & 1 deletion lib/iris/fileformats/pp_load_rules.py
@@ -9,6 +9,7 @@
# SciTools/iris-code-generators:tools/gen_rules.py

import calendar
from functools import wraps

import cf_units
import numpy as np
@@ -514,7 +515,7 @@ def _new_coord_and_dims(
_HOURS_UNIT = cf_units.Unit("hours")


def _epoch_date_hours(epoch_hours_unit, datetime):
def _epoch_date_hours_internals(epoch_hours_unit, datetime):
"""
Return an 'hours since epoch' number for a date.

@@ -589,6 +590,24 @@ def _epoch_date_hours(epoch_hours_unit, datetime):
    return epoch_hours


_epoch_date_hours_cache = {}


@wraps(_epoch_date_hours_internals)
def _epoch_date_hours(epoch_hours_unit, datetime):
    # Not using functools.lru_cache because it does an equality check that fails
    # on datetime objects from different calendars.

    key = (epoch_hours_unit, datetime)

    if key not in _epoch_date_hours_cache:
        _epoch_date_hours_cache[key] = _epoch_date_hours_internals(
            epoch_hours_unit, datetime
        )

    return _epoch_date_hours_cache[key]


def _convert_time_coords(
    lbcode,
    lbtim,
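Here a hand-rolled module-level dict is used instead of functools.lru_cache because, as the inline comment explains, lru_cache performs an equality check on keys that fails for datetime objects from different calendars. The pattern is simple enough to sketch generically (hypothetical names, not Iris code):

    from functools import wraps

    def _convert_internals(unit, moment):
        # Stand-in for an expensive conversion.
        return ("converted", unit, moment)

    _convert_cache = {}

    @wraps(_convert_internals)
    def convert(unit, moment):
        key = (unit, moment)
        if key not in _convert_cache:
            _convert_cache[key] = _convert_internals(unit, moment)
        return _convert_cache[key]

One trade-off worth noting: unlike lru_cache, this cache is unbounded (no eviction), which is presumably acceptable because a given load encounters only a small set of distinct units and datetimes.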