Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions benchmarks/benchmarks/load/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,58 @@ def time_many_var_load(self) -> None:
_ = load(str(self.FILE_PATH))


class ManyCubes:
    """Benchmark loading a NetCDF file that contains many similar cubes."""

    FILE_PATH = BENCHMARK_DATA / "many_cube_file.nc"

    @staticmethod
    def _create_file(save_path: str) -> None:
        """Run externally - everything must be self-contained."""
        import numpy as np

        from iris import save
        from iris.coords import AuxCoord, DimCoord
        from iris.cube import Cube, CubeList

        n_points = 81920
        n_bounds = 3
        points = np.arange(n_points).astype(np.float32)
        bounds = (
            np.arange(n_points * n_bounds)
            .astype(np.float32)
            .reshape(n_points, n_bounds)
        )
        time_coord = DimCoord(np.array([0]), standard_name="time")
        lat_coord = AuxCoord(
            points, bounds=bounds, standard_name="latitude", units="degrees"
        )
        lon_coord = AuxCoord(
            points, bounds=bounds, standard_name="longitude", units="degrees"
        )
        # Single template cube; the loop below saves renamed copies of it.
        template = Cube(points.reshape(1, -1), units="unknown")
        template.add_dim_coord(time_coord, 0)
        template.add_aux_coord(lat_coord, 1)
        template.add_aux_coord(lon_coord, 1)

        cubes = CubeList()
        for index in range(100):
            cube = template.copy()
            cube.long_name = f"var_{index}"
            cubes.append(cube)
        save(cubes, save_path)

    def setup_cache(self) -> None:
        # Only (re)generate the benchmark file when it is absent or reuse is
        # disabled.
        if not REUSE_DATA or not self.FILE_PATH.is_file():
            # See :mod:`benchmarks.generate_data` docstring for full explanation.
            _ = run_function_elsewhere(
                self._create_file,
                str(self.FILE_PATH),
            )

    def time_many_cube_load(self) -> None:
        """Time loading the many-cube file."""
        _ = load(str(self.FILE_PATH))


class StructuredFF:
"""Test structured loading of a large-ish fieldsfile.

Expand Down
3 changes: 2 additions & 1 deletion docs/src/whatsnew/latest.rst
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ This document explains the changes made to Iris for this release
🚀 Performance Enhancements
===========================

#. N/A
#. `@bouweandela`_ made loading :class:`~iris.cube.Cube`\ s from small NetCDF
   files faster. (:pull:`6229`)


🔥 Deprecations
Expand Down
3 changes: 2 additions & 1 deletion lib/iris/cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -924,7 +924,8 @@ def __eq__(self, other):
# For equality, require both globals + locals to match exactly.
# NOTE: array content works correctly, since 'locals' and 'globals' are always
# iris.common.mixin.LimitedAttributeDict, which gets this right.
other = CubeAttrsDict(other)
if not isinstance(other, CubeAttrsDict):
other = CubeAttrsDict(other)
result = self.locals == other.locals and self.globals == other.globals
return result

Expand Down
30 changes: 15 additions & 15 deletions lib/iris/fileformats/cf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1336,9 +1336,11 @@ def __init__(self, file_source, warn=False, monotonic=False):
self._trim_ugrid_variable_types()
self._with_ugrid = False

self._translate()
self._build_cf_groups()
self._reset()
# Read the variables in the dataset only once to reduce runtime.
variables = self._dataset.variables
self._translate(variables)
self._build_cf_groups(variables)
self._reset(variables)

def __enter__(self):
# Enable use as a context manager
Expand Down Expand Up @@ -1380,16 +1382,16 @@ def filename(self):
def __repr__(self):
return "%s(%r)" % (self.__class__.__name__, self._filename)

def _translate(self):
def _translate(self, variables):
"""Classify the netCDF variables into CF-netCDF variables."""
netcdf_variable_names = list(self._dataset.variables.keys())
netcdf_variable_names = list(variables.keys())

# Identify all CF coordinate variables first. This must be done
# first as, by CF convention, the definition of a CF auxiliary
# coordinate variable may include a scalar CF coordinate variable,
# whereas we want these two types of variables to be mutually exclusive.
coords = CFCoordinateVariable.identify(
self._dataset.variables, monotonic=self._check_monotonic
variables, monotonic=self._check_monotonic
)
self.cf_group.update(coords)
coordinate_names = list(self.cf_group.coordinates.keys())
Expand All @@ -1402,9 +1404,7 @@ def _translate(self):
if issubclass(variable_type, CFGridMappingVariable)
else coordinate_names
)
self.cf_group.update(
variable_type.identify(self._dataset.variables, ignore=ignore)
)
self.cf_group.update(variable_type.identify(variables, ignore=ignore))

# Identify global netCDF attributes.
attr_dict = {
Expand All @@ -1414,7 +1414,7 @@ def _translate(self):
self.cf_group.global_attributes.update(attr_dict)

# Identify and register all CF formula terms.
formula_terms = _CFFormulaTermsVariable.identify(self._dataset.variables)
formula_terms = _CFFormulaTermsVariable.identify(variables)

for cf_var in formula_terms.values():
for cf_root, cf_term in cf_var.cf_terms_by_root.items():
Expand All @@ -1433,9 +1433,9 @@ def _translate(self):
)

for name in data_variable_names:
self.cf_group[name] = CFDataVariable(name, self._dataset.variables[name])
self.cf_group[name] = CFDataVariable(name, variables[name])

def _build_cf_groups(self):
def _build_cf_groups(self, variables):
"""Build the first order relationships between CF-netCDF variables."""

def _build(cf_variable):
Expand Down Expand Up @@ -1489,7 +1489,7 @@ def _span_check(
ignore += coordinate_names

match = variable_type.identify(
self._dataset.variables,
variables,
ignore=ignore,
target=cf_variable.cf_name,
warn=False,
Expand Down Expand Up @@ -1569,9 +1569,9 @@ def _span_check(
promoted.add(cf_name)
not_promoted = ignored.difference(promoted)

def _reset(self):
def _reset(self, variables):
"""Reset the attribute touch history of each variable."""
for nc_var_name in self._dataset.variables.keys():
for nc_var_name in variables.keys():
self.cf_group[nc_var_name].cf_attrs_reset()

def _close(self):
Expand Down
Loading