diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index 4daff5a2d1..58b66690e8 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -62,6 +62,12 @@ This document explains the changes made to Iris for this release :attr:`~iris.cube.Cube.attributes`, and improving spacing throughout. (:pull:`4206`) +#. `@pp-mo`_ and `@lbdreyer`_ optimised loading NetCDF files, resulting in a + speed-up when loading with a single :class:`~iris.NameConstraint`. Note, this + optimisation only applies when matching on standard name, long name or + NetCDF variable name, not when matching on STASH. + (:pull:`4176`) + 🐛 Bugs Fixed ============= diff --git a/lib/iris/fileformats/__init__.py b/lib/iris/fileformats/__init__.py index 5e03f1e4fd..f2b0cfc095 100644 --- a/lib/iris/fileformats/__init__.py +++ b/lib/iris/fileformats/__init__.py @@ -90,7 +90,12 @@ def _load_grib(*args, **kwargs): # FORMAT_AGENT.add_spec( FormatSpecification( - "NetCDF", MagicNumber(4), 0x43444601, netcdf.load_cubes, priority=5 + "NetCDF", + MagicNumber(4), + 0x43444601, + netcdf.load_cubes, + priority=5, + constraint_aware_handler=True, ) ) @@ -102,6 +107,7 @@ def _load_grib(*args, **kwargs): 0x43444602, netcdf.load_cubes, priority=5, + constraint_aware_handler=True, ) ) @@ -114,6 +120,7 @@ def _load_grib(*args, **kwargs): 0x894844460D0A1A0A, netcdf.load_cubes, priority=5, + constraint_aware_handler=True, ) ) @@ -124,6 +131,7 @@ def _load_grib(*args, **kwargs): lambda protocol: protocol in ["http", "https"], netcdf.load_cubes, priority=6, + constraint_aware_handler=True, ) FORMAT_AGENT.add_spec(_nc_dap) del _nc_dap diff --git a/lib/iris/fileformats/netcdf.py b/lib/iris/fileformats/netcdf.py index 158cb6723d..0773d40c52 100644 --- a/lib/iris/fileformats/netcdf.py +++ b/lib/iris/fileformats/netcdf.py @@ -759,7 +759,51 @@ def coord_from_term(term): cube.add_aux_factory(factory) -def load_cubes(filenames, callback=None): +def
_translate_constraints_to_var_callback(constraints): + """ + Translate load constraints into a simple data-var filter function, if possible. + + Returns: + * function(cf_var:CFDataVariable): --> bool, + or None. + + For now, ONLY handles a single NameConstraint with no 'STASH' component. + + """ + import iris._constraints + + constraints = iris._constraints.list_of_constraints(constraints) + result = None + if len(constraints) == 1: + (constraint,) = constraints + if ( + isinstance(constraint, iris._constraints.NameConstraint) + and constraint.STASH == "none" + ): + # As long as it doesn't use a STASH match, then we can treat it as + # a testing against name properties of cf_var. + # That's just like testing against name properties of a cube, except that they may not all exist. + def inner(cf_datavar): + match = True + for name in constraint._names: + expected = getattr(constraint, name) + if name != "STASH" and expected != "none": + attr_name = "cf_name" if name == "var_name" else name + # Fetch property : N.B. CFVariable caches the property values + # The use of a default here is the only difference from the code in NameConstraint. + if not hasattr(cf_datavar, attr_name): + continue + actual = getattr(cf_datavar, attr_name, "") + if actual != expected: + match = False + break + return match + + result = inner + return result + + +def load_cubes(filenames, callback=None, constraints=None): """ Loads cubes from a list of NetCDF filenames/URLs. @@ -779,6 +823,9 @@ def load_cubes(filenames, callback=None): """ from iris.io import run_callback + # Create a low-level data-var filter from the original load constraints, if they are suitable. + var_callback = _translate_constraints_to_var_callback(constraints) + # Create an actions engine. engine = _actions_engine() @@ -794,6 +841,10 @@ def load_cubes(filenames, callback=None): cf.cf_group.promoted.values() ) for cf_var in data_variables: + if var_callback and not var_callback(cf_var): + # Deliver only selected results. 
+ continue + cube = _load_cube(engine, cf, cf_var, filename) # Process any associated formula terms and attach diff --git a/lib/iris/tests/integration/test_netcdf.py b/lib/iris/tests/integration/test_netcdf.py index 3d4e972c57..29b41bd728 100644 --- a/lib/iris/tests/integration/test_netcdf.py +++ b/lib/iris/tests/integration/test_netcdf.py @@ -672,5 +672,21 @@ def test_geostationary_no_false_offsets(self): self.assertEqual(cs.false_northing, 0.0) +class TestConstrainedLoad(tests.IrisTest): + filename = tests.get_data_path( + ("NetCDF", "label_and_climate", "A1B-99999a-river-sep-2070-2099.nc") + ) + + def test_netcdf_with_NameConstraint(self): + constr = iris.NameConstraint(var_name="cdf_temp_dmax_tmean_abs") + cubes = iris.load(self.filename, constr) + self.assertEqual(len(cubes), 1) + self.assertEqual(cubes[0].var_name, "cdf_temp_dmax_tmean_abs") + + def test_netcdf_with_no_constraint(self): + cubes = iris.load(self.filename) + self.assertEqual(len(cubes), 3) + + if __name__ == "__main__": tests.main() diff --git a/lib/iris/tests/unit/fileformats/netcdf/test__translate_constraints_to_var_callback.py b/lib/iris/tests/unit/fileformats/netcdf/test__translate_constraints_to_var_callback.py new file mode 100644 index 0000000000..fb08ffda2b --- /dev/null +++ b/lib/iris/tests/unit/fileformats/netcdf/test__translate_constraints_to_var_callback.py @@ -0,0 +1,100 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the LGPL license. +# See COPYING and COPYING.LESSER in the root of the repository for full +# licensing details. +""" +Unit tests for +:func:`iris.fileformats.netcdf._translate_constraints_to_var_callback`. 
+ +""" + +from unittest.mock import MagicMock + +import iris +from iris.fileformats.cf import CFDataVariable +from iris.fileformats.netcdf import _translate_constraints_to_var_callback + +# import iris tests first so that some things can be initialised before +# importing anything else +import iris.tests as tests + + +class Test(tests.IrisTest): + data_variables = [ + CFDataVariable("var1", MagicMock(standard_name="x_wind")), + CFDataVariable("var2", MagicMock(standard_name="y_wind")), + CFDataVariable("var1", MagicMock(long_name="x component of wind")), + CFDataVariable( + "var1", + MagicMock(standard_name="x_wind", long_name="x component of wind"), + ), + CFDataVariable("var1", MagicMock()), + ] + + def test_multiple_constraints(self): + constrs = [ + iris.NameConstraint(standard_name="x_wind"), + iris.NameConstraint(var_name="var1"), + ] + result = _translate_constraints_to_var_callback(constrs) + self.assertIsNone(result) + + def test_non_NameConstraint(self): + constr = iris.AttributeConstraint(STASH="m01s00i002") + result = _translate_constraints_to_var_callback(constr) + self.assertIsNone(result) + + def test_str_constraint(self): + result = _translate_constraints_to_var_callback("x_wind") + self.assertIsNone(result) + + def test_Constaint_with_name(self): + constr = iris.Constraint(name="x_wind") + result = _translate_constraints_to_var_callback(constr) + self.assertIsNone(result) + + def test_NameConstraint_standard_name(self): + constr = iris.NameConstraint(standard_name="x_wind") + callback = _translate_constraints_to_var_callback(constr) + result = [callback(var) for var in self.data_variables] + self.assertArrayEqual(result, [True, False, False, True, False]) + + def test_NameConstraint_long_name(self): + constr = iris.NameConstraint(long_name="x component of wind") + callback = _translate_constraints_to_var_callback(constr) + result = [callback(var) for var in self.data_variables] + self.assertArrayEqual(result, [False, False, True, True, False]) + + 
def test_NameConstraint_var_name(self): + constr = iris.NameConstraint(var_name="var1") + callback = _translate_constraints_to_var_callback(constr) + result = [callback(var) for var in self.data_variables] + self.assertArrayEqual(result, [True, False, True, True, True]) + + def test_NameConstraint_standard_name_var_name(self): + constr = iris.NameConstraint(standard_name="x_wind", var_name="var1") + callback = _translate_constraints_to_var_callback(constr) + result = [callback(var) for var in self.data_variables] + self.assertArrayEqual(result, [True, False, False, True, False]) + + def test_NameConstraint_standard_name_long_name_var_name(self): + constr = iris.NameConstraint( + standard_name="x_wind", + long_name="x component of wind", + var_name="var1", + ) + callback = _translate_constraints_to_var_callback(constr) + result = [callback(var) for var in self.data_variables] + self.assertArrayEqual(result, [False, False, False, True, False]) + + def test_NameConstraint_with_STASH(self): + constr = iris.NameConstraint( + standard_name="x_wind", STASH="m01s00i024" + ) + result = _translate_constraints_to_var_callback(constr) + self.assertIsNone(result) + + +if __name__ == "__main__": + tests.main()