diff --git a/lib/iris/fileformats/netcdf/loader.py b/lib/iris/fileformats/netcdf/loader.py index 107415c79d..934d5d11ea 100644 --- a/lib/iris/fileformats/netcdf/loader.py +++ b/lib/iris/fileformats/netcdf/loader.py @@ -241,17 +241,19 @@ def _get_cf_var_data(cf_var, filename): result = as_lazy_data(proxy, chunks=None, dask_chunking=True) else: chunks = cf_var.cf_data.chunking() - if ( - chunks is None - and CHUNK_CONTROL.mode is ChunkControl.Modes.FROM_FILE - ): - raise KeyError( - f"{cf_var.cf_name} does not contain pre-existing chunk specifications." - f"Instead, you might wish to use CHUNK_CONTROL.set(), or just use default" - f" behaviour outside of a context manager. " - ) # In the "contiguous" case, pass chunks=None to 'as_lazy_data'. if chunks == "contiguous": + if ( + CHUNK_CONTROL.mode is ChunkControl.Modes.FROM_FILE + and isinstance( + cf_var, iris.fileformats.cf.CFDataVariable + ) + ): + raise KeyError( + f"{cf_var.cf_name} does not contain pre-existing chunk specifications." + f" Instead, you might wish to use CHUNK_CONTROL.set(), or just use default" + f" behaviour outside of a context manager. " + ) # Equivalent to chunks=None, but value required by chunking control chunks = list(cf_var.shape) diff --git a/lib/iris/tests/integration/test_netcdf__chunk_control.py b/lib/iris/tests/integration/test_netcdf__chunk_control.py deleted file mode 100644 index 03c952d63a..0000000000 --- a/lib/iris/tests/integration/test_netcdf__chunk_control.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the LGPL license. -# See COPYING and COPYING.LESSER in the root of the repository for full -# licensing details. -"""Integration tests for loading and saving netcdf files.""" - -# Import iris.tests first so that some things can be initialised before -# importing anything else. 
-import iris.tests as tests # isort:skip - -from pathlib import Path -import shutil -import tempfile - -import iris -from iris.fileformats.netcdf import loader -from iris.fileformats.netcdf.loader import CHUNK_CONTROL -import iris.tests.stock as istk - - -class TestChunking(tests.IrisTest): - @classmethod - def setUpClass(cls): - cls.old_min_bytes = loader._LAZYVAR_MIN_BYTES - loader._LAZYVAR_MIN_BYTES = 0 - cls.temp_dir = tempfile.mkdtemp() - cube = istk.simple_4d_with_hybrid_height() - cls.cube = cube - cls.cube_varname = "my_var" - cls.sigma_varname = "my_sigma" - cube.var_name = cls.cube_varname - cube.coord("sigma").var_name = cls.sigma_varname - cube.coord("sigma").guess_bounds() - cls.tempfile_path = Path(cls.temp_dir) / "tmp.nc" - iris.save(cls.cube, cls.tempfile_path) - - @classmethod - def tearDownClass(cls): - shutil.rmtree(cls.temp_dir) - loader._LAZYVAR_MIN_BYTES = cls.old_min_bytes - - def test_default(self): - cube = iris.load_cube(self.tempfile_path, self.cube_varname) - self.assertEqual((3, 4, 5, 6), cube.shape) - self.assertEqual((3, 4, 5, 6), cube.lazy_data().chunksize) - sigma = cube.coord("sigma") - self.assertEqual((4,), sigma.shape) - self.assertEqual((4,), sigma.lazy_points().chunksize) - self.assertEqual((4, 2), sigma.lazy_bounds().chunksize) - - def test_control_global(self): - with CHUNK_CONTROL.set(model_level_number=2): - cube = iris.load_cube(self.tempfile_path, self.cube_varname) - self.assertEqual((3, 4, 5, 6), cube.shape) - self.assertEqual((3, 2, 5, 6), cube.lazy_data().chunksize) - sigma = cube.coord("sigma") - self.assertEqual((4,), sigma.shape) - self.assertEqual((2,), sigma.lazy_points().chunksize) - self.assertEqual((2, 2), sigma.lazy_bounds().chunksize) - - def test_control_sigma_only(self): - with CHUNK_CONTROL.set(self.sigma_varname, model_level_number=2): - cube = iris.load_cube(self.tempfile_path, self.cube_varname) - self.assertEqual((3, 4, 5, 6), cube.shape) - self.assertEqual((3, 4, 5, 6), cube.lazy_data().chunksize) - 
sigma = cube.coord("sigma") - self.assertEqual((4,), sigma.shape) - self.assertEqual((2,), sigma.lazy_points().chunksize) - # N.B. this does not apply to bounds array - self.assertEqual((4, 2), sigma.lazy_bounds().chunksize) - - def test_control_cube_var(self): - with CHUNK_CONTROL.set(self.cube_varname, model_level_number=2): - cube = iris.load_cube(self.tempfile_path, self.cube_varname) - self.assertEqual((3, 4, 5, 6), cube.shape) - self.assertEqual((3, 2, 5, 6), cube.lazy_data().chunksize) - sigma = cube.coord("sigma") - self.assertEqual((4,), sigma.shape) - self.assertEqual((2,), sigma.lazy_points().chunksize) - self.assertEqual((2, 2), sigma.lazy_bounds().chunksize) - - def test_control_multiple(self): - with CHUNK_CONTROL.set( - self.cube_varname, model_level_number=2 - ), CHUNK_CONTROL.set(self.sigma_varname, model_level_number=3): - cube = iris.load_cube(self.tempfile_path, self.cube_varname) - self.assertEqual((3, 4, 5, 6), cube.shape) - self.assertEqual((3, 2, 5, 6), cube.lazy_data().chunksize) - sigma = cube.coord("sigma") - self.assertEqual((4,), sigma.shape) - self.assertEqual((3,), sigma.lazy_points().chunksize) - self.assertEqual((2, 2), sigma.lazy_bounds().chunksize) - - -if __name__ == "__main__": - tests.main() diff --git a/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py b/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py new file mode 100644 index 0000000000..7249c39829 --- /dev/null +++ b/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py @@ -0,0 +1,216 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Unit tests for :class:`iris.fileformats.netcdf.loader.ChunkControl`.""" + +# Import iris.tests first so that some things can be initialised before +# importing anything else. 
+import iris.tests as tests # isort:skip +from unittest.mock import ANY, patch + +import dask +import numpy as np +import pytest + +import iris +from iris.cube import CubeList +from iris.fileformats.netcdf import loader +from iris.fileformats.netcdf.loader import CHUNK_CONTROL +import iris.tests.stock as istk + + +@pytest.fixture() +def save_cubelist_with_sigma(tmp_filepath): + cube = istk.simple_4d_with_hybrid_height() + cube_varname = "my_var" + sigma_varname = "my_sigma" + cube.var_name = cube_varname + cube.coord("sigma").var_name = sigma_varname + cube.coord("sigma").guess_bounds() + iris.save(cube, tmp_filepath) + return cube_varname, sigma_varname + + +@pytest.fixture +def save_cube_with_chunksize(tmp_filepath): + cube = istk.simple_3d() + # adding an aux coord allows us to test that + # iris.fileformats.netcdf.loader._get_cf_var_data() + # will only throw an error if from_file mode is + # True when the entire cube has no specified chunking + aux = iris.coords.AuxCoord( + points=np.zeros((3, 4)), + long_name="random", + units="1", + ) + cube.add_aux_coord(aux, [1, 2]) + iris.save(cube, tmp_filepath, chunksizes=(1, 3, 4)) + + +@pytest.fixture(scope="session") +def tmp_filepath(tmp_path_factory): + tmp_dir = tmp_path_factory.mktemp("data") + tmp_path = tmp_dir / "tmp.nc" + return str(tmp_path) + + +@pytest.fixture(autouse=True) +def remove_min_bytes(): + old_min_bytes = loader._LAZYVAR_MIN_BYTES + loader._LAZYVAR_MIN_BYTES = 0 + yield + loader._LAZYVAR_MIN_BYTES = old_min_bytes + + +def test_default(tmp_filepath, save_cubelist_with_sigma): + cube_varname, _ = save_cubelist_with_sigma + cubes = CubeList(loader.load_cubes(tmp_filepath)) + cube = cubes.extract_cube(cube_varname) + assert cube.shape == (3, 4, 5, 6) + assert cube.lazy_data().chunksize == (3, 4, 5, 6) + + sigma = cube.coord("sigma") + assert sigma.shape == (4,) + assert sigma.lazy_points().chunksize == (4,) + assert sigma.lazy_bounds().chunksize == (4, 2) + + +def test_control_global(tmp_filepath, 
 save_cubelist_with_sigma): + cube_varname, _ = save_cubelist_with_sigma + with CHUNK_CONTROL.set(model_level_number=2): + cubes = CubeList(loader.load_cubes(tmp_filepath)) + cube = cubes.extract_cube(cube_varname) + assert cube.shape == (3, 4, 5, 6) + assert cube.lazy_data().chunksize == (3, 2, 5, 6) + + sigma = cube.coord("sigma") + assert sigma.shape == (4,) + assert sigma.lazy_points().chunksize == (2,) + assert sigma.lazy_bounds().chunksize == (2, 2) + + +def test_control_sigma_only(tmp_filepath, save_cubelist_with_sigma): + cube_varname, sigma_varname = save_cubelist_with_sigma + with CHUNK_CONTROL.set(sigma_varname, model_level_number=2): + cubes = CubeList(loader.load_cubes(tmp_filepath)) + cube = cubes.extract_cube(cube_varname) + assert cube.shape == (3, 4, 5, 6) + assert cube.lazy_data().chunksize == (3, 4, 5, 6) + + sigma = cube.coord("sigma") + assert sigma.shape == (4,) + assert sigma.lazy_points().chunksize == (2,) + # N.B. this does not apply to bounds array + assert sigma.lazy_bounds().chunksize == (4, 2) + + +def test_control_cube_var(tmp_filepath, save_cubelist_with_sigma): + cube_varname, _ = save_cubelist_with_sigma + with CHUNK_CONTROL.set(cube_varname, model_level_number=2): + cubes = CubeList(loader.load_cubes(tmp_filepath)) + cube = cubes.extract_cube(cube_varname) + assert cube.shape == (3, 4, 5, 6) + assert cube.lazy_data().chunksize == (3, 2, 5, 6) + + sigma = cube.coord("sigma") + assert sigma.shape == (4,) + assert sigma.lazy_points().chunksize == (2,) + assert sigma.lazy_bounds().chunksize == (2, 2) + + +def test_invalid_chunksize(tmp_filepath, save_cubelist_with_sigma): + with pytest.raises(ValueError): + with CHUNK_CONTROL.set(model_level_number="2"): + CubeList(loader.load_cubes(tmp_filepath)) + + +def test_invalid_var_name(tmp_filepath, save_cubelist_with_sigma): + with pytest.raises(ValueError): + with CHUNK_CONTROL.set([1, 2], model_level_number="2"): + CubeList(loader.load_cubes(tmp_filepath)) + + +def 
test_control_multiple(tmp_filepath, save_cubelist_with_sigma): + cube_varname, sigma_varname = save_cubelist_with_sigma + with CHUNK_CONTROL.set( + cube_varname, model_level_number=2 + ), CHUNK_CONTROL.set(sigma_varname, model_level_number=3): + cubes = CubeList(loader.load_cubes(tmp_filepath)) + cube = cubes.extract_cube(cube_varname) + assert cube.shape == (3, 4, 5, 6) + assert cube.lazy_data().chunksize == (3, 2, 5, 6) + + sigma = cube.coord("sigma") + assert sigma.shape == (4,) + assert sigma.lazy_points().chunksize == (3,) + assert sigma.lazy_bounds().chunksize == (2, 2) + + +def test_neg_one(tmp_filepath, save_cubelist_with_sigma): + cube_varname, _ = save_cubelist_with_sigma + with dask.config.set({"array.chunk-size": "50B"}): + with CHUNK_CONTROL.set(model_level_number=-1): + cubes = CubeList(loader.load_cubes(tmp_filepath)) + cube = cubes.extract_cube(cube_varname) + assert cube.shape == (3, 4, 5, 6) + # uses known good output + assert cube.lazy_data().chunksize == (1, 4, 1, 1) + + sigma = cube.coord("sigma") + assert sigma.shape == (4,) + assert sigma.lazy_points().chunksize == (4,) + assert sigma.lazy_bounds().chunksize == (4, 1) + + +def test_from_file(tmp_filepath, save_cube_with_chunksize): + with CHUNK_CONTROL.from_file(): + cube = next(loader.load_cubes(tmp_filepath)) + assert cube.shape == (2, 3, 4) + assert cube.lazy_data().chunksize == (1, 3, 4) + + +def test_no_chunks_from_file(tmp_filepath, save_cubelist_with_sigma): + cube_varname, _ = save_cubelist_with_sigma + with pytest.raises(KeyError): + with CHUNK_CONTROL.from_file(): + CubeList(loader.load_cubes(tmp_filepath)) + + +def test_as_dask(tmp_filepath, save_cubelist_with_sigma): + """ + This does not test return values, as we can't be sure + dask chunking behaviour won't change, or that it will differ + from our own chunking behaviour. 
+ """ + message = "Mock called, rest of test unneeded" + with patch("iris.fileformats.netcdf.loader.as_lazy_data") as as_lazy_data: + as_lazy_data.side_effect = RuntimeError(message) + with CHUNK_CONTROL.as_dask(): + try: + CubeList(loader.load_cubes(tmp_filepath)) + except RuntimeError as e: + if str(e) != message: + raise e + as_lazy_data.assert_called_with(ANY, chunks=None, dask_chunking=True) + + +def test_pinned_optimisation(tmp_filepath, save_cubelist_with_sigma): + cube_varname, _ = save_cubelist_with_sigma + with dask.config.set({"array.chunk-size": "250B"}): + with CHUNK_CONTROL.set(model_level_number=2): + cubes = CubeList(loader.load_cubes(tmp_filepath)) + cube = cubes.extract_cube(cube_varname) + assert cube.shape == (3, 4, 5, 6) + # uses known good output + # known good output WITHOUT pinning: (1, 1, 5, 6) + assert cube.lazy_data().chunksize == (1, 2, 2, 6) + + sigma = cube.coord("sigma") + assert sigma.shape == (4,) + assert sigma.lazy_points().chunksize == (2,) + assert sigma.lazy_bounds().chunksize == (2, 2) + + +if __name__ == "__main__": + tests.main() diff --git a/lib/iris/tests/unit/fileformats/netcdf/loader/test__get_cf_var_data.py b/lib/iris/tests/unit/fileformats/netcdf/loader/test__get_cf_var_data.py index 6c487d74e7..af4a249866 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/loader/test__get_cf_var_data.py +++ b/lib/iris/tests/unit/fileformats/netcdf/loader/test__get_cf_var_data.py @@ -15,7 +15,7 @@ from iris._lazy_data import _optimum_chunksize import iris.fileformats.cf -from iris.fileformats.netcdf.loader import _get_cf_var_data +from iris.fileformats.netcdf.loader import CHUNK_CONTROL, _get_cf_var_data class Test__get_cf_var_data(tests.IrisTest): @@ -30,6 +30,7 @@ def _make( cf_data = mock.MagicMock( _FillValue=None, __getitem__="", + dimensions=["dim_" + str(x) for x in range(len(shape or "1"))], ) cf_data.chunking = mock.MagicMock(return_value=chunksizes) if shape is None: @@ -61,6 +62,16 @@ def test_cf_data_chunks(self): 
expected_chunks = _optimum_chunksize(chunks, self.shape) self.assertArrayEqual(lazy_data_chunks, expected_chunks) + def test_cf_data_chunk_control(self): + # more thorough testing can be found at `test__chunk_control` + chunks = [2500, 240, 200] + cf_var = self._make(shape=(2500, 240, 200), chunksizes=chunks) + with CHUNK_CONTROL.set(dim_0=25, dim_1=24, dim_2=20): + lazy_data = _get_cf_var_data(cf_var, self.filename) + lazy_data_chunks = [c[0] for c in lazy_data.chunks] + expected_chunks = (25, 24, 20) + self.assertArrayEqual(lazy_data_chunks, expected_chunks) + def test_cf_data_no_chunks(self): # No chunks means chunks are calculated from the array's shape by # `iris._lazy_data._optimum_chunksize()`. diff --git a/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py b/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py index 7e4901dce2..3e464243bf 100644 --- a/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py +++ b/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py @@ -42,6 +42,25 @@ def test_non_default_chunks(self): (result,) = np.unique(lazy_data.chunks) self.assertEqual(result, 24) + def test_dask_chunking(self): + data = np.arange(24) + chunks = (12,) + optimum = self.patch("iris._lazy_data._optimum_chunksize") + optimum.return_value = chunks + _ = as_lazy_data(data, chunks=None, dask_chunking=True) + self.assertFalse(optimum.called) + + def test_dask_chunking_error(self): + data = np.arange(24) + chunks = (12,) + optimum = self.patch("iris._lazy_data._optimum_chunksize") + optimum.return_value = chunks + with self.assertRaisesRegex( + ValueError, + r"Dask chunking chosen, but chunks already assigned value", + ): + as_lazy_data(data, chunks=chunks, dask_chunking=True) + def test_with_masked_constant(self): masked_data = ma.masked_array([8], mask=True) masked_constant = masked_data[0]