From 78a62d85c1b919377e21be9684169c5290706e27 Mon Sep 17 00:00:00 2001 From: Elias Sadek Date: Wed, 15 Nov 2023 14:11:05 +0000 Subject: [PATCH 01/18] converted tests to pytest, added neg_one, and incomplete from_file and as_dask tests --- .../integration/netcdf/test__chunk_control.py | 141 ++++++++++++++++++ .../integration/test_netcdf__chunk_control.py | 97 ------------ 2 files changed, 141 insertions(+), 97 deletions(-) create mode 100644 lib/iris/tests/integration/netcdf/test__chunk_control.py delete mode 100644 lib/iris/tests/integration/test_netcdf__chunk_control.py diff --git a/lib/iris/tests/integration/netcdf/test__chunk_control.py b/lib/iris/tests/integration/netcdf/test__chunk_control.py new file mode 100644 index 0000000000..fbb234972b --- /dev/null +++ b/lib/iris/tests/integration/netcdf/test__chunk_control.py @@ -0,0 +1,141 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the LGPL license. +# See COPYING and COPYING.LESSER in the root of the repository for full +# licensing details. +"""Integration tests for loading and saving netcdf files.""" + +# Import iris.tests first so that some things can be initialised before +# importing anything else. +import iris.tests as tests # isort:skip + +from pathlib import Path +import pytest +import shutil +import tempfile + +import iris +from iris.fileformats.netcdf import loader +from iris.fileformats.netcdf.loader import CHUNK_CONTROL +import iris.tests.stock as istk + + +@pytest.fixture() +def create_cube(tmp_filepath): + cube = istk.simple_4d_with_hybrid_height() + cube_varname = "my_var" + sigma_varname = "my_sigma" + cube.var_name = cube_varname + cube.coord("sigma").var_name = sigma_varname + cube.coord("sigma").guess_bounds() + iris.save(cube, tmp_filepath) + yield cube_varname, sigma_varname + + +@pytest.fixture +def tmp_filepath(): + tmp_dir = Path(tempfile.mkdtemp()) + tmp_path = tmp_dir / "tmp.nc" + yield tmp_path + shutil.rmtree(tmp_dir) + + +@pytest.fixture(autouse=True) +def remove_min_bytes(): + old_min_bytes = loader._LAZYVAR_MIN_BYTES + loader._LAZYVAR_MIN_BYTES = 0 + yield None + loader._LAZYVAR_MIN_BYTES = old_min_bytes + + +def test_default(tmp_filepath, create_cube): + cube = iris.load_cube(tmp_filepath, create_cube[0]) + assert cube.shape == (3, 4, 5, 6) + assert cube.lazy_data().chunksize == (3, 4, 5, 6) + + sigma = cube.coord("sigma") + assert sigma.shape == (4,) + assert sigma.lazy_points().chunksize == (4,) + assert sigma.lazy_bounds().chunksize == (4, 2) + +def test_control_global(tmp_filepath, create_cube): + with CHUNK_CONTROL.set(model_level_number=2): + cube = iris.load_cube(tmp_filepath, create_cube[0]) + assert cube.shape == (3, 4, 5, 6) + assert cube.lazy_data().chunksize == (3, 2, 5, 6) + + sigma = cube.coord("sigma") + assert sigma.shape == (4,) + assert sigma.lazy_points().chunksize == (2,) + assert sigma.lazy_bounds().chunksize == (2, 2) + +def test_control_sigma_only(tmp_filepath, create_cube): + with CHUNK_CONTROL.set(create_cube[1], model_level_number=2): + cube = iris.load_cube(tmp_filepath, create_cube[0]) + assert cube.shape == (3, 4, 5, 6) + assert cube.lazy_data().chunksize == (3, 4, 5, 6) + + sigma = cube.coord("sigma") + assert sigma.shape == (4,) + assert sigma.lazy_points().chunksize == (2,) + assert sigma.lazy_bounds().chunksize == (4, 2) + +def test_control_cube_var(tmp_filepath, create_cube): + with CHUNK_CONTROL.set(create_cube[0], model_level_number=2): + cube = iris.load_cube(tmp_filepath, create_cube[0]) + assert cube.shape == (3, 4, 5, 6) + 
assert cube.lazy_data().chunksize == (3, 2, 5, 6) + + sigma = cube.coord("sigma") + assert sigma.shape == (4,) + assert sigma.lazy_points().chunksize == (2,) + assert sigma.lazy_bounds().chunksize == (2, 2) + +def test_control_multiple(tmp_filepath, create_cube): + with CHUNK_CONTROL.set( + create_cube[0], model_level_number=2 + ), CHUNK_CONTROL.set(create_cube[1], model_level_number=3): + cube = iris.load_cube(tmp_filepath, create_cube[0]) + assert cube.shape == (3, 4, 5, 6) + assert cube.lazy_data().chunksize == (3, 2, 5, 6) + + sigma = cube.coord("sigma") + assert sigma.shape == (4,) + assert sigma.lazy_points().chunksize == (3,) + assert sigma.lazy_bounds().chunksize == (2, 2) + +def test_neg_one(tmp_filepath, create_cube): + with CHUNK_CONTROL.set(model_level_number=-1): + cube = iris.load_cube(tmp_filepath, create_cube[0]) + assert cube.shape == (3, 4, 5, 6) + assert cube.lazy_data().chunksize == (3, 4, 5, 6) + + sigma = cube.coord("sigma") + assert sigma.shape == (4,) + assert sigma.lazy_points().chunksize == (4,) + assert sigma.lazy_bounds().chunksize == (4, 2) + +def test_from_file(tmp_filepath, create_cube): + with CHUNK_CONTROL.from_file(): + cube = iris.load_cube(tmp_filepath, create_cube[0]) + assert cube.shape == (3, 4, 5, 6) + assert cube.lazy_data().chunksize == (3, 4, 5, 6) + + sigma = cube.coord("sigma") + assert sigma.shape == (4,) + assert sigma.lazy_points().chunksize == (4,) + assert sigma.lazy_bounds().chunksize == (4, 2) + +def test_as_dask(tmp_filepath, create_cube): + with CHUNK_CONTROL.as_dask(): + cube = iris.load_cube(tmp_filepath, create_cube[0]) + assert cube.shape == (3, 4, 5, 6) + assert cube.lazy_data().chunksize == (3, 4, 5, 6) + + sigma = cube.coord("sigma") + assert sigma.shape == (4,) + assert sigma.lazy_points().chunksize == (4,) + assert sigma.lazy_bounds().chunksize == (4, 2) + +if __name__ == "__main__": + tests.main() \ No newline at end of file diff --git a/lib/iris/tests/integration/test_netcdf__chunk_control.py b/lib/iris/tests/integration/test_netcdf__chunk_control.py deleted file mode 100644 index 03c952d63a..0000000000 --- a/lib/iris/tests/integration/test_netcdf__chunk_control.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the LGPL license. -# See COPYING and COPYING.LESSER in the root of the repository for full -# licensing details. -"""Integration tests for loading and saving netcdf files.""" - -# Import iris.tests first so that some things can be initialised before -# importing anything else. 
-import iris.tests as tests # isort:skip - -from pathlib import Path -import shutil -import tempfile - -import iris -from iris.fileformats.netcdf import loader -from iris.fileformats.netcdf.loader import CHUNK_CONTROL -import iris.tests.stock as istk - - -class TestChunking(tests.IrisTest): - @classmethod - def setUpClass(cls): - cls.old_min_bytes = loader._LAZYVAR_MIN_BYTES - loader._LAZYVAR_MIN_BYTES = 0 - cls.temp_dir = tempfile.mkdtemp() - cube = istk.simple_4d_with_hybrid_height() - cls.cube = cube - cls.cube_varname = "my_var" - cls.sigma_varname = "my_sigma" - cube.var_name = cls.cube_varname - cube.coord("sigma").var_name = cls.sigma_varname - cube.coord("sigma").guess_bounds() - cls.tempfile_path = Path(cls.temp_dir) / "tmp.nc" - iris.save(cls.cube, cls.tempfile_path) - - @classmethod - def tearDownClass(cls): - shutil.rmtree(cls.temp_dir) - loader._LAZYVAR_MIN_BYTES = cls.old_min_bytes - - def test_default(self): - cube = iris.load_cube(self.tempfile_path, self.cube_varname) - self.assertEqual((3, 4, 5, 6), cube.shape) - self.assertEqual((3, 4, 5, 6), cube.lazy_data().chunksize) - sigma = cube.coord("sigma") - self.assertEqual((4,), sigma.shape) - self.assertEqual((4,), sigma.lazy_points().chunksize) - self.assertEqual((4, 2), sigma.lazy_bounds().chunksize) - - def test_control_global(self): - with CHUNK_CONTROL.set(model_level_number=2): - cube = iris.load_cube(self.tempfile_path, self.cube_varname) - self.assertEqual((3, 4, 5, 6), cube.shape) - self.assertEqual((3, 2, 5, 6), cube.lazy_data().chunksize) - sigma = cube.coord("sigma") - self.assertEqual((4,), sigma.shape) - self.assertEqual((2,), sigma.lazy_points().chunksize) - self.assertEqual((2, 2), sigma.lazy_bounds().chunksize) - - def test_control_sigma_only(self): - with CHUNK_CONTROL.set(self.sigma_varname, model_level_number=2): - cube = iris.load_cube(self.tempfile_path, self.cube_varname) - self.assertEqual((3, 4, 5, 6), cube.shape) - self.assertEqual((3, 4, 5, 6), cube.lazy_data().chunksize) - sigma = cube.coord("sigma") - self.assertEqual((4,), sigma.shape) - self.assertEqual((2,), sigma.lazy_points().chunksize) - # N.B. 
this does not apply to bounds array - self.assertEqual((4, 2), sigma.lazy_bounds().chunksize) - - def test_control_cube_var(self): - with CHUNK_CONTROL.set(self.cube_varname, model_level_number=2): - cube = iris.load_cube(self.tempfile_path, self.cube_varname) - self.assertEqual((3, 4, 5, 6), cube.shape) - self.assertEqual((3, 2, 5, 6), cube.lazy_data().chunksize) - sigma = cube.coord("sigma") - self.assertEqual((4,), sigma.shape) - self.assertEqual((2,), sigma.lazy_points().chunksize) - self.assertEqual((2, 2), sigma.lazy_bounds().chunksize) - - def test_control_multiple(self): - with CHUNK_CONTROL.set( - self.cube_varname, model_level_number=2 - ), CHUNK_CONTROL.set(self.sigma_varname, model_level_number=3): - cube = iris.load_cube(self.tempfile_path, self.cube_varname) - self.assertEqual((3, 4, 5, 6), cube.shape) - self.assertEqual((3, 2, 5, 6), cube.lazy_data().chunksize) - sigma = cube.coord("sigma") - self.assertEqual((4,), sigma.shape) - self.assertEqual((3,), sigma.lazy_points().chunksize) - self.assertEqual((2, 2), sigma.lazy_bounds().chunksize) - - -if __name__ == "__main__": - tests.main() From a54f424ca90af42f992a151ffda2977427f646ff Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 15 Nov 2023 14:19:42 +0000 Subject: [PATCH 02/18] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../tests/integration/netcdf/test__chunk_control.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/lib/iris/tests/integration/netcdf/test__chunk_control.py b/lib/iris/tests/integration/netcdf/test__chunk_control.py index fbb234972b..f31d0a8915 100644 --- a/lib/iris/tests/integration/netcdf/test__chunk_control.py +++ b/lib/iris/tests/integration/netcdf/test__chunk_control.py @@ -10,10 +10,11 @@ import iris.tests as tests # isort:skip from pathlib import Path -import pytest import shutil import tempfile +import pytest + import iris from iris.fileformats.netcdf import loader from iris.fileformats.netcdf.loader import CHUNK_CONTROL @@ -58,6 +59,7 @@ def test_default(tmp_filepath, create_cube): assert sigma.lazy_points().chunksize == (4,) assert sigma.lazy_bounds().chunksize == (4, 2) + def test_control_global(tmp_filepath, create_cube): with CHUNK_CONTROL.set(model_level_number=2): cube = iris.load_cube(tmp_filepath, create_cube[0]) @@ -69,6 +71,7 @@ def test_control_global(tmp_filepath, create_cube): assert sigma.lazy_points().chunksize == (2,) assert sigma.lazy_bounds().chunksize == (2, 2) + def test_control_sigma_only(tmp_filepath, create_cube): with CHUNK_CONTROL.set(create_cube[1], model_level_number=2): cube = iris.load_cube(tmp_filepath, create_cube[0]) @@ -80,6 +83,7 @@ def test_control_sigma_only(tmp_filepath, create_cube): assert sigma.lazy_points().chunksize == (2,) assert sigma.lazy_bounds().chunksize == (4, 2) + def test_control_cube_var(tmp_filepath, create_cube): with CHUNK_CONTROL.set(create_cube[0], model_level_number=2): cube = iris.load_cube(tmp_filepath, create_cube[0]) @@ -91,6 +95,7 @@ def test_control_cube_var(tmp_filepath, create_cube): assert sigma.lazy_points().chunksize == (2,) assert sigma.lazy_bounds().chunksize == (2, 2) + def test_control_multiple(tmp_filepath, create_cube): with CHUNK_CONTROL.set( create_cube[0], model_level_number=2 @@ -104,6 +109,7 @@ def test_control_multiple(tmp_filepath, create_cube): assert sigma.lazy_points().chunksize == (3,) assert sigma.lazy_bounds().chunksize == (2, 2) + def 
test_neg_one(tmp_filepath, create_cube): with CHUNK_CONTROL.set(model_level_number=-1): cube = iris.load_cube(tmp_filepath, create_cube[0]) @@ -115,6 +121,7 @@ def test_neg_one(tmp_filepath, create_cube): assert sigma.lazy_points().chunksize == (4,) assert sigma.lazy_bounds().chunksize == (4, 2) + def test_from_file(tmp_filepath, create_cube): with CHUNK_CONTROL.from_file(): cube = iris.load_cube(tmp_filepath, create_cube[0]) @@ -126,6 +133,7 @@ def test_from_file(tmp_filepath, create_cube): assert sigma.lazy_points().chunksize == (4,) assert sigma.lazy_bounds().chunksize == (4, 2) + def test_as_dask(tmp_filepath, create_cube): with CHUNK_CONTROL.as_dask(): cube = iris.load_cube(tmp_filepath, create_cube[0]) @@ -137,5 +145,6 @@ def test_as_dask(tmp_filepath, create_cube): assert sigma.lazy_points().chunksize == (4,) assert sigma.lazy_bounds().chunksize == (4, 2) + if __name__ == "__main__": - tests.main() \ No newline at end of file + tests.main() From c6f115c2080240353d012eae9d602a803009a305 Mon Sep 17 00:00:00 2001 From: Elias Sadek Date: Wed, 15 Nov 2023 15:50:20 +0000 Subject: [PATCH 03/18] added from_file test --- .../integration/netcdf/test__chunk_control.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/lib/iris/tests/integration/netcdf/test__chunk_control.py b/lib/iris/tests/integration/netcdf/test__chunk_control.py index f31d0a8915..ed612e1924 100644 --- a/lib/iris/tests/integration/netcdf/test__chunk_control.py +++ b/lib/iris/tests/integration/netcdf/test__chunk_control.py @@ -32,6 +32,10 @@ def create_cube(tmp_filepath): iris.save(cube, tmp_filepath) yield cube_varname, sigma_varname +@pytest.fixture +def create_file_cube(tmp_filepath): + iris.save(istk.simple_3d(), tmp_filepath, chunksizes=(1, 3, 4)) + yield None @pytest.fixture def tmp_filepath(): @@ -122,16 +126,11 @@ def test_neg_one(tmp_filepath, create_cube): assert sigma.lazy_bounds().chunksize == (4, 2) -def test_from_file(tmp_filepath, create_cube): +def test_from_file(tmp_filepath, create_file_cube): with CHUNK_CONTROL.from_file(): - cube = iris.load_cube(tmp_filepath, create_cube[0]) - assert cube.shape == (3, 4, 5, 6) - assert cube.lazy_data().chunksize == (3, 4, 5, 6) - - sigma = cube.coord("sigma") - assert sigma.shape == (4,) - assert sigma.lazy_points().chunksize == (4,) - assert sigma.lazy_bounds().chunksize == (4, 2) + cube = iris.load_cube(tmp_filepath) + assert cube.shape == (2, 3, 4) + assert cube.lazy_data().chunksize == (1, 3, 4) def test_as_dask(tmp_filepath, create_cube): From 1bb45a15de3535b882e61bf0089a6c1552def1ac Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 15 Nov 2023 15:51:24 +0000 Subject: [PATCH 04/18] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- lib/iris/tests/integration/netcdf/test__chunk_control.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/iris/tests/integration/netcdf/test__chunk_control.py b/lib/iris/tests/integration/netcdf/test__chunk_control.py index ed612e1924..ff0448c2d0 100644 --- a/lib/iris/tests/integration/netcdf/test__chunk_control.py +++ b/lib/iris/tests/integration/netcdf/test__chunk_control.py @@ -32,11 +32,13 @@ def create_cube(tmp_filepath): iris.save(cube, tmp_filepath) yield cube_varname, sigma_varname + @pytest.fixture def create_file_cube(tmp_filepath): iris.save(istk.simple_3d(), tmp_filepath, chunksizes=(1, 3, 4)) yield None + @pytest.fixture def tmp_filepath(): tmp_dir = 
Path(tempfile.mkdtemp()) From 82951d3c332f3e0a60da7d981033b2f0320f374f Mon Sep 17 00:00:00 2001 From: Elias Sadek Date: Thu, 16 Nov 2023 11:19:36 +0000 Subject: [PATCH 05/18] added mocking tests --- .../integration/netcdf/test__chunk_control.py | 28 +++++++++++++------ .../tests/unit/lazy_data/test_as_lazy_data.py | 17 +++++++++++ 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/lib/iris/tests/integration/netcdf/test__chunk_control.py b/lib/iris/tests/integration/netcdf/test__chunk_control.py index ed612e1924..ca560ad18f 100644 --- a/lib/iris/tests/integration/netcdf/test__chunk_control.py +++ b/lib/iris/tests/integration/netcdf/test__chunk_control.py @@ -12,13 +12,17 @@ from pathlib import Path import shutil import tempfile - +import dask +from dask import distributed import pytest +from unittest.mock import Mock, ANY import iris from iris.fileformats.netcdf import loader from iris.fileformats.netcdf.loader import CHUNK_CONTROL +from iris import _lazy_data import iris.tests.stock as istk +from numpy import dtype @pytest.fixture() @@ -134,15 +138,21 @@ def test_from_file(tmp_filepath, create_file_cube): def test_as_dask(tmp_filepath, create_cube): + message = "Mock called, rest of test unneeded" + loader.as_lazy_data = Mock(side_effect=RuntimeError(message)) with CHUNK_CONTROL.as_dask(): - cube = iris.load_cube(tmp_filepath, create_cube[0]) - assert cube.shape == (3, 4, 5, 6) - assert cube.lazy_data().chunksize == (3, 4, 5, 6) - - sigma = cube.coord("sigma") - assert sigma.shape == (4,) - assert sigma.lazy_points().chunksize == (4,) - assert sigma.lazy_bounds().chunksize == (4, 2) + try: + iris.load_cube(tmp_filepath, create_cube[0]) + except RuntimeError as e: + if str(e) == message: + pass + else: + raise e + loader.as_lazy_data.assert_called_with( + ANY, + chunks=None, + dask_chunking=True + ) if __name__ == "__main__": diff --git a/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py b/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py index 7e4901dce2..2458c71b5a 100644 --- a/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py +++ b/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py @@ -10,12 +10,14 @@ import iris.tests as tests # isort:skip from unittest import mock +from unittest.mock import Mock import dask.array as da import dask.config import numpy as np import numpy.ma as ma +from iris import _lazy_data from iris._lazy_data import _optimum_chunksize, as_lazy_data @@ -42,6 +44,21 @@ def test_non_default_chunks(self): (result,) = np.unique(lazy_data.chunks) self.assertEqual(result, 24) + def test_dask_chunking(self): + data = np.arange(24) + chunks = (12,) + _lazy_data._optimum_chunksize = Mock(return_value=chunks) + as_lazy_data(data, chunks=None, dask_chunking=True) + self.assertFalse(_lazy_data._optimum_chunksize.called) + + def test_dask_chunking_error(self): + data = np.arange(24) + chunks = (12,) + _lazy_data._optimum_chunksize = Mock(return_value=chunks) + with self.assertRaises(ValueError) as ar: + as_lazy_data(data, chunks=chunks, dask_chunking=True) + self.assertEqual(str(ar.exception), f"Dask chunking chosen, but chunks already assigned value {chunks}") + def test_with_masked_constant(self): masked_data = ma.masked_array([8], mask=True) masked_constant = masked_data[0] From de72114e56064d33c34535198db52540d269b437 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 16 Nov 2023 11:21:06 +0000 Subject: [PATCH 06/18] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see 
https://pre-commit.ci --- .../tests/integration/netcdf/test__chunk_control.py | 11 +++++------ lib/iris/tests/unit/lazy_data/test_as_lazy_data.py | 5 ++++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/lib/iris/tests/integration/netcdf/test__chunk_control.py b/lib/iris/tests/integration/netcdf/test__chunk_control.py index 485ece7c5b..104e575648 100644 --- a/lib/iris/tests/integration/netcdf/test__chunk_control.py +++ b/lib/iris/tests/integration/netcdf/test__chunk_control.py @@ -12,17 +12,18 @@ from pathlib import Path import shutil import tempfile +from unittest.mock import ANY, Mock + import dask from dask import distributed +from numpy import dtype import pytest -from unittest.mock import Mock, ANY import iris +from iris import _lazy_data from iris.fileformats.netcdf import loader from iris.fileformats.netcdf.loader import CHUNK_CONTROL -from iris import _lazy_data import iris.tests.stock as istk -from numpy import dtype @pytest.fixture() @@ -151,9 +152,7 @@ def test_as_dask(tmp_filepath, create_cube): else: raise e loader.as_lazy_data.assert_called_with( - ANY, - chunks=None, - dask_chunking=True + ANY, chunks=None, dask_chunking=True ) diff --git a/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py b/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py index 2458c71b5a..2edf68abe2 100644 --- a/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py +++ b/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py @@ -57,7 +57,10 @@ def test_dask_chunking_error(self): _lazy_data._optimum_chunksize = Mock(return_value=chunks) with self.assertRaises(ValueError) as ar: as_lazy_data(data, chunks=chunks, dask_chunking=True) - self.assertEqual(str(ar.exception), f"Dask chunking chosen, but chunks already assigned value {chunks}") + self.assertEqual( + str(ar.exception), + f"Dask chunking chosen, but chunks already assigned value {chunks}", + ) def test_with_masked_constant(self): masked_data = ma.masked_array([8], mask=True) From 6d939a6cfb46439dcdf378cafaf765f5c1cff0a4 Mon Sep 17 00:00:00 2001 From: Elias Sadek Date: Thu, 16 Nov 2023 12:09:51 +0000 Subject: [PATCH 07/18] trial and error with mocks and patches, may or may not work --- lib/iris/tests/integration/netcdf/test__chunk_control.py | 1 + lib/iris/tests/unit/lazy_data/test_as_lazy_data.py | 9 ++++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/iris/tests/integration/netcdf/test__chunk_control.py b/lib/iris/tests/integration/netcdf/test__chunk_control.py index 485ece7c5b..8d856fbba5 100644 --- a/lib/iris/tests/integration/netcdf/test__chunk_control.py +++ b/lib/iris/tests/integration/netcdf/test__chunk_control.py @@ -141,6 +141,7 @@ def test_from_file(tmp_filepath, create_file_cube): def test_as_dask(tmp_filepath, create_cube): message = "Mock called, rest of test unneeded" + loader.as_lazy_data = Mock(side_effect=RuntimeError(message)) with CHUNK_CONTROL.as_dask(): try: diff --git a/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py b/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py index 2458c71b5a..d14443d0c3 100644 --- a/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py +++ b/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py @@ -47,18 +47,21 @@ def test_non_default_chunks(self): def test_dask_chunking(self): data = np.arange(24) chunks = (12,) - _lazy_data._optimum_chunksize = Mock(return_value=chunks) + optimum = self.patch("iris._lazy_data._optimum_chunksize") + optimum.return_value = chunks as_lazy_data(data, chunks=None, dask_chunking=True) - self.assertFalse(_lazy_data._optimum_chunksize.called) + 
self.assertFalse(optimum.called) def test_dask_chunking_error(self): data = np.arange(24) chunks = (12,) - _lazy_data._optimum_chunksize = Mock(return_value=chunks) + optimum = self.patch("iris._lazy_data._optimum_chunksize") + optimum.return_value = chunks with self.assertRaises(ValueError) as ar: as_lazy_data(data, chunks=chunks, dask_chunking=True) self.assertEqual(str(ar.exception), f"Dask chunking chosen, but chunks already assigned value {chunks}") + def test_with_masked_constant(self): masked_data = ma.masked_array([8], mask=True) masked_constant = masked_data[0] From 8d728aed41509d24a53bd52b22b1d501c5fe7a29 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 16 Nov 2023 12:11:23 +0000 Subject: [PATCH 08/18] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- lib/iris/tests/unit/lazy_data/test_as_lazy_data.py | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py b/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py index 8189a9d8fb..63cf75a3bb 100644 --- a/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py +++ b/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py @@ -64,7 +64,6 @@ def test_dask_chunking_error(self): f"Dask chunking chosen, but chunks already assigned value {chunks}", ) - def test_with_masked_constant(self): masked_data = ma.masked_array([8], mask=True) masked_constant = masked_data[0] From 36ba71e2d346aae6c10512d910e8a80eea52959c Mon Sep 17 00:00:00 2001 From: Elias Sadek Date: Thu, 16 Nov 2023 14:12:18 +0000 Subject: [PATCH 09/18] converted Mock to patch in as_dask test --- .../integration/netcdf/test__chunk_control.py | 31 +++++++++---------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/lib/iris/tests/integration/netcdf/test__chunk_control.py b/lib/iris/tests/integration/netcdf/test__chunk_control.py index 0f2157f853..bd0073dd64 100644 --- a/lib/iris/tests/integration/netcdf/test__chunk_control.py +++ b/lib/iris/tests/integration/netcdf/test__chunk_control.py @@ -12,15 +12,13 @@ from pathlib import Path import shutil import tempfile -from unittest.mock import ANY, Mock +from unittest.mock import ANY, patch import dask from dask import distributed -from numpy import dtype import pytest import iris -from iris import _lazy_data from iris.fileformats.netcdf import loader from iris.fileformats.netcdf.loader import CHUNK_CONTROL import iris.tests.stock as istk @@ -140,21 +138,22 @@ def test_from_file(tmp_filepath, create_file_cube): assert cube.lazy_data().chunksize == (1, 3, 4) + def test_as_dask(tmp_filepath, create_cube): message = "Mock called, rest of test unneeded" - - loader.as_lazy_data = Mock(side_effect=RuntimeError(message)) - with CHUNK_CONTROL.as_dask(): - try: - iris.load_cube(tmp_filepath, create_cube[0]) - except RuntimeError as e: - if str(e) == message: - pass - else: - raise e - loader.as_lazy_data.assert_called_with( - ANY, chunks=None, dask_chunking=True - ) + with patch("iris.fileformats.netcdf.loader.as_lazy_data") as optimum: + optimum.side_effect = RuntimeError(message) + with CHUNK_CONTROL.as_dask(): + try: + iris.load_cube(tmp_filepath, create_cube[0]) + except RuntimeError as e: + if str(e) == message: + pass + else: + raise e + optimum.assert_called_with( + ANY, chunks=None, dask_chunking=True + ) if __name__ == "__main__": From 0b635816aa053e9d2ba1371f4aea92c79abaea79 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" 
<66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 16 Nov 2023 14:13:36 +0000 Subject: [PATCH 10/18] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- lib/iris/tests/integration/netcdf/test__chunk_control.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lib/iris/tests/integration/netcdf/test__chunk_control.py b/lib/iris/tests/integration/netcdf/test__chunk_control.py index bd0073dd64..0078f531e6 100644 --- a/lib/iris/tests/integration/netcdf/test__chunk_control.py +++ b/lib/iris/tests/integration/netcdf/test__chunk_control.py @@ -138,7 +138,6 @@ def test_from_file(tmp_filepath, create_file_cube): assert cube.lazy_data().chunksize == (1, 3, 4) - def test_as_dask(tmp_filepath, create_cube): message = "Mock called, rest of test unneeded" with patch("iris.fileformats.netcdf.loader.as_lazy_data") as optimum: @@ -151,9 +150,7 @@ def test_as_dask(tmp_filepath, create_cube): pass else: raise e - optimum.assert_called_with( - ANY, chunks=None, dask_chunking=True - ) + optimum.assert_called_with(ANY, chunks=None, dask_chunking=True) if __name__ == "__main__": From 82a10b64bb326b9f1ff7a5a99bd109e9d5d4e054 Mon Sep 17 00:00:00 2001 From: Elias Sadek Date: Thu, 16 Nov 2023 16:15:14 +0000 Subject: [PATCH 11/18] review comment changes --- .../netcdf/loader}/test__chunk_control.py | 95 +++++++++++-------- .../tests/unit/lazy_data/test_as_lazy_data.py | 11 +-- 2 files changed, 62 insertions(+), 44 deletions(-) rename lib/iris/tests/{integration/netcdf => unit/fileformats/netcdf/loader}/test__chunk_control.py (55%) diff --git a/lib/iris/tests/integration/netcdf/test__chunk_control.py b/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py similarity index 55% rename from lib/iris/tests/integration/netcdf/test__chunk_control.py rename to lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py index bd0073dd64..9520abbbb6 100644 --- a/lib/iris/tests/integration/netcdf/test__chunk_control.py +++ b/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py @@ -15,17 +15,18 @@ from unittest.mock import ANY, patch import dask -from dask import distributed +from dask import array as da import pytest import iris from iris.fileformats.netcdf import loader from iris.fileformats.netcdf.loader import CHUNK_CONTROL +from iris.cube import CubeList import iris.tests.stock as istk @pytest.fixture() -def create_cube(tmp_filepath): +def save_cubelist_with_sigma(tmp_filepath): cube = istk.simple_4d_with_hybrid_height() cube_varname = "my_var" sigma_varname = "my_sigma" @@ -33,33 +34,41 @@ def create_cube(tmp_filepath): cube.coord("sigma").var_name = sigma_varname cube.coord("sigma").guess_bounds() iris.save(cube, tmp_filepath) - yield cube_varname, sigma_varname + return cube_varname, sigma_varname @pytest.fixture -def create_file_cube(tmp_filepath): +def save_cube_with_chunksize(tmp_filepath): iris.save(istk.simple_3d(), tmp_filepath, chunksizes=(1, 3, 4)) - yield None -@pytest.fixture -def tmp_filepath(): - tmp_dir = Path(tempfile.mkdtemp()) +# @pytest.fixture +# def tmp_filepath(): +# tmp_dir = Path(tempfile.mkdtemp()) +# tmp_path = tmp_dir / "tmp.nc" +# yield str(tmp_path) +# shutil.rmtree(tmp_dir) + +@pytest.fixture(scope="session") +def tmp_filepath(tmp_path_factory): + tmp_dir = tmp_path_factory.mktemp("data") tmp_path = tmp_dir / "tmp.nc" - yield tmp_path - shutil.rmtree(tmp_dir) + yield str(tmp_path) + @pytest.fixture(autouse=True) def remove_min_bytes(): old_min_bytes = 
loader._LAZYVAR_MIN_BYTES loader._LAZYVAR_MIN_BYTES = 0 - yield None + yield loader._LAZYVAR_MIN_BYTES = old_min_bytes -def test_default(tmp_filepath, create_cube): - cube = iris.load_cube(tmp_filepath, create_cube[0]) +def test_default(tmp_filepath, save_cubelist_with_sigma): + cube_varname, _ = save_cubelist_with_sigma + cubes = CubeList(loader.load_cubes(tmp_filepath)) + cube = cubes.extract_cube(cube_varname) assert cube.shape == (3, 4, 5, 6) assert cube.lazy_data().chunksize == (3, 4, 5, 6) @@ -69,9 +78,11 @@ def test_default(tmp_filepath, create_cube): assert sigma.lazy_bounds().chunksize == (4, 2) -def test_control_global(tmp_filepath, create_cube): +def test_control_global(tmp_filepath, save_cubelist_with_sigma): + cube_varname, _ = save_cubelist_with_sigma with CHUNK_CONTROL.set(model_level_number=2): - cube = iris.load_cube(tmp_filepath, create_cube[0]) + cubes = CubeList(loader.load_cubes(tmp_filepath)) + cube = cubes.extract_cube(cube_varname) assert cube.shape == (3, 4, 5, 6) assert cube.lazy_data().chunksize == (3, 2, 5, 6) @@ -81,21 +92,26 @@ def test_control_global(tmp_filepath, create_cube): assert sigma.lazy_bounds().chunksize == (2, 2) -def test_control_sigma_only(tmp_filepath, create_cube): - with CHUNK_CONTROL.set(create_cube[1], model_level_number=2): - cube = iris.load_cube(tmp_filepath, create_cube[0]) +def test_control_sigma_only(tmp_filepath, save_cubelist_with_sigma): + cube_varname, sigma_varname = save_cubelist_with_sigma + with CHUNK_CONTROL.set(sigma_varname, model_level_number=2): + cubes = CubeList(loader.load_cubes(tmp_filepath)) + cube = cubes.extract_cube(cube_varname) assert cube.shape == (3, 4, 5, 6) assert cube.lazy_data().chunksize == (3, 4, 5, 6) sigma = cube.coord("sigma") assert sigma.shape == (4,) assert sigma.lazy_points().chunksize == (2,) + # N.B. 
this does not apply to bounds array assert sigma.lazy_bounds().chunksize == (4, 2) -def test_control_cube_var(tmp_filepath, create_cube): - with CHUNK_CONTROL.set(create_cube[0], model_level_number=2): - cube = iris.load_cube(tmp_filepath, create_cube[0]) +def test_control_cube_var(tmp_filepath, save_cubelist_with_sigma): + cube_varname, _ = save_cubelist_with_sigma + with CHUNK_CONTROL.set(cube_varname, model_level_number=2): + cubes = CubeList(loader.load_cubes(tmp_filepath)) + cube = cubes.extract_cube(cube_varname) assert cube.shape == (3, 4, 5, 6) assert cube.lazy_data().chunksize == (3, 2, 5, 6) @@ -105,11 +121,13 @@ def test_control_cube_var(tmp_filepath, create_cube): assert sigma.lazy_bounds().chunksize == (2, 2) -def test_control_multiple(tmp_filepath, create_cube): +def test_control_multiple(tmp_filepath, save_cubelist_with_sigma): + cube_varname, sigma_varname = save_cubelist_with_sigma with CHUNK_CONTROL.set( - create_cube[0], model_level_number=2 - ), CHUNK_CONTROL.set(create_cube[1], model_level_number=3): - cube = iris.load_cube(tmp_filepath, create_cube[0]) + cube_varname, model_level_number=2 + ), CHUNK_CONTROL.set(sigma_varname, model_level_number=3): + cubes = CubeList(loader.load_cubes(tmp_filepath)) + cube = cubes.extract_cube(cube_varname) assert cube.shape == (3, 4, 5, 6) assert cube.lazy_data().chunksize == (3, 2, 5, 6) @@ -119,37 +137,38 @@ def test_control_multiple(tmp_filepath, create_cube): assert sigma.lazy_bounds().chunksize == (2, 2) -def test_neg_one(tmp_filepath, create_cube): - with CHUNK_CONTROL.set(model_level_number=-1): - cube = iris.load_cube(tmp_filepath, create_cube[0]) +def test_neg_one(tmp_filepath, save_cubelist_with_sigma): + cube_varname, _ = save_cubelist_with_sigma + with dask.config.set({"array.chunk-size": "50B"}): + with CHUNK_CONTROL.set(model_level_number=-1): + cubes = CubeList(loader.load_cubes(tmp_filepath)) + cube = cubes.extract_cube(cube_varname) assert cube.shape == (3, 4, 5, 6) - assert cube.lazy_data().chunksize == (3, 4, 5, 6) + # uses known good output + assert cube.lazy_data().chunksize == (1, 4, 1, 1) sigma = cube.coord("sigma") assert sigma.shape == (4,) assert sigma.lazy_points().chunksize == (4,) - assert sigma.lazy_bounds().chunksize == (4, 2) + assert sigma.lazy_bounds().chunksize == (4, 1) -def test_from_file(tmp_filepath, create_file_cube): +def test_from_file(tmp_filepath, save_cube_with_chunksize): with CHUNK_CONTROL.from_file(): - cube = iris.load_cube(tmp_filepath) + cube = next(loader.load_cubes(tmp_filepath)) assert cube.shape == (2, 3, 4) assert cube.lazy_data().chunksize == (1, 3, 4) - -def test_as_dask(tmp_filepath, create_cube): +def test_as_dask(tmp_filepath, save_cubelist_with_sigma): message = "Mock called, rest of test unneeded" with patch("iris.fileformats.netcdf.loader.as_lazy_data") as optimum: optimum.side_effect = RuntimeError(message) with CHUNK_CONTROL.as_dask(): try: - iris.load_cube(tmp_filepath, create_cube[0]) + cubes = CubeList(loader.load_cubes(tmp_filepath)) except RuntimeError as e: - if str(e) == message: - pass - else: + if str(e) != message: raise e optimum.assert_called_with( ANY, chunks=None, dask_chunking=True diff --git a/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py b/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py index 63cf75a3bb..65b95ac5f4 100644 --- a/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py +++ b/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py @@ -49,7 +49,7 @@ def test_dask_chunking(self): chunks = (12,) optimum = 
self.patch("iris._lazy_data._optimum_chunksize") optimum.return_value = chunks - as_lazy_data(data, chunks=None, dask_chunking=True) + _ = as_lazy_data(data, chunks=None, dask_chunking = True) self.assertFalse(optimum.called) def test_dask_chunking_error(self): @@ -57,12 +57,11 @@ def test_dask_chunking_error(self): chunks = (12,) optimum = self.patch("iris._lazy_data._optimum_chunksize") optimum.return_value = chunks - with self.assertRaises(ValueError) as ar: + with self.assertRaisesRegex( + ValueError, + r'Dask chunking chosen, but chunks already assigned value' + ) as ar: as_lazy_data(data, chunks=chunks, dask_chunking=True) - self.assertEqual( - str(ar.exception), - f"Dask chunking chosen, but chunks already assigned value {chunks}", - ) def test_with_masked_constant(self): masked_data = ma.masked_array([8], mask=True) From 4f8084791321786f0dc8fdc11eacb756410afe7f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 16 Nov 2023 16:18:14 +0000 Subject: [PATCH 12/18] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../unit/fileformats/netcdf/loader/test__chunk_control.py | 8 +++----- lib/iris/tests/unit/lazy_data/test_as_lazy_data.py | 6 +++--- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py b/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py index 9520abbbb6..acb4055e1d 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py +++ b/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py @@ -19,9 +19,9 @@ import pytest import iris +from iris.cube import CubeList from iris.fileformats.netcdf import loader from iris.fileformats.netcdf.loader import CHUNK_CONTROL -from iris.cube import CubeList import iris.tests.stock as istk @@ -49,6 +49,7 @@ def save_cube_with_chunksize(tmp_filepath): # yield str(tmp_path) # shutil.rmtree(tmp_dir) + @pytest.fixture(scope="session") def tmp_filepath(tmp_path_factory): tmp_dir = tmp_path_factory.mktemp("data") @@ -56,7 +57,6 @@ def tmp_filepath(tmp_path_factory): yield str(tmp_path) - @pytest.fixture(autouse=True) def remove_min_bytes(): old_min_bytes = loader._LAZYVAR_MIN_BYTES @@ -170,9 +170,7 @@ def test_as_dask(tmp_filepath, save_cubelist_with_sigma): except RuntimeError as e: if str(e) != message: raise e - optimum.assert_called_with( - ANY, chunks=None, dask_chunking=True - ) + optimum.assert_called_with(ANY, chunks=None, dask_chunking=True) if __name__ == "__main__": diff --git a/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py b/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py index 65b95ac5f4..a2c9cd10ef 100644 --- a/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py +++ b/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py @@ -49,7 +49,7 @@ def test_dask_chunking(self): chunks = (12,) optimum = self.patch("iris._lazy_data._optimum_chunksize") optimum.return_value = chunks - _ = as_lazy_data(data, chunks=None, dask_chunking = True) + _ = as_lazy_data(data, chunks=None, dask_chunking=True) self.assertFalse(optimum.called) def test_dask_chunking_error(self): @@ -58,8 +58,8 @@ def test_dask_chunking_error(self): optimum = self.patch("iris._lazy_data._optimum_chunksize") optimum.return_value = chunks with self.assertRaisesRegex( - ValueError, - r'Dask chunking chosen, but chunks already assigned value' + ValueError, + r"Dask chunking chosen, but chunks already assigned value", ) as ar: 
as_lazy_data(data, chunks=chunks, dask_chunking=True) From fee7ed273d96ff953c92ed87339a49f1fba07b45 Mon Sep 17 00:00:00 2001 From: Elias Sadek Date: Thu, 16 Nov 2023 16:24:27 +0000 Subject: [PATCH 13/18] pre commit fixes --- .../unit/fileformats/netcdf/loader/test__chunk_control.py | 6 +----- lib/iris/tests/unit/lazy_data/test_as_lazy_data.py | 4 +--- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py b/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py index 9520abbbb6..91ca0bf2ac 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py +++ b/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py @@ -9,13 +9,9 @@ # importing anything else. import iris.tests as tests # isort:skip -from pathlib import Path -import shutil -import tempfile from unittest.mock import ANY, patch import dask -from dask import array as da import pytest import iris @@ -166,7 +162,7 @@ def test_as_dask(tmp_filepath, save_cubelist_with_sigma): optimum.side_effect = RuntimeError(message) with CHUNK_CONTROL.as_dask(): try: - cubes = CubeList(loader.load_cubes(tmp_filepath)) + CubeList(loader.load_cubes(tmp_filepath)) except RuntimeError as e: if str(e) != message: raise e diff --git a/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py b/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py index 65b95ac5f4..478fb6d318 100644 --- a/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py +++ b/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py @@ -10,14 +10,12 @@ import iris.tests as tests # isort:skip from unittest import mock -from unittest.mock import Mock import dask.array as da import dask.config import numpy as np import numpy.ma as ma -from iris import _lazy_data from iris._lazy_data import _optimum_chunksize, as_lazy_data @@ -60,7 +58,7 @@ def test_dask_chunking_error(self): with self.assertRaisesRegex( ValueError, r'Dask chunking chosen, but chunks already assigned value' - ) as ar: + ): as_lazy_data(data, chunks=chunks, dask_chunking=True) def test_with_masked_constant(self): From 6727f7b3cdd1cb022ead3cbb0a488fa868ee300c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 16 Nov 2023 16:26:26 +0000 Subject: [PATCH 14/18] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- lib/iris/tests/unit/lazy_data/test_as_lazy_data.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py b/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py index 478fb6d318..3e464243bf 100644 --- a/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py +++ b/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py @@ -47,7 +47,7 @@ def test_dask_chunking(self): chunks = (12,) optimum = self.patch("iris._lazy_data._optimum_chunksize") optimum.return_value = chunks - _ = as_lazy_data(data, chunks=None, dask_chunking = True) + _ = as_lazy_data(data, chunks=None, dask_chunking=True) self.assertFalse(optimum.called) def test_dask_chunking_error(self): @@ -56,8 +56,8 @@ def test_dask_chunking_error(self): optimum = self.patch("iris._lazy_data._optimum_chunksize") optimum.return_value = chunks with self.assertRaisesRegex( - ValueError, - r'Dask chunking chosen, but chunks already assigned value' + ValueError, + r"Dask chunking chosen, but chunks already assigned value", ): as_lazy_data(data, chunks=chunks, dask_chunking=True) From 
139fd41f2c953e83de2a28057fceeb80a658b6d6 Mon Sep 17 00:00:00 2001 From: Elias Sadek Date: Fri, 17 Nov 2023 14:59:02 +0000 Subject: [PATCH 15/18] review comments, and added test in test__get_cf_var_data() --- .../netcdf/loader/test__chunk_control.py | 17 ++++------------- .../netcdf/loader/test__get_cf_var_data.py | 13 ++++++++++++- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py b/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py index 7125a97dec..1c05d66281 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py +++ b/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py @@ -1,9 +1,8 @@ # Copyright Iris contributors # -# This file is part of Iris and is released under the LGPL license. -# See COPYING and COPYING.LESSER in the root of the repository for full -# licensing details. -"""Integration tests for loading and saving netcdf files.""" +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Unit tests for :class:`iris.fileformats.netcdf.loader.ChunkControl`.""" # Import iris.tests first so that some things can be initialised before # importing anything else. @@ -38,19 +37,11 @@ def save_cube_with_chunksize(tmp_filepath): iris.save(istk.simple_3d(), tmp_filepath, chunksizes=(1, 3, 4)) -# @pytest.fixture -# def tmp_filepath(): -# tmp_dir = Path(tempfile.mkdtemp()) -# tmp_path = tmp_dir / "tmp.nc" -# yield str(tmp_path) -# shutil.rmtree(tmp_dir) - - @pytest.fixture(scope="session") def tmp_filepath(tmp_path_factory): tmp_dir = tmp_path_factory.mktemp("data") tmp_path = tmp_dir / "tmp.nc" - yield str(tmp_path) + return str(tmp_path) @pytest.fixture(autouse=True) diff --git a/lib/iris/tests/unit/fileformats/netcdf/loader/test__get_cf_var_data.py b/lib/iris/tests/unit/fileformats/netcdf/loader/test__get_cf_var_data.py index 6c487d74e7..9fd426ea37 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/loader/test__get_cf_var_data.py +++ b/lib/iris/tests/unit/fileformats/netcdf/loader/test__get_cf_var_data.py @@ -15,7 +15,7 @@ from iris._lazy_data import _optimum_chunksize import iris.fileformats.cf -from iris.fileformats.netcdf.loader import _get_cf_var_data +from iris.fileformats.netcdf.loader import _get_cf_var_data, CHUNK_CONTROL class Test__get_cf_var_data(tests.IrisTest): @@ -27,9 +27,11 @@ def setUp(self): def _make( self, chunksizes=None, shape=None, dtype="i4", **extra_properties ): + cf_data = mock.MagicMock( _FillValue=None, __getitem__="", + dimensions=["dim_"+str(x) for x in range(len(shape or "1"))] ) cf_data.chunking = mock.MagicMock(return_value=chunksizes) if shape is None: @@ -61,6 +63,15 @@ def test_cf_data_chunks(self): expected_chunks = _optimum_chunksize(chunks, self.shape) self.assertArrayEqual(lazy_data_chunks, expected_chunks) + def test_cf_data_chunk_control(self): + chunks = [2500, 240, 200] + cf_var = self._make(shape=(2500, 240, 200), chunksizes=chunks) + with CHUNK_CONTROL.set(dim_0=25, dim_1=24, dim_2=20): + lazy_data = _get_cf_var_data(cf_var, self.filename) + lazy_data_chunks = [c[0] for c in lazy_data.chunks] + expected_chunks = (25, 24, 20) + self.assertArrayEqual(lazy_data_chunks, expected_chunks) + def test_cf_data_no_chunks(self): # No chunks means chunks are calculated from the array's shape by # `iris._lazy_data._optimum_chunksize()`. 
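For readers following the tests in the patches above, this is roughly how the chunk-control context managers are driven (a minimal usage sketch inferred from the test assertions; the netCDF path is illustrative, and "my_var" is the var_name the fixtures assign):

    import iris
    from iris.fileformats.netcdf.loader import CHUNK_CONTROL

    # Pin one dimension's chunk size for every variable in the file
    # (the "global" case exercised by test_control_global).
    with CHUNK_CONTROL.set(model_level_number=2):
        cube = iris.load_cube("some_file.nc", "my_var")  # path is hypothetical

    # Limit the override to a single netCDF variable by naming it first
    # (as in test_control_cube_var and test_control_sigma_only).
    with CHUNK_CONTROL.set("my_var", model_level_number=2):
        cube = iris.load_cube("some_file.nc", "my_var")

    # Reuse the chunking recorded in the file, or defer chunking entirely to
    # dask (as in test_from_file and test_as_dask).
    with CHUNK_CONTROL.from_file():
        cube = iris.load_cube("some_file.nc")
    with CHUNK_CONTROL.as_dask():
        cube = iris.load_cube("some_file.nc")
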
From bfe23b6996b5a10e222c67ffac2887dae4944326 Mon Sep 17 00:00:00 2001 From: Elias Sadek Date: Fri, 17 Nov 2023 16:21:29 +0000 Subject: [PATCH 16/18] added in another test --- .../netcdf/loader/test__chunk_control.py | 16 ++++++++++++++++ .../netcdf/loader/test__get_cf_var_data.py | 5 ++--- .../tests/unit/lazy_data/test_as_lazy_data.py | 6 +++--- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py b/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py index 1c05d66281..620d879102 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py +++ b/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py @@ -160,5 +160,21 @@ def test_as_dask(tmp_filepath, save_cubelist_with_sigma): optimum.assert_called_with(ANY, chunks=None, dask_chunking=True) +def test_pinned_optimisation(tmp_filepath, save_cubelist_with_sigma): + cube_varname, _ = save_cubelist_with_sigma + with dask.config.set({"array.chunk-size": "250B"}): + with CHUNK_CONTROL.set(model_level_number=2): + cubes = CubeList(loader.load_cubes(tmp_filepath)) + cube = cubes.extract_cube(cube_varname) + assert cube.shape == (3, 4, 5, 6) + # uses known good output + assert cube.lazy_data().chunksize == (1, 2, 2, 6) + + sigma = cube.coord("sigma") + assert sigma.shape == (4,) + assert sigma.lazy_points().chunksize == (2,) + assert sigma.lazy_bounds().chunksize == (2, 2) + + if __name__ == "__main__": tests.main() diff --git a/lib/iris/tests/unit/fileformats/netcdf/loader/test__get_cf_var_data.py b/lib/iris/tests/unit/fileformats/netcdf/loader/test__get_cf_var_data.py index 9fd426ea37..b09bd740d7 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/loader/test__get_cf_var_data.py +++ b/lib/iris/tests/unit/fileformats/netcdf/loader/test__get_cf_var_data.py @@ -15,7 +15,7 @@ from iris._lazy_data import _optimum_chunksize import iris.fileformats.cf -from iris.fileformats.netcdf.loader import _get_cf_var_data, CHUNK_CONTROL +from iris.fileformats.netcdf.loader import CHUNK_CONTROL, _get_cf_var_data class Test__get_cf_var_data(tests.IrisTest): @@ -27,11 +27,10 @@ def setUp(self): def _make( self, chunksizes=None, shape=None, dtype="i4", **extra_properties ): - cf_data = mock.MagicMock( _FillValue=None, __getitem__="", - dimensions=["dim_"+str(x) for x in range(len(shape or "1"))] + dimensions=["dim_" + str(x) for x in range(len(shape or "1"))], ) cf_data.chunking = mock.MagicMock(return_value=chunksizes) if shape is None: diff --git a/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py b/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py index 478fb6d318..3e464243bf 100644 --- a/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py +++ b/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py @@ -47,7 +47,7 @@ def test_dask_chunking(self): chunks = (12,) optimum = self.patch("iris._lazy_data._optimum_chunksize") optimum.return_value = chunks - _ = as_lazy_data(data, chunks=None, dask_chunking = True) + _ = as_lazy_data(data, chunks=None, dask_chunking=True) self.assertFalse(optimum.called) def test_dask_chunking_error(self): @@ -56,8 +56,8 @@ def test_dask_chunking_error(self): optimum = self.patch("iris._lazy_data._optimum_chunksize") optimum.return_value = chunks with self.assertRaisesRegex( - ValueError, - r'Dask chunking chosen, but chunks already assigned value' + ValueError, + r"Dask chunking chosen, but chunks already assigned value", ): as_lazy_data(data, chunks=chunks, dask_chunking=True) From 
fc8c78b6d49da182ba4d51235d3a831b36322b6d Mon Sep 17 00:00:00 2001 From: Elias Sadek Date: Mon, 20 Nov 2023 13:54:52 +0000 Subject: [PATCH 17/18] added tests and fixed review comments --- lib/iris/fileformats/netcdf/loader.py | 15 ++++----- .../netcdf/loader/test__chunk_control.py | 31 +++++++++++++++++-- .../netcdf/loader/test__get_cf_var_data.py | 1 + 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/lib/iris/fileformats/netcdf/loader.py b/lib/iris/fileformats/netcdf/loader.py index 107415c79d..2f853f89fb 100644 --- a/lib/iris/fileformats/netcdf/loader.py +++ b/lib/iris/fileformats/netcdf/loader.py @@ -241,17 +241,14 @@ def _get_cf_var_data(cf_var, filename): result = as_lazy_data(proxy, chunks=None, dask_chunking=True) else: chunks = cf_var.cf_data.chunking() - if ( - chunks is None - and CHUNK_CONTROL.mode is ChunkControl.Modes.FROM_FILE - ): - raise KeyError( - f"{cf_var.cf_name} does not contain pre-existing chunk specifications." - f"Instead, you might wish to use CHUNK_CONTROL.set(), or just use default" - f" behaviour outside of a context manager. " - ) # In the "contiguous" case, pass chunks=None to 'as_lazy_data'. if chunks == "contiguous": + if CHUNK_CONTROL.mode is ChunkControl.Modes.FROM_FILE: + raise KeyError( + f"{cf_var.cf_name} does not contain pre-existing chunk specifications." + f"Instead, you might wish to use CHUNK_CONTROL.set(), or just use default" + f" behaviour outside of a context manager. " + ) # Equivalent to chunks=None, but value required by chunking control chunks = list(cf_var.shape) diff --git a/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py b/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py index 620d879102..1500c3fe38 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py +++ b/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py @@ -108,6 +108,18 @@ def test_control_cube_var(tmp_filepath, save_cubelist_with_sigma): assert sigma.lazy_bounds().chunksize == (2, 2) +def test_invalid_chunksize(tmp_filepath, save_cubelist_with_sigma): + with pytest.raises(ValueError): + with CHUNK_CONTROL.set(model_level_numer="2"): + CubeList(loader.load_cubes(tmp_filepath)) + + +def test_invalid_var_name(tmp_filepath, save_cubelist_with_sigma): + with pytest.raises(ValueError): + with CHUNK_CONTROL.set([1, 2], model_level_numer="2"): + CubeList(loader.load_cubes(tmp_filepath)) + + def test_control_multiple(tmp_filepath, save_cubelist_with_sigma): cube_varname, sigma_varname = save_cubelist_with_sigma with CHUNK_CONTROL.set( @@ -147,17 +159,29 @@ def test_from_file(tmp_filepath, save_cube_with_chunksize): assert cube.lazy_data().chunksize == (1, 3, 4) +def test_no_chunks_from_file(tmp_filepath, save_cubelist_with_sigma): + cube_varname, _ = save_cubelist_with_sigma + with pytest.raises(KeyError): + with CHUNK_CONTROL.from_file(): + CubeList(loader.load_cubes(tmp_filepath)) + + def test_as_dask(tmp_filepath, save_cubelist_with_sigma): + """ + This does not test return values, as we can't be sure + dask chunking behaviour won't change, or that it will differ + from our own chunking behaviour. 
+ """ message = "Mock called, rest of test unneeded" - with patch("iris.fileformats.netcdf.loader.as_lazy_data") as optimum: - optimum.side_effect = RuntimeError(message) + with patch("iris.fileformats.netcdf.loader.as_lazy_data") as as_lazy_data: + as_lazy_data.side_effect = RuntimeError(message) with CHUNK_CONTROL.as_dask(): try: CubeList(loader.load_cubes(tmp_filepath)) except RuntimeError as e: if str(e) != message: raise e - optimum.assert_called_with(ANY, chunks=None, dask_chunking=True) + as_lazy_data.assert_called_with(ANY, chunks=None, dask_chunking=True) def test_pinned_optimisation(tmp_filepath, save_cubelist_with_sigma): @@ -168,6 +192,7 @@ def test_pinned_optimisation(tmp_filepath, save_cubelist_with_sigma): cube = cubes.extract_cube(cube_varname) assert cube.shape == (3, 4, 5, 6) # uses known good output + # known good output WITHOUT pinning: (1, 1, 5, 6) assert cube.lazy_data().chunksize == (1, 2, 2, 6) sigma = cube.coord("sigma") diff --git a/lib/iris/tests/unit/fileformats/netcdf/loader/test__get_cf_var_data.py b/lib/iris/tests/unit/fileformats/netcdf/loader/test__get_cf_var_data.py index b09bd740d7..af4a249866 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/loader/test__get_cf_var_data.py +++ b/lib/iris/tests/unit/fileformats/netcdf/loader/test__get_cf_var_data.py @@ -63,6 +63,7 @@ def test_cf_data_chunks(self): self.assertArrayEqual(lazy_data_chunks, expected_chunks) def test_cf_data_chunk_control(self): + # more thorough testing can be found at `test__chunk_control` chunks = [2500, 240, 200] cf_var = self._make(shape=(2500, 240, 200), chunksizes=chunks) with CHUNK_CONTROL.set(dim_0=25, dim_1=24, dim_2=20): From 929b03bded62c27fb84c7a3a7b110a176e6fa0f8 Mon Sep 17 00:00:00 2001 From: Elias Sadek Date: Mon, 20 Nov 2023 15:17:06 +0000 Subject: [PATCH 18/18] added AuxCoord test --- lib/iris/fileformats/netcdf/loader.py | 9 +++++++-- .../netcdf/loader/test__chunk_control.py | 15 +++++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/lib/iris/fileformats/netcdf/loader.py b/lib/iris/fileformats/netcdf/loader.py index 2f853f89fb..934d5d11ea 100644 --- a/lib/iris/fileformats/netcdf/loader.py +++ b/lib/iris/fileformats/netcdf/loader.py @@ -243,10 +243,15 @@ def _get_cf_var_data(cf_var, filename): chunks = cf_var.cf_data.chunking() # In the "contiguous" case, pass chunks=None to 'as_lazy_data'. if chunks == "contiguous": - if CHUNK_CONTROL.mode is ChunkControl.Modes.FROM_FILE: + if ( + CHUNK_CONTROL.mode is ChunkControl.Modes.FROM_FILE + and isinstance( + cf_var, iris.fileformats.cf.CFDataVariable + ) + ): raise KeyError( f"{cf_var.cf_name} does not contain pre-existing chunk specifications." - f"Instead, you might wish to use CHUNK_CONTROL.set(), or just use default" + f" Instead, you might wish to use CHUNK_CONTROL.set(), or just use default" f" behaviour outside of a context manager. " ) # Equivalent to chunks=None, but value required by chunking control diff --git a/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py b/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py index 1500c3fe38..7249c39829 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py +++ b/lib/iris/tests/unit/fileformats/netcdf/loader/test__chunk_control.py @@ -7,10 +7,10 @@ # Import iris.tests first so that some things can be initialised before # importing anything else. 
import iris.tests as tests # isort:skip - from unittest.mock import ANY, patch import dask +import numpy as np import pytest import iris @@ -34,7 +34,18 @@ def save_cubelist_with_sigma(tmp_filepath): @pytest.fixture def save_cube_with_chunksize(tmp_filepath): - iris.save(istk.simple_3d(), tmp_filepath, chunksizes=(1, 3, 4)) + cube = istk.simple_3d() + # adding an aux coord allows us to test that + # iris.fileformats.netcdf.loader._get_cf_var_data() + # will only throw an error if from_file mode is + # True when the entire cube has no specified chunking + aux = iris.coords.AuxCoord( + points=np.zeros((3, 4)), + long_name="random", + units="1", + ) + cube.add_aux_coord(aux, [1, 2]) + iris.save(cube, tmp_filepath, chunksizes=(1, 3, 4)) @pytest.fixture(scope="session")
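
Alongside the loader tests, the `as_lazy_data` unit tests in these patches imply the following calling convention for the new `dask_chunking` switch (a sketch based only on those tests; the input array is illustrative):

    import numpy as np
    from iris._lazy_data import as_lazy_data

    data = np.arange(24)

    # Let dask pick the chunking itself; chunks must stay None in this mode,
    # so _optimum_chunksize is never consulted.
    lazy = as_lazy_data(data, chunks=None, dask_chunking=True)

    # Supplying explicit chunks as well is rejected, per the unit test:
    # "Dask chunking chosen, but chunks already assigned value (12,)"
    try:
        as_lazy_data(data, chunks=(12,), dask_chunking=True)
    except ValueError as err:
        print(err)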