From 029535b102c484afb8bea2c0c524bc5189dc154f Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Mon, 17 May 2021 18:19:17 +0100 Subject: [PATCH 1/5] Remove workaround when dask-wrapping PP data, obsoleted by #4135. --- lib/iris/fileformats/pp.py | 30 ++++++++----------- lib/iris/util.py | 61 -------------------------------------- 2 files changed, 12 insertions(+), 79 deletions(-) diff --git a/lib/iris/fileformats/pp.py b/lib/iris/fileformats/pp.py index 406da925b1..7589d27922 100644 --- a/lib/iris/fileformats/pp.py +++ b/lib/iris/fileformats/pp.py @@ -38,7 +38,6 @@ ) import iris.fileformats.rules import iris.coord_systems -from iris.util import _array_slice_ifempty try: import mo_pack @@ -594,23 +593,18 @@ def ndim(self): return len(self.shape) def __getitem__(self, keys): - # Check for 'empty' slicings, in which case don't fetch the data. - # Because, since Dask v2, 'dask.array.from_array' performs an empty - # slicing and we must not fetch the data at that time. - result = _array_slice_ifempty(keys, self.shape, self.dtype) - if result is None: - with open(self.path, "rb") as pp_file: - pp_file.seek(self.offset, os.SEEK_SET) - data_bytes = pp_file.read(self.data_len) - data = _data_bytes_to_shaped_array( - data_bytes, - self.lbpack, - self.boundary_packing, - self.shape, - self.src_dtype, - self.mdi, - ) - result = data.__getitem__(keys) + with open(self.path, "rb") as pp_file: + pp_file.seek(self.offset, os.SEEK_SET) + data_bytes = pp_file.read(self.data_len) + data = _data_bytes_to_shaped_array( + data_bytes, + self.lbpack, + self.boundary_packing, + self.shape, + self.src_dtype, + self.mdi, + ) + result = data.__getitem__(keys) return np.asanyarray(result, dtype=self.dtype) diff --git a/lib/iris/util.py b/lib/iris/util.py index d2fda82c0f..aea07a877c 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -960,67 +960,6 @@ def __lt__(self, other): return NotImplemented -def _array_slice_ifempty(keys, shape, dtype): - """ - Detect cases where an array slice will contain no data, as it contains a - zero-length dimension, and produce an equivalent result for those cases. - - The function indicates 'empty' slicing cases, by returning an array equal - to the slice result in those cases. - - Args: - - * keys (indexing key, or tuple of keys): - The argument from an array __getitem__ call. - Only tuples of integers and slices are supported, in particular no - newaxis, ellipsis or array keys. - These are the types of array access usage we expect from Dask. - * shape (tuple of int): - The shape of the array being indexed. - * dtype (numpy.dtype): - The dtype of the array being indexed. - - Returns: - result (np.ndarray or None): - If 'keys' contains a slice(0, 0), this is an ndarray of the correct - resulting shape and provided dtype. - Otherwise it is None. - - .. note:: - - This is used to prevent DataProxy arraylike objects from fetching their - file data when wrapped as Dask arrays. - This is because, for Dask >= 2.0, the "dask.array.from_array" call - performs a fetch like [0:0, 0:0, ...], to 'snapshot' array metadata. - This function enables us to avoid triggering a file data fetch in those - cases : This is consistent because the result will not contain any - actual data content. - - """ - # Convert a single key into a 1-tuple, so we always have a tuple of keys. - if isinstance(keys, tuple): - keys_tuple = keys - else: - keys_tuple = (keys,) - - if any(key == slice(0, 0) for key in keys_tuple): - # An 'empty' slice is present : Return a 'fake' array instead. - target_shape = list(shape) - for i_dim, key in enumerate(keys_tuple): - if key == slice(0, 0): - # Reduce dims with empty slicing to length 0. - target_shape[i_dim] = 0 - # Create a prototype result : no memory usage, as some dims are 0. - result = np.zeros(target_shape, dtype=dtype) - # Index with original keys to produce the desired result shape. - # Note : also ok in 0-length dims, as the slice is always '0:0'. - result = result[keys] - else: - result = None - - return result - - def create_temp_filename(suffix=""): """Return a temporary file name. From ea6f324b674b3e306643e1319080f11df0eb053e Mon Sep 17 00:00:00 2001 From: lbdreyer Date: Wed, 26 May 2021 22:26:51 +0100 Subject: [PATCH 2/5] Remove old slice testing --- .../unit/fileformats/pp/test_PPDataProxy.py | 124 ------------------ 1 file changed, 124 deletions(-) diff --git a/lib/iris/tests/unit/fileformats/pp/test_PPDataProxy.py b/lib/iris/tests/unit/fileformats/pp/test_PPDataProxy.py index 8a22da061c..ccaf7d7d0c 100644 --- a/lib/iris/tests/unit/fileformats/pp/test_PPDataProxy.py +++ b/lib/iris/tests/unit/fileformats/pp/test_PPDataProxy.py @@ -10,7 +10,6 @@ import iris.tests as tests from unittest import mock -import numpy as np from iris.fileformats.pp import PPDataProxy, SplittableInt @@ -34,128 +33,5 @@ def test_lbpack_raw(self): self.assertEqual(proxy.lbpack.n4, lbpack // 1000 % 10) -class SliceTranslator: - """ - Class to translate an array-indexing expression into a tuple of keys. - - An instance just returns the argument of its __getitem__ call. - - """ - - def __getitem__(self, keys): - return keys - - -# A multidimensional-indexable object that returns its index keys, so we can -# use multidimensional-indexing notation to specify a slicing expression. -Slices = SliceTranslator() - - -class Test__getitem__slicing(tests.IrisTest): - def _check_slicing( - self, test_shape, indices, result_shape, data_was_fetched=True - ): - # Check behaviour of the getitem call with specific slicings. - # Especially: check cases where a fetch does *not* read from the file. - # This is necessary because, since Dask 2.0, the "from_array" function - # takes a zero-length slice of its array argument, to capture array - # metadata, and in those cases we want to avoid file access. - test_dtype = np.dtype(np.float32) - proxy = PPDataProxy( - shape=test_shape, - src_dtype=test_dtype, - path=None, - offset=None, - data_len=None, - lbpack=0, # Note: a 'real' value is needed. - boundary_packing=None, - mdi=None, - ) - - # Mock out the file-open call, to see if the file would be read. - builtin_open_func_name = "builtins.open" - mock_fileopen = self.patch(builtin_open_func_name) - - # Also mock out the 'databytes_to_shaped_array' call, to fake minimal - # operation in the cases where file-open *does* get called. - fake_data = np.zeros(test_shape, dtype=test_dtype) - self.patch( - "iris.fileformats.pp._data_bytes_to_shaped_array", - mock.MagicMock(return_value=fake_data), - ) - - # Test the requested indexing operation. - result = proxy.__getitem__(indices) - - # Check the behaviour and results were as expected. - self.assertEqual(mock_fileopen.called, data_was_fetched) - self.assertIsInstance(result, np.ndarray) - self.assertEqual(result.dtype, test_dtype) - self.assertEqual(result.shape, result_shape) - - def test_slicing_1d_normal(self): - # A 'normal' 1d testcase with no empty slices. - self._check_slicing( - test_shape=(3,), - indices=Slices[1:10], - result_shape=(2,), - data_was_fetched=True, - ) - - def test_slicing_1d_empty(self): - # A 1d testcase with an empty slicing. - self._check_slicing( - test_shape=(3,), - indices=Slices[0:0], - result_shape=(0,), - data_was_fetched=False, - ) - - def test_slicing_2d_normal(self): - # A 2d testcase with no empty slices. - self._check_slicing( - test_shape=(3, 4), - indices=Slices[2, :3], - result_shape=(3,), - data_was_fetched=True, - ) - - def test_slicing_2d_allempty(self): - # A 2d testcase with all empty slices. - self._check_slicing( - test_shape=(3, 4), - indices=Slices[0:0, 0:0], - result_shape=(0, 0), - data_was_fetched=False, - ) - - def test_slicing_2d_empty_dim0(self): - # A 2d testcase with an empty slice. - self._check_slicing( - test_shape=(3, 4), - indices=Slices[0:0], - result_shape=(0, 4), - data_was_fetched=False, - ) - - def test_slicing_2d_empty_dim1(self): - # A 2d testcase with an empty slice, and an integer index. - self._check_slicing( - test_shape=(3, 4), - indices=Slices[1, 0:0], - result_shape=(0,), - data_was_fetched=False, - ) - - def test_slicing_complex(self): - # Multiple dimensions with multiple empty slices. - self._check_slicing( - test_shape=(3, 4, 2, 5, 6, 3, 7), - indices=Slices[1:3, 2, 0:0, :, 1:1, :100], - result_shape=(2, 0, 5, 0, 3, 7), - data_was_fetched=False, - ) - - if __name__ == "__main__": tests.main() From 8d45cfba574274efe90d732855b2102bd8c74473 Mon Sep 17 00:00:00 2001 From: lbdreyer Date: Thu, 27 May 2021 11:40:36 +0100 Subject: [PATCH 3/5] Add whats new --- docs/iris/src/whatsnew/3.0.2.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/iris/src/whatsnew/3.0.2.rst b/docs/iris/src/whatsnew/3.0.2.rst index 92f1a76c6b..1110be9518 100644 --- a/docs/iris/src/whatsnew/3.0.2.rst +++ b/docs/iris/src/whatsnew/3.0.2.rst @@ -53,6 +53,10 @@ This document explains the changes made to Iris for this release (:pull:`4135`) Note that, the above contributions labelled with ``pre-v3.1.0`` are part of the forthcoming + #. `@pp-mo`_ reverted a change made previously in (:pull:`3659`) to PPDataProxy.__getitem__. + The check for empty slicings is no longer needed since (:pull:`4135`) was added. + (:pull:`4141`) + Iris v3.1.0 release, but require to be included in this patch release. From cfd3f53ebacc4fbcc9093857707b0db194ef1b5d Mon Sep 17 00:00:00 2001 From: lbdreyer Date: Thu, 27 May 2021 16:24:05 +0100 Subject: [PATCH 4/5] move whitespace? --- docs/iris/src/whatsnew/3.0.2.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/iris/src/whatsnew/3.0.2.rst b/docs/iris/src/whatsnew/3.0.2.rst index 1110be9518..a2cd02dad2 100644 --- a/docs/iris/src/whatsnew/3.0.2.rst +++ b/docs/iris/src/whatsnew/3.0.2.rst @@ -52,11 +52,11 @@ This document explains the changes made to Iris for this release the dask 'test access'. This makes loading of netcdf files with a large number of variables significantly faster. (:pull:`4135`) - Note that, the above contributions labelled with ``pre-v3.1.0`` are part of the forthcoming - #. `@pp-mo`_ reverted a change made previously in (:pull:`3659`) to PPDataProxy.__getitem__. - The check for empty slicings is no longer needed since (:pull:`4135`) was added. + #. `@pp-mo`_ reverted a change made previously in (:pull:`3659`) to + PPDataProxy.__getitem__. The check for empty slicings is no longer needed + since (:pull:`4135`) was added. (:pull:`4141`) - + Note that, the above contributions labelled with ``pre-v3.1.0`` are part of the forthcoming Iris v3.1.0 release, but require to be included in this patch release. From a512a80350f9a274b33906374f8eaaf42a99fb06 Mon Sep 17 00:00:00 2001 From: lbdreyer Date: Thu, 27 May 2021 16:30:43 +0100 Subject: [PATCH 5/5] missing line --- docs/iris/src/whatsnew/3.0.2.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/iris/src/whatsnew/3.0.2.rst b/docs/iris/src/whatsnew/3.0.2.rst index a2cd02dad2..5406501376 100644 --- a/docs/iris/src/whatsnew/3.0.2.rst +++ b/docs/iris/src/whatsnew/3.0.2.rst @@ -56,6 +56,7 @@ This document explains the changes made to Iris for this release PPDataProxy.__getitem__. The check for empty slicings is no longer needed since (:pull:`4135`) was added. (:pull:`4141`) + Note that, the above contributions labelled with ``pre-v3.1.0`` are part of the forthcoming Iris v3.1.0 release, but require to be included in this patch release.