Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/iris/src/whatsnew/3.0.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ This document explains the changes made to Iris for this release
the dask 'test access'. This makes loading of netcdf files with a large number of variables significantly faster.
(:pull:`4135`)

#. `@pp-mo`_ reverted a change previously made in :pull:`3659` to
``PPDataProxy.__getitem__``. The check for empty slicings is no longer needed
since :pull:`4135` was added.
(:pull:`4141`)

Note that the above contributions labelled with ``pre-v3.1.0`` are part of the forthcoming
Iris v3.1.0 release, but need to be included in this patch release.

Expand Down
30 changes: 12 additions & 18 deletions lib/iris/fileformats/pp.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
)
import iris.fileformats.rules
import iris.coord_systems
from iris.util import _array_slice_ifempty

try:
import mo_pack
Expand Down Expand Up @@ -594,23 +593,18 @@ def ndim(self):
return len(self.shape)

def __getitem__(self, keys):
# Check for 'empty' slicings, in which case don't fetch the data.
# Because, since Dask v2, 'dask.array.from_array' performs an empty
# slicing and we must not fetch the data at that time.
result = _array_slice_ifempty(keys, self.shape, self.dtype)
if result is None:
with open(self.path, "rb") as pp_file:
pp_file.seek(self.offset, os.SEEK_SET)
data_bytes = pp_file.read(self.data_len)
data = _data_bytes_to_shaped_array(
data_bytes,
self.lbpack,
self.boundary_packing,
self.shape,
self.src_dtype,
self.mdi,
)
result = data.__getitem__(keys)
with open(self.path, "rb") as pp_file:
pp_file.seek(self.offset, os.SEEK_SET)
data_bytes = pp_file.read(self.data_len)
data = _data_bytes_to_shaped_array(
data_bytes,
self.lbpack,
self.boundary_packing,
self.shape,
self.src_dtype,
self.mdi,
)
result = data.__getitem__(keys)

return np.asanyarray(result, dtype=self.dtype)

Expand Down
124 changes: 0 additions & 124 deletions lib/iris/tests/unit/fileformats/pp/test_PPDataProxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import iris.tests as tests

from unittest import mock
import numpy as np

from iris.fileformats.pp import PPDataProxy, SplittableInt

Expand All @@ -34,128 +33,5 @@ def test_lbpack_raw(self):
self.assertEqual(proxy.lbpack.n4, lbpack // 1000 % 10)


class SliceTranslator:
    """
    Helper whose indexing operator simply echoes back its argument.

    Indexing an instance with any expression (e.g. ``obj[1:3, 2]``)
    returns exactly the key object Python passed to ``__getitem__`` —
    a convenient way to spell a slicing expression as data.
    """

    def __getitem__(self, keys):
        # Return the raw indexing argument, unchanged.
        return keys


# A multidimensional-indexable object that returns its index keys, so we can
# use multidimensional-indexing notation to specify a slicing expression.
Slices = SliceTranslator()


class Test__getitem__slicing(tests.IrisTest):
def _check_slicing(
    self, test_shape, indices, result_shape, data_was_fetched=True
):
    """
    Assert the behaviour of ``PPDataProxy.__getitem__`` for one slicing.

    Builds a proxy of shape `test_shape`, indexes it with `indices`, and
    checks (a) whether a file open was attempted (`data_was_fetched`) and
    (b) that the result is an ndarray of the expected dtype and
    `result_shape`.
    """
    # Check behaviour of the getitem call with specific slicings.
    # Especially: check cases where a fetch does *not* read from the file.
    # This is necessary because, since Dask 2.0, the "from_array" function
    # takes a zero-length slice of its array argument, to capture array
    # metadata, and in those cases we want to avoid file access.
    test_dtype = np.dtype(np.float32)
    # Dummy file details (path/offset/data_len) are fine here: the file
    # open below is mocked, so they are never really used.
    proxy = PPDataProxy(
        shape=test_shape,
        src_dtype=test_dtype,
        path=None,
        offset=None,
        data_len=None,
        lbpack=0,  # Note: a 'real' value is needed.
        boundary_packing=None,
        mdi=None,
    )

    # Mock out the file-open call, to see if the file would be read.
    builtin_open_func_name = "builtins.open"
    mock_fileopen = self.patch(builtin_open_func_name)

    # Also mock out the 'databytes_to_shaped_array' call, to fake minimal
    # operation in the cases where file-open *does* get called.
    fake_data = np.zeros(test_shape, dtype=test_dtype)
    self.patch(
        "iris.fileformats.pp._data_bytes_to_shaped_array",
        mock.MagicMock(return_value=fake_data),
    )

    # Test the requested indexing operation.
    result = proxy.__getitem__(indices)

    # Check the behaviour and results were as expected.
    self.assertEqual(mock_fileopen.called, data_was_fetched)
    self.assertIsInstance(result, np.ndarray)
    self.assertEqual(result.dtype, test_dtype)
    self.assertEqual(result.shape, result_shape)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These tests are no longer necessary: we now always open the file when calling ``__getitem__``, which makes
self.assertEqual(mock_fileopen.called, data_was_fetched)
redundant.

The other checks are testing functionality we have tests for elsewhere.


def test_slicing_1d_normal(self):
    # A 'normal' 1d testcase with no empty slices.
    # The slice stop (10) exceeds the dim length (3), so the result is
    # clipped to shape (2,), and a real data fetch is expected.
    self._check_slicing(
        test_shape=(3,),
        indices=Slices[1:10],
        result_shape=(2,),
        data_was_fetched=True,
    )

def test_slicing_1d_empty(self):
    # A 1d testcase with an empty slicing.
    # The 0:0 key means no data is selected, so no file access occurs.
    self._check_slicing(
        test_shape=(3,),
        indices=Slices[0:0],
        result_shape=(0,),
        data_was_fetched=False,
    )

def test_slicing_2d_normal(self):
    # A 2d testcase with no empty slices.
    # The integer key (2) removes dim 0; ':3' keeps 3 of 4 elements of
    # dim 1, so the result is (3,) and the data must be fetched.
    self._check_slicing(
        test_shape=(3, 4),
        indices=Slices[2, :3],
        result_shape=(3,),
        data_was_fetched=True,
    )

def test_slicing_2d_allempty(self):
    # A 2d testcase with all empty slices.
    # Both dims are indexed 0:0, giving a (0, 0) result with no fetch.
    self._check_slicing(
        test_shape=(3, 4),
        indices=Slices[0:0, 0:0],
        result_shape=(0, 0),
        data_was_fetched=False,
    )

def test_slicing_2d_empty_dim0(self):
    # A 2d testcase with an empty slice.
    # Only dim 0 is keyed (0:0); dim 1 passes through at full length (4).
    self._check_slicing(
        test_shape=(3, 4),
        indices=Slices[0:0],
        result_shape=(0, 4),
        data_was_fetched=False,
    )

def test_slicing_2d_empty_dim1(self):
    # A 2d testcase with an empty slice, and an integer index.
    # The integer key removes dim 0; the 0:0 key empties dim 1,
    # so the result is (0,) with no fetch.
    self._check_slicing(
        test_shape=(3, 4),
        indices=Slices[1, 0:0],
        result_shape=(0,),
        data_was_fetched=False,
    )

def test_slicing_complex(self):
    # Multiple dimensions with multiple empty slices.
    # Mixes normal slices, an integer key, a 0:0 key and a 1:1 key over a
    # 7-dim shape; the presence of 0:0 means no fetch, and both the 0:0
    # and 1:1 dims come out zero-length in the (2, 0, 5, 0, 3, 7) result.
    self._check_slicing(
        test_shape=(3, 4, 2, 5, 6, 3, 7),
        indices=Slices[1:3, 2, 0:0, :, 1:1, :100],
        result_shape=(2, 0, 5, 0, 3, 7),
        data_was_fetched=False,
    )


if __name__ == "__main__":
tests.main()
61 changes: 0 additions & 61 deletions lib/iris/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -960,67 +960,6 @@ def __lt__(self, other):
return NotImplemented


def _array_slice_ifempty(keys, shape, dtype):
"""
Detect cases where an array slice will contain no data, as it contains a
zero-length dimension, and produce an equivalent result for those cases.

The function indicates 'empty' slicing cases, by returning an array equal
to the slice result in those cases.

Args:

* keys (indexing key, or tuple of keys):
The argument from an array __getitem__ call.
Only tuples of integers and slices are supported, in particular no
newaxis, ellipsis or array keys.
These are the types of array access usage we expect from Dask.
* shape (tuple of int):
The shape of the array being indexed.
* dtype (numpy.dtype):
The dtype of the array being indexed.

Returns:
result (np.ndarray or None):
If 'keys' contains a slice(0, 0), this is an ndarray of the correct
resulting shape and provided dtype.
Otherwise it is None.

.. note::

This is used to prevent DataProxy arraylike objects from fetching their
file data when wrapped as Dask arrays.
This is because, for Dask >= 2.0, the "dask.array.from_array" call
performs a fetch like [0:0, 0:0, ...], to 'snapshot' array metadata.
This function enables us to avoid triggering a file data fetch in those
cases : This is consistent because the result will not contain any
actual data content.

"""
# Convert a single key into a 1-tuple, so we always have a tuple of keys.
if isinstance(keys, tuple):
keys_tuple = keys
else:
keys_tuple = (keys,)

if any(key == slice(0, 0) for key in keys_tuple):
# An 'empty' slice is present : Return a 'fake' array instead.
target_shape = list(shape)
for i_dim, key in enumerate(keys_tuple):
if key == slice(0, 0):
# Reduce dims with empty slicing to length 0.
target_shape[i_dim] = 0
# Create a prototype result : no memory usage, as some dims are 0.
result = np.zeros(target_shape, dtype=dtype)
# Index with original keys to produce the desired result shape.
# Note : also ok in 0-length dims, as the slice is always '0:0'.
result = result[keys]
else:
result = None

return result


def create_temp_filename(suffix=""):
"""Return a temporary file name.

Expand Down