Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/iris/src/whatsnew/3.0.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ This document explains the changes made to Iris for this release
the dask 'test access'. This makes loading of netcdf files with a large number of variables significantly faster.
(:pull:`4135`)

#. `@pp-mo`_ reverted a change previously made in :pull:`3659` to
``PPDataProxy.__getitem__``. The check for empty slicings is no longer needed
since :pull:`4135` was added.
(:pull:`4141`)

Note that the above contributions labelled with ``pre-v3.1.0`` are part of the forthcoming
Iris v3.1.0 release, but need to be included in this patch release.

Expand Down
30 changes: 12 additions & 18 deletions lib/iris/fileformats/pp.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
)
import iris.fileformats.rules
import iris.coord_systems
from iris.util import _array_slice_ifempty

try:
import mo_pack
Expand Down Expand Up @@ -594,23 +593,18 @@ def ndim(self):
return len(self.shape)

def __getitem__(self, keys):
# Check for 'empty' slicings, in which case don't fetch the data.
# Because, since Dask v2, 'dask.array.from_array' performs an empty
# slicing and we must not fetch the data at that time.
result = _array_slice_ifempty(keys, self.shape, self.dtype)
if result is None:
with open(self.path, "rb") as pp_file:
pp_file.seek(self.offset, os.SEEK_SET)
data_bytes = pp_file.read(self.data_len)
data = _data_bytes_to_shaped_array(
data_bytes,
self.lbpack,
self.boundary_packing,
self.shape,
self.src_dtype,
self.mdi,
)
result = data.__getitem__(keys)
with open(self.path, "rb") as pp_file:
pp_file.seek(self.offset, os.SEEK_SET)
data_bytes = pp_file.read(self.data_len)
data = _data_bytes_to_shaped_array(
data_bytes,
self.lbpack,
self.boundary_packing,
self.shape,
self.src_dtype,
self.mdi,
)
result = data.__getitem__(keys)

return np.asanyarray(result, dtype=self.dtype)

Expand Down
124 changes: 0 additions & 124 deletions lib/iris/tests/unit/fileformats/pp/test_PPDataProxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import iris.tests as tests

from unittest import mock
import numpy as np

from iris.fileformats.pp import PPDataProxy, SplittableInt

Expand All @@ -34,128 +33,5 @@ def test_lbpack_raw(self):
self.assertEqual(proxy.lbpack.n4, lbpack // 1000 % 10)


class SliceTranslator:
    """
    Helper whose indexing operator simply echoes back its argument.

    Indexing an instance with any expression (e.g. ``obj[1:3, 2]``)
    returns exactly the key object Python passed to ``__getitem__`` —
    a convenient way to spell a slicing expression as data.
    """

    def __getitem__(self, keys):
        # Return the raw indexing argument, unchanged.
        return keys


# A multidimensional-indexable object that returns its index keys, so we can
# use multidimensional-indexing notation to specify a slicing expression.
Slices = SliceTranslator()


class Test__getitem__slicing(tests.IrisTest):
def _check_slicing(
    self, test_shape, indices, result_shape, data_was_fetched=True
):
    """
    Assert the behaviour of ``PPDataProxy.__getitem__`` for one slicing.

    Builds a proxy of shape `test_shape`, indexes it with `indices`, and
    checks (a) whether a file open was attempted (`data_was_fetched`) and
    (b) that the result is an ndarray of the expected dtype and
    `result_shape`.
    """
    # Check behaviour of the getitem call with specific slicings.
    # Especially: check cases where a fetch does *not* read from the file.
    # This is necessary because, since Dask 2.0, the "from_array" function
    # takes a zero-length slice of its array argument, to capture array
    # metadata, and in those cases we want to avoid file access.
    test_dtype = np.dtype(np.float32)
    # Dummy file details (path/offset/data_len) are fine here: the file
    # open below is mocked, so they are never really used.
    proxy = PPDataProxy(
        shape=test_shape,
        src_dtype=test_dtype,
        path=None,
        offset=None,
        data_len=None,
        lbpack=0,  # Note: a 'real' value is needed.
        boundary_packing=None,
        mdi=None,
    )

    # Mock out the file-open call, to see if the file would be read.
    builtin_open_func_name = "builtins.open"
    mock_fileopen = self.patch(builtin_open_func_name)

    # Also mock out the 'databytes_to_shaped_array' call, to fake minimal
    # operation in the cases where file-open *does* get called.
    fake_data = np.zeros(test_shape, dtype=test_dtype)
    self.patch(
        "iris.fileformats.pp._data_bytes_to_shaped_array",
        mock.MagicMock(return_value=fake_data),
    )

    # Test the requested indexing operation.
    result = proxy.__getitem__(indices)

    # Check the behaviour and results were as expected.
    self.assertEqual(mock_fileopen.called, data_was_fetched)
    self.assertIsInstance(result, np.ndarray)
    self.assertEqual(result.dtype, test_dtype)
    self.assertEqual(result.shape, result_shape)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These tests are no longer necessary: we now always open the file when calling ``__getitem__``, which makes
self.assertEqual(mock_fileopen.called, data_was_fetched)
redundant.

The other checks are testing functionality we have tests for elsewhere.


def test_slicing_1d_normal(self):
    # A 'normal' 1d testcase with no empty slices.
    # The slice stop (10) exceeds the dim length (3), so the result is
    # clipped to shape (2,), and a real data fetch is expected.
    self._check_slicing(
        test_shape=(3,),
        indices=Slices[1:10],
        result_shape=(2,),
        data_was_fetched=True,
    )

def test_slicing_1d_empty(self):
    # A 1d testcase with an empty slicing.
    # The 0:0 key means no data is selected, so no file access occurs.
    self._check_slicing(
        test_shape=(3,),
        indices=Slices[0:0],
        result_shape=(0,),
        data_was_fetched=False,
    )

def test_slicing_2d_normal(self):
    # A 2d testcase with no empty slices.
    # The integer key (2) removes dim 0; ':3' keeps 3 of 4 elements of
    # dim 1, so the result is (3,) and the data must be fetched.
    self._check_slicing(
        test_shape=(3, 4),
        indices=Slices[2, :3],
        result_shape=(3,),
        data_was_fetched=True,
    )

def test_slicing_2d_allempty(self):
    # A 2d testcase with all empty slices.
    # Both dims are indexed 0:0, giving a (0, 0) result with no fetch.
    self._check_slicing(
        test_shape=(3, 4),
        indices=Slices[0:0, 0:0],
        result_shape=(0, 0),
        data_was_fetched=False,
    )

def test_slicing_2d_empty_dim0(self):
    # A 2d testcase with an empty slice.
    # Only dim 0 is keyed (0:0); dim 1 passes through at full length (4).
    self._check_slicing(
        test_shape=(3, 4),
        indices=Slices[0:0],
        result_shape=(0, 4),
        data_was_fetched=False,
    )

def test_slicing_2d_empty_dim1(self):
    # A 2d testcase with an empty slice, and an integer index.
    # The integer key removes dim 0; the 0:0 key empties dim 1,
    # so the result is (0,) with no fetch.
    self._check_slicing(
        test_shape=(3, 4),
        indices=Slices[1, 0:0],
        result_shape=(0,),
        data_was_fetched=False,
    )

def test_slicing_complex(self):
    # Multiple dimensions with multiple empty slices.
    # Mixes normal slices, an integer key, a 0:0 key and a 1:1 key over a
    # 7-dim shape; the presence of 0:0 means no fetch, and both the 0:0
    # and 1:1 dims come out zero-length in the (2, 0, 5, 0, 3, 7) result.
    self._check_slicing(
        test_shape=(3, 4, 2, 5, 6, 3, 7),
        indices=Slices[1:3, 2, 0:0, :, 1:1, :100],
        result_shape=(2, 0, 5, 0, 3, 7),
        data_was_fetched=False,
    )


if __name__ == "__main__":
tests.main()
61 changes: 0 additions & 61 deletions lib/iris/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -960,67 +960,6 @@ def __lt__(self, other):
return NotImplemented


def _array_slice_ifempty(keys, shape, dtype):
"""
Detect cases where an array slice will contain no data, as it contains a
zero-length dimension, and produce an equivalent result for those cases.

The function indicates 'empty' slicing cases, by returning an array equal
to the slice result in those cases.

Args:

* keys (indexing key, or tuple of keys):
The argument from an array __getitem__ call.
Only tuples of integers and slices are supported, in particular no
newaxis, ellipsis or array keys.
These are the types of array access usage we expect from Dask.
* shape (tuple of int):
The shape of the array being indexed.
* dtype (numpy.dtype):
The dtype of the array being indexed.

Returns:
result (np.ndarray or None):
If 'keys' contains a slice(0, 0), this is an ndarray of the correct
resulting shape and provided dtype.
Otherwise it is None.

.. note::

This is used to prevent DataProxy arraylike objects from fetching their
file data when wrapped as Dask arrays.
This is because, for Dask >= 2.0, the "dask.array.from_array" call
performs a fetch like [0:0, 0:0, ...], to 'snapshot' array metadata.
This function enables us to avoid triggering a file data fetch in those
cases : This is consistent because the result will not contain any
actual data content.

"""
# Convert a single key into a 1-tuple, so we always have a tuple of keys.
if isinstance(keys, tuple):
keys_tuple = keys
else:
keys_tuple = (keys,)

if any(key == slice(0, 0) for key in keys_tuple):
# An 'empty' slice is present : Return a 'fake' array instead.
target_shape = list(shape)
for i_dim, key in enumerate(keys_tuple):
if key == slice(0, 0):
# Reduce dims with empty slicing to length 0.
target_shape[i_dim] = 0
# Create a prototype result : no memory usage, as some dims are 0.
result = np.zeros(target_shape, dtype=dtype)
# Index with original keys to produce the desired result shape.
# Note : also ok in 0-length dims, as the slice is always '0:0'.
result = result[keys]
else:
result = None

return result


def create_temp_filename(suffix=""):
"""Return a temporary file name.

Expand Down