diff --git a/lib/iris/_lazy_data.py b/lib/iris/_lazy_data.py index e96f24da86..2d643eb808 100644 --- a/lib/iris/_lazy_data.py +++ b/lib/iris/_lazy_data.py @@ -87,3 +87,31 @@ def array_masked_to_nans(array, mask=None): array = array.astype(np.dtype('f8')) array[mask] = np.nan return array + + +def multidim_lazy_stack(stack): + """ + Recursively build a multidimensional stacked dask array. + + This is needed because dask.array.stack only accepts a 1-dimensional list. + + Args: + + * stack: + An ndarray of dask arrays. + + Returns: + The input array converted to a lazy dask array. + + """ + if stack.ndim == 0: + # A 0-d array cannot be stacked. + result = stack.item() + elif stack.ndim == 1: + # Another base case : simple 1-d goes direct in dask. + result = da.stack(list(stack)) + else: + # Recurse because dask.stack does not do multi-dimensional. + result = da.stack([multidim_lazy_stack(subarray) + for subarray in stack]) + return result diff --git a/lib/iris/_merge.py b/lib/iris/_merge.py index bb943b9415..972e962d81 100644 --- a/lib/iris/_merge.py +++ b/lib/iris/_merge.py @@ -33,7 +33,7 @@ import numpy as np import numpy.ma as ma -from iris._lazy_data import array_masked_to_nans, as_lazy_data, is_lazy_data +from iris._lazy_data import as_lazy_data, is_lazy_data, multidim_lazy_stack import iris.cube import iris.coords import iris.exceptions @@ -1069,27 +1069,6 @@ def derive_space(groups, relation_matrix, positions, function_matrix=None): return space -def _multidim_daskstack(stack): - """ - Recursively build a multidensional stacked dask array. - - The argument is an ndarray of dask arrays. - This is needed because dask.array.stack only accepts a 1-dimensional list. - - """ - if stack.ndim == 0: - # A 0-d array cannot be merged. - result = stack.item() - elif stack.ndim == 1: - # 'Another' base case : simple 1-d goes direct in dask. - result = da.stack(list(stack)) - else: - # Recurse because dask.stack does not do multi-dimensional.
- result = da.stack([_multidim_daskstack(subarray) - for subarray in stack]) - return result - - class ProtoCube(object): """ Framework for merging source-cubes into one or more higher @@ -1234,7 +1213,7 @@ def merge(self, unique=True): data = as_lazy_data(data, chunks=data.shape) stack[nd_index] = data - merged_data = _multidim_daskstack(stack) + merged_data = multidim_lazy_stack(stack) if all_have_data: # All inputs were concrete, so turn the result back into a # normal array. diff --git a/lib/iris/fileformats/um/_fast_load_structured_fields.py b/lib/iris/fileformats/um/_fast_load_structured_fields.py index 25c7f21059..402c93803f 100644 --- a/lib/iris/fileformats/um/_fast_load_structured_fields.py +++ b/lib/iris/fileformats/um/_fast_load_structured_fields.py @@ -32,12 +32,10 @@ from netCDF4 import netcdftime import numpy as np +from iris._lazy_data import as_lazy_data, multidim_lazy_stack from iris.fileformats.um._optimal_array_structuring import \ optimal_array_structure -from iris.fileformats.pp import PPField3 -from iris._lazy_data import as_lazy_data - class FieldCollation(object): """ @@ -89,15 +87,12 @@ def data(self): if not self._structure_calculated: self._calculate_structure() if self._data_cache is None: - data_arrays = [as_lazy_data(f._data, chunks=f._data.shape) - for f in self.fields] - vector_dims_list = list(self.vector_dims_shape) - vector_dims_list.reverse() - self._data_cache = data_arrays - for size in vector_dims_list: - self._data_cache = [da.stack(self._data_cache[i:i+size]) for i - in range(0, len(self._data_cache), size)] - self._data_cache, = self._data_cache + stack = np.empty(self.vector_dims_shape, 'object') + for nd_index, field in zip(np.ndindex(self.vector_dims_shape), + self.fields): + stack[nd_index] = as_lazy_data(field._data, + chunks=field._data.shape) + self._data_cache = multidim_lazy_stack(stack) return self._data_cache @property diff --git a/lib/iris/tests/unit/lazy_data/test_array_masked_to_nans.py 
b/lib/iris/tests/unit/lazy_data/test_array_masked_to_nans.py index de55026e55..55ead790f0 100644 --- a/lib/iris/tests/unit/lazy_data/test_array_masked_to_nans.py +++ b/lib/iris/tests/unit/lazy_data/test_array_masked_to_nans.py @@ -14,7 +14,7 @@ # # You should have received a copy of the GNU Lesser General Public License # along with Iris. If not, see <http://www.gnu.org/licenses/>. -"""Test :meth:`iris._lazy data.array_masked_to_nans` method.""" +"""Test function :func:`iris._lazy_data.array_masked_to_nans`.""" from __future__ import (absolute_import, division, print_function) from six.moves import (filter, input, map, range, zip) # noqa diff --git a/lib/iris/tests/unit/lazy_data/test_is_lazy_data.py b/lib/iris/tests/unit/lazy_data/test_is_lazy_data.py index 1c40d9a475..5a9595fa1f 100644 --- a/lib/iris/tests/unit/lazy_data/test_is_lazy_data.py +++ b/lib/iris/tests/unit/lazy_data/test_is_lazy_data.py @@ -14,7 +14,7 @@ # # You should have received a copy of the GNU Lesser General Public License # along with Iris. If not, see <http://www.gnu.org/licenses/>. -"""Test :meth:`iris._lazy data.is_lazy_data` method.""" +"""Test function :func:`iris._lazy_data.is_lazy_data`.""" from __future__ import (absolute_import, division, print_function) from six.moves import (filter, input, map, range, zip) # noqa diff --git a/lib/iris/tests/unit/lazy_data/test_multidim_lazy_stack.py b/lib/iris/tests/unit/lazy_data/test_multidim_lazy_stack.py new file mode 100644 index 0000000000..80839b02b5 --- /dev/null +++ b/lib/iris/tests/unit/lazy_data/test_multidim_lazy_stack.py @@ -0,0 +1,63 @@ +# (C) British Crown Copyright 2017, Met Office +# +# This file is part of Iris. +# +# Iris is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or +# (at your option) any later version.
+# +# Iris is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with Iris. If not, see <http://www.gnu.org/licenses/>. +"""Test function :func:`iris._lazy_data.multidim_lazy_stack`.""" + +from __future__ import (absolute_import, division, print_function) +from six.moves import (filter, input, map, range, zip) # noqa + +# Import iris.tests first so that some things can be initialised before +# importing anything else. +import iris.tests as tests + +import numpy as np +import dask.array as da + +from iris._lazy_data import as_lazy_data, multidim_lazy_stack + + +class Test_multidim_lazy_stack(tests.IrisTest): + def _check(self, stack_shape): + vals = np.arange(np.prod(stack_shape)).reshape(stack_shape) + stack = np.empty(stack_shape, 'object') + # Define the shape of each element in the stack. + stack_element_shape = (4, 5) + expected = np.empty(stack_shape + stack_element_shape, + dtype=int) + for index, val in np.ndenumerate(vals): + stack[index] = as_lazy_data(val * np.ones(stack_element_shape)) + + expected[index] = val + result = multidim_lazy_stack(stack) + self.assertEqual(result.shape, stack_shape + stack_element_shape) + self.assertIsInstance(result, da.core.Array) + self.assertArrayAllClose(result.compute(), expected) + + def test_0d_lazy_stack(self): + shape = () + result = self._check(shape) + + def test_1d_lazy_stack(self): + shape = (2,) + result = self._check(shape) + + def test_2d_lazy_stack(self): + shape = (3, 2) + result = self._check(shape) + + +if __name__ == '__main__': + tests.main()