Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions lib/iris/_lazy_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,31 @@ def array_masked_to_nans(array, mask=None):
array = array.astype(np.dtype('f8'))
array[mask] = np.nan
return array


def multidim_lazy_stack(stack):
    """
    Build a multidimensional stacked dask array by recursion.

    ``dask.array.stack`` only accepts a 1-dimensional list, so the input
    is taken apart one leading dimension at a time.

    Args:

    * stack:
        An ndarray of dask arrays.

    Returns:
        The input array converted to a lazy dask array.

    """
    ndim = stack.ndim
    if ndim == 0:
        # Nothing to stack : hand back the single contained element.
        return stack.item()
    if ndim == 1:
        # Base case : a plain 1-d sequence goes directly to dask.
        return da.stack(list(stack))
    # Higher dimensions : build each sub-stack first, then stack those.
    substacks = [multidim_lazy_stack(part) for part in stack]
    return da.stack(substacks)
25 changes: 2 additions & 23 deletions lib/iris/_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
import numpy as np
import numpy.ma as ma

from iris._lazy_data import array_masked_to_nans, as_lazy_data, is_lazy_data
from iris._lazy_data import as_lazy_data, is_lazy_data, multidim_lazy_stack
import iris.cube
import iris.coords
import iris.exceptions
Expand Down Expand Up @@ -1069,27 +1069,6 @@ def derive_space(groups, relation_matrix, positions, function_matrix=None):
return space


def _multidim_daskstack(stack):
"""
Recursively build a multidensional stacked dask array.

The argument is an ndarray of dask arrays.
This is needed because dask.array.stack only accepts a 1-dimensional list.

"""
if stack.ndim == 0:
# A 0-d array cannot be merged.
result = stack.item()
elif stack.ndim == 1:
# 'Another' base case : simple 1-d goes direct in dask.
result = da.stack(list(stack))
else:
# Recurse because dask.stack does not do multi-dimensional.
result = da.stack([_multidim_daskstack(subarray)
for subarray in stack])
return result


class ProtoCube(object):
"""
Framework for merging source-cubes into one or more higher
Expand Down Expand Up @@ -1234,7 +1213,7 @@ def merge(self, unique=True):
data = as_lazy_data(data, chunks=data.shape)
stack[nd_index] = data

merged_data = _multidim_daskstack(stack)
merged_data = multidim_lazy_stack(stack)
if all_have_data:
# All inputs were concrete, so turn the result back into a
# normal array.
Expand Down
19 changes: 7 additions & 12 deletions lib/iris/fileformats/um/_fast_load_structured_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,10 @@
from netCDF4 import netcdftime
import numpy as np

from iris._lazy_data import as_lazy_data, multidim_lazy_stack
from iris.fileformats.um._optimal_array_structuring import \
optimal_array_structure

from iris.fileformats.pp import PPField3
from iris._lazy_data import as_lazy_data


class FieldCollation(object):
"""
Expand Down Expand Up @@ -89,15 +87,12 @@ def data(self):
if not self._structure_calculated:
self._calculate_structure()
if self._data_cache is None:
data_arrays = [as_lazy_data(f._data, chunks=f._data.shape)
for f in self.fields]
vector_dims_list = list(self.vector_dims_shape)
vector_dims_list.reverse()
self._data_cache = data_arrays
for size in vector_dims_list:
self._data_cache = [da.stack(self._data_cache[i:i+size]) for i
in range(0, len(self._data_cache), size)]
self._data_cache, = self._data_cache
stack = np.empty(self.vector_dims_shape, 'object')
for nd_index, field in zip(np.ndindex(self.vector_dims_shape),
self.fields):
stack[nd_index] = as_lazy_data(field._data,
chunks=field._data.shape)
self._data_cache = multidim_lazy_stack(stack)
return self._data_cache

@property
Expand Down
2 changes: 1 addition & 1 deletion lib/iris/tests/unit/lazy_data/test_array_masked_to_nans.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#
# You should have received a copy of the GNU Lesser General Public License
# along with Iris. If not, see <http://www.gnu.org/licenses/>.
"""Test :meth:`iris._lazy data.array_masked_to_nans` method."""
"""Test function :func:`iris._lazy_data.array_masked_to_nans`."""
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍


from __future__ import (absolute_import, division, print_function)
from six.moves import (filter, input, map, range, zip) # noqa
Expand Down
2 changes: 1 addition & 1 deletion lib/iris/tests/unit/lazy_data/test_is_lazy_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#
# You should have received a copy of the GNU Lesser General Public License
# along with Iris. If not, see <http://www.gnu.org/licenses/>.
"""Test :meth:`iris._lazy data.is_lazy_data` method."""
"""Test function :func:`iris._lazy_data.is_lazy_data`."""
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍


from __future__ import (absolute_import, division, print_function)
from six.moves import (filter, input, map, range, zip) # noqa
Expand Down
63 changes: 63 additions & 0 deletions lib/iris/tests/unit/lazy_data/test_multidim_lazy_stack.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# (C) British Crown Copyright 2017, Met Office
#
# This file is part of Iris.
#
# Iris is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the
# Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Iris is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Iris. If not, see <http://www.gnu.org/licenses/>.
"""Test function :func:`iris._lazy_data.multidim_lazy_stack`."""

from __future__ import (absolute_import, division, print_function)
from six.moves import (filter, input, map, range, zip) # noqa

# Import iris.tests first so that some things can be initialised before
# importing anything else.
import iris.tests as tests

import numpy as np
import dask.array as da

from iris._lazy_data import as_lazy_data, multidim_lazy_stack


class Test_multidim_lazy_stack(tests.IrisTest):
def _check(self, stack_shape):
    """
    Stack lazy constant-valued arrays laid out as 'stack_shape', and
    check the type, shape and content of the result.

    """
    # Every element of the stack is a lazy array of this shape.
    stack_element_shape = (4, 5)
    full_shape = stack_shape + stack_element_shape

    # Give each stack position its own distinct fill value.
    vals = np.arange(np.prod(stack_shape)).reshape(stack_shape)
    stack = np.empty(stack_shape, 'object')
    expected = np.empty(full_shape, dtype=int)
    for position, fill in np.ndenumerate(vals):
        element = fill * np.ones(stack_element_shape)
        stack[position] = as_lazy_data(element)
        expected[position] = fill

    result = multidim_lazy_stack(stack)
    self.assertEqual(result.shape, full_shape)
    self.assertIsInstance(result, da.core.Array)
    self.assertArrayAllClose(result.compute(), expected)

def test_0d_lazy_stack(self):
    # An empty stack shape : a single, unstacked element.
    self._check(())

def test_1d_lazy_stack(self):
    # A one-dimensional stack of two elements.
    self._check((2,))

def test_2d_lazy_stack(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any chance of a >2D test?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems a bit unnecessary? The 2d test is already testing the recursivity

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed, however part of testing is checking edge cases, and I don't know what would happen if I passed a 7D array to it. Evidently we don't need to test all possible dimensionalities (!), but it would be good to test something that's outside of the boundary of logical intent for the functionality being tested. This is just in case an unsafe assumption has been made about how the function will be used; "of course, no-one would ever use this for more than a 2D input".

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not hung up on this though, so I'm not going to make a right fuss if it's left out. @bjlittle @pp-mo do you have any inputs to this?

Copy link
Member

@pp-mo pp-mo Mar 10, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From a mockist / white-box-y point of view, the existing tests do already cover all 3 code branches.
The implementation relies on iteration over the passed stack to deconstruct the input one dimension at a time, but 2d is already checking that.
One more would do no harm, I suppose, but I don't really expect it to go wrong !

shape = (3, 2)
result = self._check(shape)


if __name__ == '__main__':
tests.main()