Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions lib/iris/_lazy_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,14 +131,14 @@ def as_concrete_data(data):
return data


def lazy_masked_fill_value(data):
def get_fill_value(data):
"""
Return the fill value of a lazy masked array.
Return the fill value of a concrete or lazy masked array.

Args:

* data:
A dask array, NumPy `ndarray` or masked array
A dask array, NumPy `ndarray` or masked array.

Returns:
The fill value of `data` if `data` represents a masked array, or None.
Expand All @@ -147,9 +147,14 @@ def lazy_masked_fill_value(data):
# If lazy, get the smallest slice of the data from which we can retrieve
# the fill_value.
if is_lazy_data(data):
inds = tuple([0] * (data.ndim-1))
smallest_slice = data[inds][:0]
data = as_concrete_data(smallest_slice)
inds = [0] * data.ndim
Copy link
Member

@pp-mo pp-mo Aug 14, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I would use data = data.flatten()[0:0] instead of all this : The intent would be clearer, no ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Functionally quite different though - for NetCDF that could mean loading the entire dataset and then discarding all the actual values.

if inds:
inds[-1] = slice(0, 1)
data = data[tuple(inds)]
data = as_concrete_data(data)
else:
if isinstance(data, ma.core.MaskedConstant):
data = ma.array(data.data, mask=data.mask)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth a comment why you need to do this ?


# Now get the fill value.
fill_value = None
Expand Down
17 changes: 3 additions & 14 deletions lib/iris/fileformats/netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
import iris.fileformats._pyke_rules
import iris.io
import iris.util
from iris._lazy_data import as_lazy_data, lazy_masked_fill_value
from iris._lazy_data import as_lazy_data, get_fill_value

# Show Pyke inference engine statistics.
DEBUG = False
Expand Down Expand Up @@ -1926,10 +1926,7 @@ def set_packing_ncattrs(cfvar):

if packing is None:
# Determine whether there is a cube MDI value.
if ma.isMaskedArray(cube.data):
fill_value = cube.data.fill_value
else:
fill_value = None
fill_value = get_fill_value(cube.core_data())

# Get the values in a form which is valid for the file format.
data = self._ensure_valid_dtype(cube.data, 'cube', cube)
Expand All @@ -1946,16 +1943,8 @@ def set_packing_ncattrs(cfvar):

else:
# Create the cube CF-netCDF data variable.
# Set `fill_value` if the data array is masked. If the data array
# is lazy masked, we realise the smallest possible slice of the
# array and retrieve the fill value from that.
if packing is None:
if not cube.has_lazy_data() and ma.isMaskedArray(cube.data):
fill_value = cube.data.fill_value
elif cube.has_lazy_data():
fill_value = lazy_masked_fill_value(cube.lazy_data())
else:
fill_value = None
fill_value = get_fill_value(cube.core_data())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 for simplification here !

dtype = cube.dtype.newbyteorder('=')

cf_var = self._dataset.createVariable(
Expand Down
75 changes: 75 additions & 0 deletions lib/iris/tests/unit/lazy_data/test_get_fill_value.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# (C) British Crown Copyright 2017, Met Office
#
# This file is part of Iris.
#
# Iris is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the
# Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Iris is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Iris. If not, see <http://www.gnu.org/licenses/>.
"""Test the function :func:`iris._lazy data.get_fill_value`."""

from __future__ import (absolute_import, division, print_function)
from six.moves import (filter, input, map, range, zip) # noqa

# Import iris.tests first so that some things can be initialised before
# importing anything else.
import iris.tests as tests

import dask.array as da
import numpy as np
import numpy.ma as ma

from iris._lazy_data import as_lazy_data, get_fill_value


class Test_get_fill_value(tests.IrisTest):
def setUp(self):
# Array shape and fill-value.
spec = [((2, 3, 4, 5), -1), # 4d array
((2, 3, 4), -2), # 3d array
((2, 3), -3), # 2d array
((2,), -4), # 1d array
((), -5)] # 0d array
self.arrays = [np.empty(shape) for (shape, _) in spec]
self.masked = [ma.empty(shape, fill_value=fv) for (shape, fv) in spec]
self.lazy_arrays = [as_lazy_data(array) for array in self.arrays]
self.lazy_masked = [as_lazy_data(array) for array in self.masked]
# Add the masked constant case.
mc = ma.array([0], mask=True)[0]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be better to specify a definite, non-default fill-value here, like all the other cases.
Just to make sure we're not getting a default instead.

self.masked.append(mc)
self.lazy_masked.append(as_lazy_data(mc))
# Collect the expected fill-values.
self.expected_fill_values = [fv for (_, fv) in spec]
mc_fill_value = ma.masked_array(0, dtype=mc.dtype).fill_value
self.expected_fill_values.append(mc_fill_value)

def test_arrays(self):
for array in self.arrays:
self.assertIsNone(get_fill_value(array))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As you're testing multiple cases, it would be nice to have a custom context message so if one fails the output shows which one it was.
That goes for all these tests...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this the moment to use test parametrisation from pytest? It's the future 😉

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Meanwhile, back in the past... nose has poor man's parametrisation in the form of test generators: http://nose.readthedocs.io/en/latest/writing_tests.html#test-generators


def test_masked(self):
for array, expected in zip(self.masked, self.expected_fill_values):
result = get_fill_value(array)
self.assertEqual(result, expected)

def test_lazy_arrays(self):
for array in self.lazy_arrays:
self.assertIsNone(get_fill_value(array))

def test_lazy_masked(self):
for array, expected in zip(self.lazy_masked,
self.expected_fill_values):
result = get_fill_value(array)
self.assertEqual(result, expected)


if __name__ == '__main__':
tests.main()
86 changes: 0 additions & 86 deletions lib/iris/tests/unit/lazy_data/test_lazy_masked_fill_value.py

This file was deleted.