Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 30 additions & 10 deletions lib/iris/_lazy_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,24 +69,44 @@ def as_lazy_data(data, chunks=_MAX_CHUNK_SIZE):
if not is_lazy_data(data):
if isinstance(data, ma.MaskedArray):
data = array_masked_to_nans(data)
data = data.data
data = da.from_array(data, chunks=chunks)
return data


def array_masked_to_nans(array, mask=None):
def array_masked_to_nans(array):
"""
Convert a masked array to an `ndarray` with NaNs at masked points.
Convert a masked array to a NumPy `ndarray` filled with NaN values. Input
NumPy arrays with no mask are returned unchanged.
This is used for dask integration, as dask does not support masked arrays.
Note that any fill value will be lost.

Args:

* array:
A NumPy `ndarray` or masked array.

Returns:
A NumPy `ndarray`. This is the input array if unmasked, or an array
of floating-point values with NaN values where the mask was `True` if
the input array is masked.

.. note::
The fill value and mask of the input masked array will be lost.

.. note::
Integer masked arrays are cast to 8-byte floats because NaN is a
floating-point value.

"""
if mask is None:
mask = array.mask
if array.dtype.kind == 'i':
array = array.astype(np.dtype('f8'))
array[mask] = np.nan
return array
if not ma.isMaskedArray(array):
result = array
else:
if ma.is_masked(array):
if array.dtype.kind == 'i':
array = array.astype(np.dtype('f8'))
mask = array.mask
array[mask] = np.nan
result = array.data
return result
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is nice 👍



def multidim_lazy_stack(stack):
Expand Down
1 change: 0 additions & 1 deletion lib/iris/fileformats/netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,6 @@ def __getitem__(self, keys):
dataset.close()
if isinstance(var, ma.MaskedArray):
var = array_masked_to_nans(var)
var = var.data
return var

def __repr__(self):
Expand Down
6 changes: 4 additions & 2 deletions lib/iris/fileformats/pp.py
Original file line number Diff line number Diff line change
Expand Up @@ -1037,9 +1037,11 @@ def _data_bytes_to_shaped_array(data_bytes, lbpack, boundary_packing,
# Reform in row-column order
data.shape = data_shape

# Mask the array?
# Convert mdi to NaN.
if mdi in data:
data = array_masked_to_nans(data, data == mdi)
if data.dtype.kind == 'i':
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we pay attention to the name of array_masked_to_nans, it tells us that it will convert a masked array to a NaN-filled array. If we don't have masked data (such as here), let's just not use it and instead do the processing we actually need. That is, we cast int -> float if necessary and replace mdi values with NaN.

data = data.astype(np.dtype('f8'))
data[data == mdi] = np.nan

return data

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ failed to merge into a single cube.
cube.attributes keys differ: 'stuffed'
cube.cell_methods differ
cube.shape differs: (3,) != (2,)
cube data dtype differs: int64 != float64
cube data dtype differs: int64 != int8
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this is fun: it looks like the changes in this PR have fixed this test result -- note how the dtype specified in the error text here now matches the dtype set in the test code...

Copy link
Member

@lbdreyer lbdreyer Mar 14, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this is what it originally was, but as part of the previous dask work it was changed to float64 because the cube data array was a masked array. However, the masked array doesn't contain any masked values, so the dask array wouldn't contain any NaNs and could therefore be int.

Checking both ma.isMaskedArray(array) and ma.is_masked(array), as this PR now does, corrects this behaviour and this test.

68 changes: 48 additions & 20 deletions lib/iris/tests/unit/lazy_data/test_array_masked_to_nans.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,42 +31,70 @@


class Test(tests.IrisTest):
def test_masked(self):
masked_array = ma.masked_array([[1.0, 2.0], [3.0, 4.0]],
mask=[[0, 1], [0, 0]])
def _common_checks(self, result):
self.assertIsInstance(result, np.ndarray)
self.assertFalse(ma.isMaskedArray(result))

result = array_masked_to_nans(masked_array).data
def test_masked(self):
mask = [[False, True], [False, False]]
masked_array = ma.masked_array([[1.0, 2.0], [3.0, 4.0]], mask=mask)

self.assertIsInstance(result, np.ndarray)
self.assertFalse(isinstance(result, ma.MaskedArray))
self.assertFalse(ma.is_masked(result))
result = array_masked_to_nans(masked_array)

self.assertArrayAllClose(np.isnan(result),
[[False, True], [False, False]])
self._common_checks(result)
self.assertArrayAllClose(np.isnan(result), mask)
result[0, 1] = 777.7
self.assertArrayAllClose(result, [[1.0, 777.7], [3.0, 4.0]])

def test_unmasked(self):
unmasked_array = np.array([1.0, 2.0])
result = array_masked_to_nans(unmasked_array)
# Non-masked array is returned as-is, without copying.
self.assertIs(result, unmasked_array)

def test_empty_mask(self):
Copy link
Member

@pp-mo pp-mo Mar 10, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Masked arrays being the horrible things they are, it might also be worth checking the case where the 'mask' keyword is not set (the actual default is "mask=np.ma.nomask").
For a ghastly distinction, this is not the same thing as mask=zeros(...) or mask=None or mask=False, all of which allocate an actual (all-False) mask array:

>>> print np.ma.masked_array([1, 2], mask=False).mask
[False False]
>>> print np.ma.masked_array([1, 2], mask=None).mask
[False False]
>>> print np.ma.masked_array([1, 2]).mask
False
>>> print np.ma.masked_array([1, 2], mask=np.ma.nomask).mask
False
>>> 

Urrrgh! Don't get me started...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if this is getting a little close to testing the actual ma implementation! Which, perhaps, it could do with a little more of...

masked_array = ma.masked_array([1.0, 2.0], mask=[0, 0])

result = array_masked_to_nans(masked_array).data
result = array_masked_to_nans(masked_array)

self.assertIsInstance(result, np.ndarray)
self.assertFalse(isinstance(result, ma.MaskedArray))
self.assertFalse(ma.is_masked(result))
self._common_checks(result)
self.assertArrayAllClose(result, masked_array.data)

def test_no_mask(self):
masked_array = ma.masked_array([1.0, 2.0], mask=ma.nomask)

# self.assertIs(result, masked_array.data)
# NOTE: Wanted to check that result in this case is delivered without
# copying. However, it seems that ".data" is not just an internal
# reference, so copying *does* occur in this case.
result = array_masked_to_nans(masked_array)

self._common_checks(result)
self.assertArrayAllClose(result, masked_array.data)

def test_non_masked(self):
unmasked_array = np.array([1.0, 2.0])
result = array_masked_to_nans(unmasked_array, mask=False)
def test_masked__integers(self):
mask = [[False, True], [False, False]]
masked_array = ma.masked_array([[1, 2], [3, 4]], mask=mask)

result = array_masked_to_nans(masked_array)

self._common_checks(result)
self.assertEqual(result.dtype, np.dtype('f8'))
self.assertArrayAllClose(np.isnan(result), mask)
result[0, 1] = 777.7
self.assertArrayAllClose(result, [[1.0, 777.7], [3.0, 4.0]])

def test_unmasked__integers(self):
unmasked_array = np.array([1, 2])
result = array_masked_to_nans(unmasked_array)
# Non-masked array is returned as-is, without copying.
self.assertIs(result, unmasked_array)

def test_no_mask__integers(self):
datatype = np.dtype('i4')
masked_array = ma.masked_array([1, 2], dtype=datatype, mask=ma.nomask)

result = array_masked_to_nans(masked_array)

self._common_checks(result)
self.assertEqual(result.dtype, datatype)


if __name__ == '__main__':
tests.main()