diff --git a/lib/iris/_lazy_data.py b/lib/iris/_lazy_data.py index 2d643eb808..e23a1963b5 100644 --- a/lib/iris/_lazy_data.py +++ b/lib/iris/_lazy_data.py @@ -69,24 +69,44 @@ def as_lazy_data(data, chunks=_MAX_CHUNK_SIZE): if not is_lazy_data(data): if isinstance(data, ma.MaskedArray): data = array_masked_to_nans(data) - data = data.data data = da.from_array(data, chunks=chunks) return data -def array_masked_to_nans(array, mask=None): +def array_masked_to_nans(array): """ - Convert a masked array to an `ndarray` with NaNs at masked points. + Convert a masked array to a NumPy `ndarray` filled with NaN values. Input + NumPy arrays with no mask are returned unchanged. This is used for dask integration, as dask does not support masked arrays. - Note that any fill value will be lost. + + Args: + + * array: + A NumPy `ndarray` or masked array. + + Returns: + A NumPy `ndarray`. This is the input array if unmasked, or an array + of floating-point values with NaN values where the mask was `True` if + the input array is masked. + + .. note:: + The fill value and mask of the input masked array will be lost. + + .. note:: + Integer masked arrays are cast to 8-byte floats because NaN is a + floating-point value. """ - if mask is None: - mask = array.mask - if array.dtype.kind == 'i': - array = array.astype(np.dtype('f8')) - array[mask] = np.nan - return array + if not ma.isMaskedArray(array): + result = array + else: + if ma.is_masked(array): + if array.dtype.kind == 'i': + array = array.astype(np.dtype('f8')) + mask = array.mask + array[mask] = np.nan + result = array.data + return result def multidim_lazy_stack(stack): diff --git a/lib/iris/fileformats/netcdf.py b/lib/iris/fileformats/netcdf.py index edb54abe74..ecbf562378 100644 --- a/lib/iris/fileformats/netcdf.py +++ b/lib/iris/fileformats/netcdf.py @@ -396,7 +396,6 @@ def __getitem__(self, keys): dataset.close() if isinstance(var, ma.MaskedArray): var = array_masked_to_nans(var) - var = var.data return var def __repr__(self): diff --git a/lib/iris/fileformats/pp.py b/lib/iris/fileformats/pp.py index f1a7019cb8..dc8a6d34e2 100644 --- a/lib/iris/fileformats/pp.py +++ b/lib/iris/fileformats/pp.py @@ -1037,9 +1037,11 @@ def _data_bytes_to_shaped_array(data_bytes, lbpack, boundary_packing, # Reform in row-column order data.shape = data_shape - # Mask the array? + # Convert mdi to NaN. if mdi in data: - data = array_masked_to_nans(data, data == mdi) + if data.dtype.kind == 'i': + data = data.astype(np.dtype('f8')) + data[data == mdi] = np.nan return data diff --git a/lib/iris/tests/results/unit/merge/ProtoCube/register__CubeSig/noise.txt b/lib/iris/tests/results/unit/merge/ProtoCube/register__CubeSig/noise.txt index 3191fd4af6..c330646e72 100644 --- a/lib/iris/tests/results/unit/merge/ProtoCube/register__CubeSig/noise.txt +++ b/lib/iris/tests/results/unit/merge/ProtoCube/register__CubeSig/noise.txt @@ -4,4 +4,4 @@ failed to merge into a single cube. cube.attributes keys differ: 'stuffed' cube.cell_methods differ cube.shape differs: (3,) != (2,) - cube data dtype differs: int64 != float64 \ No newline at end of file + cube data dtype differs: int64 != int8 \ No newline at end of file diff --git a/lib/iris/tests/unit/lazy_data/test_array_masked_to_nans.py b/lib/iris/tests/unit/lazy_data/test_array_masked_to_nans.py index 55ead790f0..8f210dd495 100644 --- a/lib/iris/tests/unit/lazy_data/test_array_masked_to_nans.py +++ b/lib/iris/tests/unit/lazy_data/test_array_masked_to_nans.py @@ -31,42 +31,70 @@ class Test(tests.IrisTest): - def test_masked(self): - masked_array = ma.masked_array([[1.0, 2.0], [3.0, 4.0]], - mask=[[0, 1], [0, 0]]) + def _common_checks(self, result): + self.assertIsInstance(result, np.ndarray) + self.assertFalse(ma.isMaskedArray(result)) - result = array_masked_to_nans(masked_array).data + def test_masked(self): + mask = [[False, True], [False, False]] + masked_array = ma.masked_array([[1.0, 2.0], [3.0, 4.0]], mask=mask) - self.assertIsInstance(result, np.ndarray) - self.assertFalse(isinstance(result, ma.MaskedArray)) - self.assertFalse(ma.is_masked(result)) + result = array_masked_to_nans(masked_array) - self.assertArrayAllClose(np.isnan(result), - [[False, True], [False, False]]) + self._common_checks(result) + self.assertArrayAllClose(np.isnan(result), mask) result[0, 1] = 777.7 self.assertArrayAllClose(result, [[1.0, 777.7], [3.0, 4.0]]) + def test_unmasked(self): + unmasked_array = np.array([1.0, 2.0]) + result = array_masked_to_nans(unmasked_array) + # Non-masked array is returned as-is, without copying. + self.assertIs(result, unmasked_array) + def test_empty_mask(self): masked_array = ma.masked_array([1.0, 2.0], mask=[0, 0]) - result = array_masked_to_nans(masked_array).data + result = array_masked_to_nans(masked_array) - self.assertIsInstance(result, np.ndarray) - self.assertFalse(isinstance(result, ma.MaskedArray)) - self.assertFalse(ma.is_masked(result)) + self._common_checks(result) + self.assertArrayAllClose(result, masked_array.data) + + def test_no_mask(self): + masked_array = ma.masked_array([1.0, 2.0], mask=ma.nomask) - # self.assertIs(result, masked_array.data) - # NOTE: Wanted to check that result in this case is delivered without - # copying. However, it seems that ".data" is not just an internal - # reference, so copying *does* occur in this case. + result = array_masked_to_nans(masked_array) + + self._common_checks(result) self.assertArrayAllClose(result, masked_array.data) - def test_non_masked(self): - unmasked_array = np.array([1.0, 2.0]) - result = array_masked_to_nans(unmasked_array, mask=False) + def test_masked__integers(self): + mask = [[False, True], [False, False]] + masked_array = ma.masked_array([[1, 2], [3, 4]], mask=mask) + + result = array_masked_to_nans(masked_array) + + self._common_checks(result) + self.assertEqual(result.dtype, np.dtype('f8')) + self.assertArrayAllClose(np.isnan(result), mask) + result[0, 1] = 777.7 + self.assertArrayAllClose(result, [[1.0, 777.7], [3.0, 4.0]]) + + def test_unmasked__integers(self): + unmasked_array = np.array([1, 2]) + result = array_masked_to_nans(unmasked_array) # Non-masked array is returned as-is, without copying. self.assertIs(result, unmasked_array) + def test_no_mask__integers(self): + datatype = np.dtype('i4') + masked_array = ma.masked_array([1, 2], dtype=datatype, mask=ma.nomask) + + result = array_masked_to_nans(masked_array) + + self._common_checks(result) + self.assertEqual(result.dtype, datatype) + if __name__ == '__main__': tests.main()