Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 75 additions & 5 deletions lib/iris/_lazy_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,14 @@ def array_masked_to_nans(array):
result = array
else:
if ma.is_masked(array):
if array.dtype.kind == 'i':
array = array.astype(np.dtype('f8'))
mask = array.mask
array[mask] = np.nan
result = array.data
if array.dtype.kind == 'i':
result = array.data.astype(np.dtype('f8'))
else:
result = array.data.copy()
result[mask] = np.nan
else:
result = array.data
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lbdreyer This addresses the following numpy warning:

MaskedArrayFutureWarning: setting an item on a masked array which has a shared mask will not copy the mask and also change the original mask array in the future.
Check the NumPy 1.11 release notes for more information.

Which was generated by array[mask] = np.nan whenever the data was masked, but the dtype was not integral.

return result


Expand All @@ -118,7 +121,7 @@ def multidim_lazy_stack(stack):
Args:

* stack:
+ An ndarray of dask arrays.
An ndarray of dask arrays.

Returns:
The input array converted to a lazy dask array.
Expand All @@ -135,3 +138,70 @@ def multidim_lazy_stack(stack):
result = da.stack([multidim_lazy_stack(subarray)
for subarray in stack])
return result


def convert_nans_array(array, nans_replacement=None, result_dtype=None):
    """
    Convert a :class:`~numpy.ndarray` that may contain one or more NaN values
    to either a :class:`~numpy.ma.core.MaskedArray` or a
    :class:`~numpy.ndarray` with the NaN values filled.

    Args:

    * array:
        The :class:`~numpy.ndarray` to be converted.

    Kwargs:

    * nans_replacement:
        If `nans_replacement` is None, then raise a :class:`ValueError` if
        the `array` contains any NaN values (default behaviour).
        If `nans_replacement` is `numpy.ma.masked`, then convert the `array`
        to a :class:`~numpy.ma.core.MaskedArray` (this copies the data).
        Otherwise, use the specified `nans_replacement` value to fill the
        NaN points of the `array`, which is modified in-place.

    * result_dtype:
        Cast the resultant array to this target :class:`~numpy.dtype`.
        Note that this cast is *not* applied in the pass-through cases
        described below (already-masked input, or non-floating dtype).

    Returns:
        An :class:`numpy.ndarray`.

    .. note::
        An input array that is either a :class:`~numpy.ma.core.MaskedArray`
        or has a non-floating (e.g. integral) dtype will be returned
        unaltered, since such arrays cannot contain NaNs.

    .. note::
        In some cases (filling, or a dtype cast), the input array is
        modified in-place or replaced by a cast copy; callers must not
        rely on the input remaining unchanged.

    """
    # Only floating-point, non-masked arrays can contain NaNs; everything
    # else passes straight through.
    if not ma.isMaskedArray(array) and array.dtype.kind == 'f':
        # First, calculate the mask (before any dtype cast, as casting
        # NaNs to another dtype may lose them).
        mask = np.isnan(array)
        # Now, cast the dtype, if required.
        if result_dtype is not None:
            result_dtype = np.dtype(result_dtype)
            if array.dtype != result_dtype:
                array = array.astype(result_dtype)
        # Finally, mask or fill the data, as required, or raise an exception
        # if we detect there are NaNs present and we didn't expect any.
        if np.any(mask):
            if nans_replacement is None:
                emsg = 'Array contains unexpected NaNs.'
                raise ValueError(emsg)
            elif nans_replacement is ma.masked:
                # Mask the array with the default fill_value.
                array = ma.masked_array(array, mask=mask)
            else:
                # Check the fill value is appropriate for the
                # result array dtype.
                try:
                    [fill_value] = np.asarray([nans_replacement],
                                              dtype=array.dtype)
                except OverflowError:
                    emsg = 'Fill value of {!r} invalid for array result {!r}.'
                    raise ValueError(emsg.format(nans_replacement,
                                                 array.dtype))
                # Fill the array in-place.
                array[mask] = fill_value
    return array
4 changes: 3 additions & 1 deletion lib/iris/analysis/trajectory.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# (C) British Crown Copyright 2010 - 2016, Met Office
# (C) British Crown Copyright 2010 - 2017, Met Office
#
# This file is part of Iris.
#
Expand Down Expand Up @@ -341,6 +341,8 @@ def interpolate(cube, sample_points, method=None):
# This is **not** proper mask handling, because we cannot produce a
# masked result, but it ensures we use a "filled" version of the
# input in this case.
if cube.fill_value is not None:
source_data.fill_value = cube.fill_value
Copy link
Member Author

@bjlittle bjlittle Mar 16, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The way trajectory handles masks is pretty ropey ... this change appears appropriate.

The preceding inline code comment says it all really ...

source_data = source_data.filled()
new_cube.data[:] = source_data
# NOTE: we assign to "new_cube.data[:]" and *not* just "new_cube.data",
Expand Down
24 changes: 11 additions & 13 deletions lib/iris/coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,7 @@
import warnings
import zlib

import biggus
import iris._lazy_data
from iris._lazy_data import is_lazy_data
import dask.array as da
import netcdftime
import numpy as np
Expand Down Expand Up @@ -1611,7 +1610,7 @@ def _sanitise_array(self, src, ndmin):
@property
def points(self):
"""Property containing the points values as a numpy array"""
if iris._lazy_data.is_lazy_data(self._points):
if is_lazy_data(self._points):
self._points = self._points.compute()
return self._points.view()

Expand All @@ -1623,9 +1622,9 @@ def points(self, points):
# of 1 and is either a numpy or lazy array.
# This will avoid Scalar coords with points of shape () rather
# than the desired (1,)
if iris._lazy_data.is_lazy_data(points):
if is_lazy_data(points):
if points.shape == ():
points = points * np.ones(1)
points = da.reshape(points, (1,))
elif not isinstance(points, iris.aux_factory._LazyArray):
points = self._sanitise_array(points, 1)
# If points are already defined for this coordinate,
Expand All @@ -1649,8 +1648,8 @@ def bounds(self):
"""
if self._bounds is not None:
bounds = self._bounds
if isinstance(bounds, biggus.Array):
bounds = bounds.ndarray()
if is_lazy_data(bounds):
bounds = bounds.compute()
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a better way to do this now? I stumbled across this biggus usage and felt obliged to address it ... apologies for being gratuitous in this PR.

Copy link
Member Author

@bjlittle bjlittle Mar 16, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps @pp-mo's as_concrete_data follow-on PR #2422 might make this melt away? @pp-mo is that the case?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Anything with "compute" in it should be hit by #2422 to turn it into an "as_concrete_data" call.

self._bounds = bounds
bounds = bounds.view()
else:
Expand All @@ -1662,8 +1661,8 @@ def bounds(self):
def bounds(self, bounds):
# Ensure the bounds are a compatible shape.
if bounds is not None:
if not isinstance(bounds, (iris.aux_factory._LazyArray,
biggus.Array)):
if not (isinstance(bounds, iris.aux_factory._LazyArray) or
is_lazy_data(bounds)):
bounds = self._sanitise_array(bounds, 2)
# NB. Use _points to avoid triggering any lazy array.
if self._points.shape != bounds.shape[:-1]:
Expand Down Expand Up @@ -1742,10 +1741,9 @@ def measure(self):
def data(self):
"""Property containing the data values as a numpy array"""
data = self._data
if isinstance(data, biggus.Array):
data = data.ndarray()
self._data = data
return data.view()
if is_lazy_data(self._data):
self._data = self._data.compute()
Copy link
Member Author

@bjlittle bjlittle Mar 16, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, is this now the best way to do this ... again #2422 might address this in a better way

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Likewise subject to #2422

return self._data.view()

@data.setter
def data(self, data):
Expand Down
22 changes: 11 additions & 11 deletions lib/iris/cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@
import iris._concatenate
import iris._constraints
from iris._deprecation import warn_deprecated
from iris._lazy_data import array_masked_to_nans, as_lazy_data, is_lazy_data
from iris._lazy_data import (array_masked_to_nans, as_lazy_data,
convert_nans_array, is_lazy_data)
import iris._merge
import iris.analysis
from iris.analysis.cartography import wrap_lons
Expand Down Expand Up @@ -1733,16 +1734,15 @@ def data(self):
if self.has_lazy_data():
try:
data = self._dask_array.compute()
mask = np.isnan(data)
if data.dtype != self.dtype:
data = data.astype(self.dtype)
self.dtype = None
if np.all(~mask):
self._numpy_array = data
else:
fv = self.fill_value
self._numpy_array = ma.masked_array(data, mask=mask,
fill_value=fv)
# Now convert the data payload from a NaN array to a
# masked array, and if appropriate cast to the specified
# cube result dtype.
result = convert_nans_array(data,
nans_replacement=ma.masked,
result_dtype=self.dtype)
self._numpy_array = result
self.dtype = None

except MemoryError:
msg = "Failed to create the cube's data as there was not" \
" enough memory available.\n" \
Expand Down
19 changes: 13 additions & 6 deletions lib/iris/fileformats/netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
import string
import warnings

import biggus
import dask.array as da
import netCDF4
import numpy as np
import numpy.ma as ma
Expand All @@ -56,7 +56,8 @@
import iris.fileformats._pyke_rules
import iris.io
import iris.util
from iris._lazy_data import array_masked_to_nans, as_lazy_data
from iris._lazy_data import (array_masked_to_nans, as_lazy_data,
convert_nans_array)

# Show Pyke inference engine statistics.
DEBUG = False
Expand Down Expand Up @@ -1938,16 +1939,22 @@ def set_packing_ncattrs(cfvar):
# Explicitly assign the fill_value, which will be the type default
# in the case of an unmasked array.
if packing is None:
fill_value = cube.lazy_data().fill_value
fill_value = cube.fill_value
dtype = cube.lazy_data().dtype.newbyteorder('=')

cf_var = self._dataset.createVariable(
cf_name, dtype,
dimension_names, fill_value=fill_value,
**kwargs)
set_packing_ncattrs(cf_var)
# stream the data
biggus.save([cube.lazy_data()], [cf_var], masked=True)

# Now stream the cube data payload straight to the netCDF
# data variable within the netCDF file, where any NaN values
# are replaced with the specified cube fill_value.
data = da.map_blocks(convert_nans_array, cube.lazy_data(),
nans_replacement=cube.fill_value,
result_dtype=cube.dtype)
da.store([data], [cf_var])

if cube.standard_name:
_setncattr(cf_var, 'standard_name', cube.standard_name)
Expand Down Expand Up @@ -2045,7 +2052,7 @@ def save(cube, filename, netcdf_format='NETCDF4', local_keys=None,
* Keyword arguments specifying how to save the data are applied
to each cube. To use different settings for different cubes, use
the NetCDF Context manager (:class:`~Saver`) directly.
* The save process will stream the data payload to the file using biggus,
* The save process will stream the data payload to the file using dask,
enabling large data payloads to be saved and maintaining the 'lazy'
status of the cube's data payload, unless the netcdf_format is explicitly
specified to be 'NETCDF3' or 'NETCDF3_CLASSIC'.
Expand Down
5 changes: 0 additions & 5 deletions lib/iris/tests/integration/test_netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ def test_save(self):
iris.save(self.cube, filename)
self.assertCDL(filename)

@tests.skip_biggus
def test_save_load_loop(self):
# Tests an issue where the variable names in the formula
# terms changed to the standard_names instead of the variable names
Expand Down Expand Up @@ -201,7 +200,6 @@ def test_patching_conventions_attribute(self):
class TestLazySave(tests.IrisTest):

@tests.skip_data
@tests.skip_biggus
def test_lazy_preserved_save(self):
fpath = tests.get_data_path(('NetCDF', 'label_and_climate',
'small_FC_167_mon_19601101.nc'))
Expand All @@ -212,7 +210,6 @@ def test_lazy_preserved_save(self):
saver.write(acube)
self.assertTrue(acube.has_lazy_data())

@tests.skip_biggus
def test_lazy_mask_preserve_fill_value(self):
cube = iris.cube.Cube(np.ma.array([0, 1], mask=[False, True],
fill_value=-1))
Expand Down Expand Up @@ -272,7 +269,6 @@ def test_concatenate_cell_measure_match(self):
self.assertEqual(cubes[0]._cell_measures_and_dims, cm_and_dims)
self.assertEqual(len(cubes), 1)

@tests.skip_biggus
def test_round_trip(self):
cube, = iris.load(self.fname)
with self.temp_filename(suffix='.nc') as filename:
Expand Down Expand Up @@ -430,7 +426,6 @@ def test_multi_packed_single_dtype(self):
# Read PP input file.
self._multi_test('multi_packed_single_dtype.cdl')

@tests.skip_biggus
def test_multi_packed_multi_dtype(self):
"""Test saving multiple packed cubes with pack_dtype list."""
# Read PP input file.
Expand Down
2 changes: 1 addition & 1 deletion lib/iris/tests/integration/test_trajectory.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ def test_tri_polar__nearest(self):
# TODO: arguably, we should support masked data properly in the
# interpolation routine. In the legacy code, that is unfortunately
# just not the case.
test_cube.data.set_fill_value(0.0)
test_cube.fill_value = 0
Copy link
Member Author

@bjlittle bjlittle Mar 16, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm just aping the same behaviour for this test, in terms of trajectories approach to dealing with masked data, in order to yield the same expected result. Seems appropriate.


# Test points on a regular global grid, with unrelated steps + offsets
# and an extended range of longitude values.
Expand Down
8 changes: 4 additions & 4 deletions lib/iris/tests/results/netcdf/netcdf_monotonic.cml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" ?>
<cubes xmlns="urn:x-iris:cubeml-0.2">
<cube standard_name="eastward_wind" units="m s-1" var_name="wind1">
<cube core-dtype="int32" dtype="int32" standard_name="eastward_wind" units="m s-1" var_name="wind1">
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At some point we need to take out these core-dtype from the cml.
Perhaps at the end of this sprint (considering we want to target fill_value/dtype/get-tests-working this sprint)

<attributes>
<attribute name="test" value="weak-monotonic time coordinate"/>
</attributes>
Expand All @@ -18,7 +18,7 @@
<cellMethods/>
<data checksum="0xd4e7a32f" dtype="int32" shape="(3, 3, 3)"/>
</cube>
<cube standard_name="eastward_wind" units="m s-1" var_name="wind2">
<cube core-dtype="int32" dtype="int32" standard_name="eastward_wind" units="m s-1" var_name="wind2">
<attributes>
<attribute name="test" value="masked monotonic time coordinate"/>
</attributes>
Expand All @@ -36,7 +36,7 @@
<cellMethods/>
<data checksum="0xd4e7a32f" dtype="int32" shape="(3, 3, 3)"/>
</cube>
<cube standard_name="eastward_wind" units="m s-1" var_name="wind3">
<cube core-dtype="int32" dtype="int32" standard_name="eastward_wind" units="m s-1" var_name="wind3">
<attributes>
<attribute name="test" value="masked non-monotonic time coordinate"/>
</attributes>
Expand All @@ -52,6 +52,6 @@
</coord>
</coords>
<cellMethods/>
<data checksum="0x0f7cfdf3" dtype="int32" fill_value="-2147483647" mask_checksum="0xc0aeb298" shape="(3, 3, 3)"/>
<data checksum="0x0f7cfdf3" dtype="int32" mask_checksum="0xc0aeb298" shape="(3, 3, 3)"/>
</cube>
</cubes>
2 changes: 1 addition & 1 deletion lib/iris/tests/results/netcdf/netcdf_rotated_xy_land.cml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" ?>
<cubes xmlns="urn:x-iris:cubeml-0.2">
<cube long_name="land area fraction of grid cell" standard_name="land_area_fraction" units="1" var_name="sftls">
<cube core-dtype="float32" dtype="float32" fill_value="1e+30" long_name="land area fraction of grid cell" standard_name="land_area_fraction" units="1" var_name="sftls">
<attributes>
<attribute name="Conventions" value="CF-1.0"/>
<attribute name="conventionsURL" value="http://www.cgd.ucar.edu/cms/eaton/cf-metadata/index.html"/>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" ?>
<cubes xmlns="urn:x-iris:cubeml-0.2">
<cube standard_name="air_potential_temperature" units="K" var_name="air_potential_temperature">
<cube core-dtype="float32" dtype="float32" fill_value="-1.07374e+09" standard_name="air_potential_temperature" units="K" var_name="air_potential_temperature">
<attributes>
<attribute name="Conventions" value="CF-1.5"/>
<attribute name="STASH" value="m01s00i004"/>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" ?>
<cubes xmlns="urn:x-iris:cubeml-0.2">
<cube long_name="Precipitation" standard_name="precipitation_flux" units="kg m-2 s-1" var_name="pr">
<cube core-dtype="float32" dtype="float32" fill_value="1e+30" long_name="Precipitation" standard_name="precipitation_flux" units="kg m-2 s-1" var_name="pr">
<attributes>
<attribute name="Conventions" value="CF-1.5"/>
<attribute name="NCO" value="4.1.0"/>
Expand Down
2 changes: 1 addition & 1 deletion lib/iris/tests/results/netcdf/save_load_traj.cml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" ?>
<cubes xmlns="urn:x-iris:cubeml-0.2">
<cube standard_name="air_temperature" units="K" var_name="air_temperature">
<cube core-dtype="float64" dtype="float64" standard_name="air_temperature" units="K" var_name="air_temperature">
<attributes>
<attribute name="Conventions" value="CF-1.5"/>
<attribute name="STASH" value="m01s16i203"/>
Expand Down
Loading