Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 75 additions & 5 deletions lib/iris/_lazy_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,14 @@ def array_masked_to_nans(array):
result = array
else:
if ma.is_masked(array):
if array.dtype.kind == 'i':
array = array.astype(np.dtype('f8'))
mask = array.mask
array[mask] = np.nan
result = array.data
if array.dtype.kind == 'i':
result = array.data.astype(np.dtype('f8'))
else:
result = array.data.copy()
result[mask] = np.nan
else:
result = array.data
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lbdreyer This addresses the following numpy warning:

MaskedArrayFutureWarning: setting an item on a masked array which has a shared mask will not copy the mask and also change the original mask array in the future.
Check the NumPy 1.11 release notes for more information.

Which was generated by array[mask] = np.nan whenever the data was masked, but the dtype was not integral.

return result


Expand All @@ -118,7 +121,7 @@ def multidim_lazy_stack(stack):
Args:

* stack:
+ An ndarray of dask arrays.
An ndarray of dask arrays.

Returns:
The input array converted to a lazy dask array.
Expand All @@ -135,3 +138,70 @@ def multidim_lazy_stack(stack):
result = da.stack([multidim_lazy_stack(subarray)
for subarray in stack])
return result


def convert_nans_array(array, nans_replacement=None, result_dtype=None):
    """
    Convert a :class:`~numpy.ndarray` that may contain one or more NaN values
    to either a :class:`~numpy.ma.core.MaskedArray` or a
    :class:`~numpy.ndarray` with the NaN values filled.

    Args:

    * array:
        The :class:`~numpy.ndarray` to be converted.

    Kwargs:

    * nans_replacement:
        If `nans_replacement` is None, then raise a :class:`ValueError` if
        the `array` contains any NaN values (default behaviour).
        If `nans_replacement` is `numpy.ma.masked`, then convert the `array`
        to a :class:`~numpy.ma.core.MaskedArray` (this copies the data).
        Otherwise, use the specified `nans_replacement` value to fill the
        NaN points of the `array`, which is modified in-place.

    * result_dtype:
        Cast the resultant array to this target :class:`~numpy.dtype`.
        Note that this cast is *not* applied in the pass-through cases
        described below (already-masked input, or non-floating dtype).

    Returns:
        An :class:`numpy.ndarray`.

    .. note::
        An input array that is either a :class:`~numpy.ma.core.MaskedArray`
        or has a non-floating (e.g. integral) dtype will be returned
        unaltered, since such arrays cannot contain NaNs.

    .. note::
        In some cases (filling, or a dtype cast), the input array is
        modified in-place or replaced by a cast copy; callers must not
        rely on the input remaining unchanged.

    """
    # Only floating-point, non-masked arrays can contain NaNs; everything
    # else passes straight through.
    if not ma.isMaskedArray(array) and array.dtype.kind == 'f':
        # First, calculate the mask (before any dtype cast, as casting
        # NaNs to another dtype may lose them).
        mask = np.isnan(array)
        # Now, cast the dtype, if required.
        if result_dtype is not None:
            result_dtype = np.dtype(result_dtype)
            if array.dtype != result_dtype:
                array = array.astype(result_dtype)
        # Finally, mask or fill the data, as required, or raise an exception
        # if we detect there are NaNs present and we didn't expect any.
        if np.any(mask):
            if nans_replacement is None:
                emsg = 'Array contains unexpected NaNs.'
                raise ValueError(emsg)
            elif nans_replacement is ma.masked:
                # Mask the array with the default fill_value.
                array = ma.masked_array(array, mask=mask)
            else:
                # Check the fill value is appropriate for the
                # result array dtype.
                try:
                    [fill_value] = np.asarray([nans_replacement],
                                              dtype=array.dtype)
                except OverflowError:
                    emsg = 'Fill value of {!r} invalid for array result {!r}.'
                    raise ValueError(emsg.format(nans_replacement,
                                                 array.dtype))
                # Fill the array in-place.
                array[mask] = fill_value
    return array
4 changes: 3 additions & 1 deletion lib/iris/analysis/trajectory.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# (C) British Crown Copyright 2010 - 2016, Met Office
# (C) British Crown Copyright 2010 - 2017, Met Office
#
# This file is part of Iris.
#
Expand Down Expand Up @@ -341,6 +341,8 @@ def interpolate(cube, sample_points, method=None):
# This is **not** proper mask handling, because we cannot produce a
# masked result, but it ensures we use a "filled" version of the
# input in this case.
if cube.fill_value is not None:
source_data.fill_value = cube.fill_value
Copy link
Member Author

@bjlittle bjlittle Mar 16, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The way trajectory handles masks is pretty ropey ... this change appears appropriate.

The preceding inline code comment says it all really ...

source_data = source_data.filled()
new_cube.data[:] = source_data
# NOTE: we assign to "new_cube.data[:]" and *not* just "new_cube.data",
Expand Down
24 changes: 11 additions & 13 deletions lib/iris/coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,7 @@
import warnings
import zlib

import biggus
import iris._lazy_data
from iris._lazy_data import is_lazy_data
import dask.array as da
import netcdftime
import numpy as np
Expand Down Expand Up @@ -1611,7 +1610,7 @@ def _sanitise_array(self, src, ndmin):
@property
def points(self):
"""Property containing the points values as a numpy array"""
if iris._lazy_data.is_lazy_data(self._points):
if is_lazy_data(self._points):
self._points = self._points.compute()
return self._points.view()

Expand All @@ -1623,9 +1622,9 @@ def points(self, points):
# of 1 and is either a numpy or lazy array.
# This will avoid Scalar coords with points of shape () rather
# than the desired (1,)
if iris._lazy_data.is_lazy_data(points):
if is_lazy_data(points):
if points.shape == ():
points = points * np.ones(1)
points = da.reshape(points, (1,))
elif not isinstance(points, iris.aux_factory._LazyArray):
points = self._sanitise_array(points, 1)
# If points are already defined for this coordinate,
Expand All @@ -1649,8 +1648,8 @@ def bounds(self):
"""
if self._bounds is not None:
bounds = self._bounds
if isinstance(bounds, biggus.Array):
bounds = bounds.ndarray()
if is_lazy_data(bounds):
bounds = bounds.compute()
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a better way to do this now? I stumbled across this biggus usage and felt obliged to address it ... apologies for being gratuitous in this PR.

Copy link
Member Author

@bjlittle bjlittle Mar 16, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps @pp-mo's as_concrete_data follow-on PR #2422 might make this melt away? @pp-mo is that the case?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Anything with "compute" in it should be hit by #2422 to turn it into an "as_concrete_data" call.

self._bounds = bounds
bounds = bounds.view()
else:
Expand All @@ -1662,8 +1661,8 @@ def bounds(self):
def bounds(self, bounds):
# Ensure the bounds are a compatible shape.
if bounds is not None:
if not isinstance(bounds, (iris.aux_factory._LazyArray,
biggus.Array)):
if not (isinstance(bounds, iris.aux_factory._LazyArray) or
is_lazy_data(bounds)):
bounds = self._sanitise_array(bounds, 2)
# NB. Use _points to avoid triggering any lazy array.
if self._points.shape != bounds.shape[:-1]:
Expand Down Expand Up @@ -1742,10 +1741,9 @@ def measure(self):
def data(self):
"""Property containing the data values as a numpy array"""
data = self._data
if isinstance(data, biggus.Array):
data = data.ndarray()
self._data = data
return data.view()
if is_lazy_data(self._data):
self._data = self._data.compute()
Copy link
Member Author

@bjlittle bjlittle Mar 16, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, is this now the best way to do this ... again #2422 might address this in a better way

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Likewise subject to #2422

return self._data.view()

@data.setter
def data(self, data):
Expand Down
22 changes: 11 additions & 11 deletions lib/iris/cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@
import iris._concatenate
import iris._constraints
from iris._deprecation import warn_deprecated
from iris._lazy_data import array_masked_to_nans, as_lazy_data, is_lazy_data
from iris._lazy_data import (array_masked_to_nans, as_lazy_data,
convert_nans_array, is_lazy_data)
import iris._merge
import iris.analysis
from iris.analysis.cartography import wrap_lons
Expand Down Expand Up @@ -1733,16 +1734,15 @@ def data(self):
if self.has_lazy_data():
try:
data = self._dask_array.compute()
mask = np.isnan(data)
if data.dtype != self.dtype:
data = data.astype(self.dtype)
self.dtype = None
if np.all(~mask):
self._numpy_array = data
else:
fv = self.fill_value
self._numpy_array = ma.masked_array(data, mask=mask,
fill_value=fv)
# Now convert the data payload from a NaN array to a
# masked array, and if appropriate cast to the specified
# cube result dtype.
result = convert_nans_array(data,
nans_replacement=ma.masked,
result_dtype=self.dtype)
self._numpy_array = result
self.dtype = None

except MemoryError:
msg = "Failed to create the cube's data as there was not" \
" enough memory available.\n" \
Expand Down
19 changes: 13 additions & 6 deletions lib/iris/fileformats/netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
import string
import warnings

import biggus
import dask.array as da
import netCDF4
import numpy as np
import numpy.ma as ma
Expand All @@ -56,7 +56,8 @@
import iris.fileformats._pyke_rules
import iris.io
import iris.util
from iris._lazy_data import array_masked_to_nans, as_lazy_data
from iris._lazy_data import (array_masked_to_nans, as_lazy_data,
convert_nans_array)

# Show Pyke inference engine statistics.
DEBUG = False
Expand Down Expand Up @@ -1938,16 +1939,22 @@ def set_packing_ncattrs(cfvar):
# Explicitly assign the fill_value, which will be the type default
# in the case of an unmasked array.
if packing is None:
fill_value = cube.lazy_data().fill_value
fill_value = cube.fill_value
dtype = cube.lazy_data().dtype.newbyteorder('=')

cf_var = self._dataset.createVariable(
cf_name, dtype,
dimension_names, fill_value=fill_value,
**kwargs)
set_packing_ncattrs(cf_var)
# stream the data
biggus.save([cube.lazy_data()], [cf_var], masked=True)

# Now stream the cube data payload straight to the netCDF
# data variable within the netCDF file, where any NaN values
# are replaced with the specified cube fill_value.
data = da.map_blocks(convert_nans_array, cube.lazy_data(),
nans_replacement=cube.fill_value,
result_dtype=cube.dtype)
da.store([data], [cf_var])

if cube.standard_name:
_setncattr(cf_var, 'standard_name', cube.standard_name)
Expand Down Expand Up @@ -2045,7 +2052,7 @@ def save(cube, filename, netcdf_format='NETCDF4', local_keys=None,
* Keyword arguments specifying how to save the data are applied
to each cube. To use different settings for different cubes, use
the NetCDF Context manager (:class:`~Saver`) directly.
* The save process will stream the data payload to the file using biggus,
* The save process will stream the data payload to the file using dask,
enabling large data payloads to be saved and maintaining the 'lazy'
status of the cube's data payload, unless the netcdf_format is explicitly
specified to be 'NETCDF3' or 'NETCDF3_CLASSIC'.
Expand Down
5 changes: 0 additions & 5 deletions lib/iris/tests/integration/test_netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ def test_save(self):
iris.save(self.cube, filename)
self.assertCDL(filename)

@tests.skip_biggus
def test_save_load_loop(self):
# Tests an issue where the variable names in the formula
# terms changed to the standard_names instead of the variable names
Expand Down Expand Up @@ -201,7 +200,6 @@ def test_patching_conventions_attribute(self):
class TestLazySave(tests.IrisTest):

@tests.skip_data
@tests.skip_biggus
def test_lazy_preserved_save(self):
fpath = tests.get_data_path(('NetCDF', 'label_and_climate',
'small_FC_167_mon_19601101.nc'))
Expand All @@ -212,7 +210,6 @@ def test_lazy_preserved_save(self):
saver.write(acube)
self.assertTrue(acube.has_lazy_data())

@tests.skip_biggus
def test_lazy_mask_preserve_fill_value(self):
cube = iris.cube.Cube(np.ma.array([0, 1], mask=[False, True],
fill_value=-1))
Expand Down Expand Up @@ -272,7 +269,6 @@ def test_concatenate_cell_measure_match(self):
self.assertEqual(cubes[0]._cell_measures_and_dims, cm_and_dims)
self.assertEqual(len(cubes), 1)

@tests.skip_biggus
def test_round_trip(self):
cube, = iris.load(self.fname)
with self.temp_filename(suffix='.nc') as filename:
Expand Down Expand Up @@ -430,7 +426,6 @@ def test_multi_packed_single_dtype(self):
# Read PP input file.
self._multi_test('multi_packed_single_dtype.cdl')

@tests.skip_biggus
def test_multi_packed_multi_dtype(self):
"""Test saving multiple packed cubes with pack_dtype list."""
# Read PP input file.
Expand Down
2 changes: 1 addition & 1 deletion lib/iris/tests/integration/test_trajectory.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ def test_tri_polar__nearest(self):
# TODO: arguably, we should support masked data properly in the
# interpolation routine. In the legacy code, that is unfortunately
# just not the case.
test_cube.data.set_fill_value(0.0)
test_cube.fill_value = 0
Copy link
Member Author

@bjlittle bjlittle Mar 16, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm just aping the same behaviour for this test, in terms of trajectories approach to dealing with masked data, in order to yield the same expected result. Seems appropriate.


# Test points on a regular global grid, with unrelated steps + offsets
# and an extended range of longitude values.
Expand Down
8 changes: 4 additions & 4 deletions lib/iris/tests/results/netcdf/netcdf_monotonic.cml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" ?>
<cubes xmlns="urn:x-iris:cubeml-0.2">
<cube standard_name="eastward_wind" units="m s-1" var_name="wind1">
<cube core-dtype="int32" dtype="int32" standard_name="eastward_wind" units="m s-1" var_name="wind1">
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At some point we need to take out these core-dtype from the cml.
Perhaps at the end of this sprint (considering we want to target fill_value/dtype/get-tests-working this sprint)

<attributes>
<attribute name="test" value="weak-monotonic time coordinate"/>
</attributes>
Expand All @@ -18,7 +18,7 @@
<cellMethods/>
<data checksum="0xd4e7a32f" dtype="int32" shape="(3, 3, 3)"/>
</cube>
<cube standard_name="eastward_wind" units="m s-1" var_name="wind2">
<cube core-dtype="int32" dtype="int32" standard_name="eastward_wind" units="m s-1" var_name="wind2">
<attributes>
<attribute name="test" value="masked monotonic time coordinate"/>
</attributes>
Expand All @@ -36,7 +36,7 @@
<cellMethods/>
<data checksum="0xd4e7a32f" dtype="int32" shape="(3, 3, 3)"/>
</cube>
<cube standard_name="eastward_wind" units="m s-1" var_name="wind3">
<cube core-dtype="int32" dtype="int32" standard_name="eastward_wind" units="m s-1" var_name="wind3">
<attributes>
<attribute name="test" value="masked non-monotonic time coordinate"/>
</attributes>
Expand All @@ -52,6 +52,6 @@
</coord>
</coords>
<cellMethods/>
<data checksum="0x0f7cfdf3" dtype="int32" fill_value="-2147483647" mask_checksum="0xc0aeb298" shape="(3, 3, 3)"/>
<data checksum="0x0f7cfdf3" dtype="int32" mask_checksum="0xc0aeb298" shape="(3, 3, 3)"/>
</cube>
</cubes>
2 changes: 1 addition & 1 deletion lib/iris/tests/results/netcdf/netcdf_rotated_xy_land.cml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" ?>
<cubes xmlns="urn:x-iris:cubeml-0.2">
<cube long_name="land area fraction of grid cell" standard_name="land_area_fraction" units="1" var_name="sftls">
<cube core-dtype="float32" dtype="float32" fill_value="1e+30" long_name="land area fraction of grid cell" standard_name="land_area_fraction" units="1" var_name="sftls">
<attributes>
<attribute name="Conventions" value="CF-1.0"/>
<attribute name="conventionsURL" value="http://www.cgd.ucar.edu/cms/eaton/cf-metadata/index.html"/>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" ?>
<cubes xmlns="urn:x-iris:cubeml-0.2">
<cube standard_name="air_potential_temperature" units="K" var_name="air_potential_temperature">
<cube core-dtype="float32" dtype="float32" fill_value="-1.07374e+09" standard_name="air_potential_temperature" units="K" var_name="air_potential_temperature">
<attributes>
<attribute name="Conventions" value="CF-1.5"/>
<attribute name="STASH" value="m01s00i004"/>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" ?>
<cubes xmlns="urn:x-iris:cubeml-0.2">
<cube long_name="Precipitation" standard_name="precipitation_flux" units="kg m-2 s-1" var_name="pr">
<cube core-dtype="float32" dtype="float32" fill_value="1e+30" long_name="Precipitation" standard_name="precipitation_flux" units="kg m-2 s-1" var_name="pr">
<attributes>
<attribute name="Conventions" value="CF-1.5"/>
<attribute name="NCO" value="4.1.0"/>
Expand Down
2 changes: 1 addition & 1 deletion lib/iris/tests/results/netcdf/save_load_traj.cml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" ?>
<cubes xmlns="urn:x-iris:cubeml-0.2">
<cube standard_name="air_temperature" units="K" var_name="air_temperature">
<cube core-dtype="float64" dtype="float64" standard_name="air_temperature" units="K" var_name="air_temperature">
<attributes>
<attribute name="Conventions" value="CF-1.5"/>
<attribute name="STASH" value="m01s16i203"/>
Expand Down
Loading