Changes from all commits
50 commits
8f7524d
adopt conda-forge
marqh Feb 23, 2017
d23687c
unpin
marqh Feb 23, 2017
1a11aa3
Define @skip_biggus test decorator. (#2353)
pp-mo Feb 13, 2017
880574d
Generic lazy data handling. (#2356)
pp-mo Feb 13, 2017
d1eb253
Use _lazy_data functions for cube data.
pp-mo Feb 10, 2017
9d437fa
Hack for dual lazy support, i.e. biggus OR dask.
pp-mo Feb 10, 2017
5f1956d
Add mask/NaN translations into iris._lazy_data.
pp-mo Feb 12, 2017
f1ba629
Started skipping tests.
pp-mo Feb 12, 2017
6e1a463
Revert unnecessary change to integration/test_pp.
pp-mo Feb 13, 2017
b63fa1c
Various skips.
pp-mo Feb 13, 2017
44e73f2
Disable Travis example + docs tests for now.
pp-mo Feb 13, 2017
1dac2b7
dask based merge
marqh Feb 14, 2017
37a6a2d
skip all iris_grib tests
marqh Feb 14, 2017
94be3dd
Lazy pp loading
djkirkham Feb 14, 2017
eb7ae6d
switched netcdf loader from biggus to dask. untested. (#35)
corinnebosley Feb 14, 2017
4973527
skip failing netcdf unit mock tests: chunks do not add up to shape
marqh Feb 14, 2017
e1b3ce7
pp_load data property fix
marqh Feb 15, 2017
5a9dd2d
as_concrete_array always returns a masked array
marqh Feb 15, 2017
f33e4da
Use Dask for concatenate (#38)
AlexHilson Feb 15, 2017
50b8597
pp unit test
marqh Feb 15, 2017
c7e3390
Don't make lazy wrappers for cube shape and dtype. (#37)
pp-mo Feb 15, 2017
3ca81d9
biggus ArrayStack.multidim_array_stack with da.stack
marqh Feb 15, 2017
64c91dd
is_lazy_data over isinstance
marqh Feb 15, 2017
b0db97b
test_field_collection with dask
marqh Feb 15, 2017
49e7bc7
use np.dtype in mock tests
marqh Feb 15, 2017
1cf2152
typo fix and fill_value guarantee (#39)
corinnebosley Feb 15, 2017
1917099
Replace biggus ndarray with lazy as_concrete_data in pp pyke rules. (…
pp-mo Feb 15, 2017
7f1c893
remove biggus lazy data, skip netcdf save
marqh Feb 15, 2017
f04ad14
fix cube pickle test
marqh Feb 15, 2017
a34d556
skip netCDF save
marqh Feb 15, 2017
85e4bb1
Fixes for biggus array checks (#41)
corinnebosley Feb 15, 2017
8c45ea2
replace biggus lazy use for now, patch out netcdf save tests
marqh Feb 15, 2017
42a5990
skip as fill value lost
marqh Feb 15, 2017
e1cfe5b
Don't try and merge 0-d arrays (#42)
AlexHilson Feb 15, 2017
8d1c602
biggus skippers (#43)
corinnebosley Feb 16, 2017
a1eb87f
plot skippers (#45)
corinnebosley Feb 16, 2017
0608a91
skippers added for more not-serious failures (#44)
corinnebosley Feb 16, 2017
7d59bcf
header dates corrected (#46)
corinnebosley Feb 16, 2017
cfb370a
test implementation tweaks
marqh Feb 16, 2017
8c729d7
skip non lazy coord loading
marqh Feb 16, 2017
03bec3c
pickling test skip
marqh Feb 16, 2017
2bf1dfc
removed some unnecessary skippers, mostly on concatenate tests (#2388)
corinnebosley Feb 21, 2017
2af3603
Data first (#2392)
marqh Feb 23, 2017
6c65048
code migration
marqh Feb 24, 2017
f55012c
return grib to the flock
marqh Feb 24, 2017
eefd17c
test skippers: netcdf4 time
marqh Feb 24, 2017
fb1f740
pin minimal
marqh Feb 24, 2017
4b2c718
skip
marqh Feb 24, 2017
e044e0b
imports
marqh Feb 24, 2017
84153df
adopt eccodes
marqh Feb 24, 2017
6 changes: 2 additions & 4 deletions .travis.yml
@@ -15,8 +15,6 @@ env:
- TEST_TARGET=default
- TEST_TARGET=default TEST_MINIMAL=true
- TEST_TARGET=coding
- TEST_TARGET=example
- TEST_TARGET=doctest

git:
depth: 10000
@@ -49,12 +47,12 @@ install:

# Customise the testing environment
# ---------------------------------
- conda config --add channels scitools
- conda config --add channels conda-forge
- if [[ "$TEST_MINIMAL" == true ]]; then
conda install --quiet --file minimal-conda-requirements.txt;
else
if [[ "$TRAVIS_PYTHON_VERSION" == 3* ]]; then
sed -e '/ecmwf_grib/d' -e '/esmpy/d' -e '/iris_grib/d' -e 's/#.\+$//' conda-requirements.txt | xargs conda install --quiet;
sed -e '/python-ecmwf_grib/d' -e '/esmpy/d' -e 's/#.\+$//' conda-requirements.txt | xargs conda install --quiet;
else
conda install --quiet --file conda-requirements.txt;
fi
5 changes: 3 additions & 2 deletions conda-requirements.txt
@@ -10,6 +10,7 @@ numpy
pyke
udunits2
cf_units
dask

# Iris build dependencies
setuptools
@@ -19,14 +20,14 @@ mock
nose
pep8
sphinx
iris_sample_data
iris-sample-data
filelock
imagehash
requests

# Optional iris dependencies
nc_time_axis
iris_grib
python-eccodes
esmpy>=7.0
gdal
libmo_unpack
23 changes: 23 additions & 0 deletions docs/iris/src/developers_guide/dask_interface.rst
@@ -0,0 +1,23 @@
Iris Dask Interface
*******************

Iris uses dask (http://dask.pydata.org) to manage lazy data interfaces and processing graphs. The key principles which define this interface are:

* A call to `cube.data` will always load all of the data.
* Once this has happened:
* `cube.data` is a mutable numpy masked array or ndarray;
* `cube._numpy_array` is a private numpy masked array, accessible via `cube.data`, which may strip off the mask and return a reference to the bare ndarray.
* `cube.data` may be used to set the data; this accepts:
* a numpy array (including masked array), which is assigned to `cube._numpy_array`;
* a dask array, which is assigned to `cube._dask_array`, and `cube._numpy_array` is set to None.
* `cube._dask_array` may be None; otherwise it is expected to be a dask graph:
* this may wrap a proxy to a file collection;
* this may wrap the numpy array in `cube._numpy_array`.
* All dask graphs wrap array-like objects where missing data is represented by `nan`:
* masked arrays derived from these arrays shall create their mask using the nan locations, as sketched below;
* where dask-wrapped `int` arrays require masks, these will first be cast to `float`.
* In order to support this mask conversion, cubes have a `fill_value` as part of their metadata, which may be None.
* Array copying is kept to an absolute minimum:
* array references should always be passed, not new arrays created, unless an explicit copy operation is requested.
* To test for the presence of a dask array of any sort, we use:
* `iris._lazy_data.is_lazy_data` which is implemented as `hasattr(data, 'compute')`.
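
A minimal illustrative sketch of the NaN-for-mask convention described above (invented values; assumes only `numpy`, `numpy.ma` and `dask.array`, and is not part of this changeset):

    import dask.array as da
    import numpy as np
    import numpy.ma as ma

    # A masked integer array is first cast to float so that NaN can stand in
    # for the masked points, then wrapped as a dask array.
    masked = ma.masked_array([1, 2, 3], mask=[False, True, False])
    as_nans = masked.astype('f8').filled(np.nan)
    lazy = da.from_array(as_nans, chunks=as_nans.shape)

    # On realisation, the mask is rebuilt from the NaN locations.
    realised = ma.masked_invalid(lazy.compute())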
1 change: 1 addition & 0 deletions docs/iris/src/developers_guide/index.rst
@@ -38,3 +38,4 @@
tests.rst
deprecations.rst
release.rst
dask_interface.rst
6 changes: 3 additions & 3 deletions lib/iris/_concatenate.py
@@ -1,4 +1,4 @@
# (C) British Crown Copyright 2013 - 2016, Met Office
# (C) British Crown Copyright 2013 - 2017, Met Office
#
# This file is part of Iris.
#
@@ -26,7 +26,7 @@
from collections import defaultdict, namedtuple
from copy import deepcopy

import biggus
import dask.array as da
import numpy as np

import iris.coords
@@ -842,7 +842,7 @@ def _build_data(self):
skeletons = self._skeletons
data = [skeleton.data for skeleton in skeletons]

data = biggus.LinearMosaic(tuple(data), axis=self.axis)
data = da.concatenate(data, self.axis)

return data

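For orientation, a small hedged sketch of the `da.concatenate` call that `_build_data` now uses, with made-up stand-in arrays rather than real cube payloads:

    import dask.array as da
    import numpy as np

    # Two source-cube data payloads joined along axis 0.
    parts = [da.from_array(np.zeros((2, 4)), chunks=(2, 4)),
             da.from_array(np.ones((3, 4)), chunks=(3, 4))]
    result = da.concatenate(parts, 0)

    print(result.shape)      # (5, 4) -- still lazy
    print(result.compute())  # realises a plain numpy array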
54 changes: 54 additions & 0 deletions lib/iris/_lazy_data.py
@@ -0,0 +1,54 @@
# (C) British Crown Copyright 2017, Met Office
#
# This file is part of Iris.
#
# Iris is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the
# Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Iris is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Iris. If not, see <http://www.gnu.org/licenses/>.
"""
Routines for lazy data handling.

To avoid replicating implementation-dependent test and conversion code.

"""
from __future__ import (absolute_import, division, print_function)
from six.moves import (filter, input, map, range, zip) # noqa

import dask.array as da
import numpy as np


def is_lazy_data(data):
"""
Return whether the argument is an Iris 'lazy' data array.

At present, this means simply a Dask array.
We determine this by checking for a "compute" property.
NOTE: ***for now only***, Biggus arrays are also accepted.

"""
result = hasattr(data, 'compute')
return result


def array_masked_to_nans(array, mask=None):
"""
Convert a masked array to a normal array with NaNs at masked points.
This is used for dask integration, as dask does not support masked arrays.
Note that any fill value will be lost.
"""
if mask is None:
mask = array.mask
if array.dtype.kind == 'i':
array = array.astype(np.dtype('f8'))
array[mask] = np.nan
return array
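
A brief, hypothetical usage of the two helpers above (assuming the module imports as `iris._lazy_data`; the data values are invented):

    import dask.array as da
    import numpy as np
    import numpy.ma as ma

    from iris._lazy_data import array_masked_to_nans, is_lazy_data

    masked = ma.masked_array(np.arange(4), mask=[0, 1, 0, 0])

    # Masked points become NaN; the integer input is promoted to float64 first.
    nans = np.asarray(array_masked_to_nans(masked))

    # Only objects with a `compute` attribute (i.e. dask arrays) count as lazy.
    assert not is_lazy_data(nans)
    assert is_lazy_data(da.from_array(nans, chunks=(4,)))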
50 changes: 39 additions & 11 deletions lib/iris/_merge.py
@@ -1,4 +1,4 @@
# (C) British Crown Copyright 2010 - 2016, Met Office
# (C) British Crown Copyright 2010 - 2017, Met Office
#
# This file is part of Iris.
#
@@ -29,10 +29,11 @@
from collections import namedtuple, OrderedDict
from copy import deepcopy

import biggus
import dask.array as da
import numpy as np
import numpy.ma as ma

from iris._lazy_data import is_lazy_data, array_masked_to_nans
import iris.cube
import iris.coords
import iris.exceptions
@@ -1068,6 +1069,27 @@ def derive_space(groups, relation_matrix, positions, function_matrix=None):
return space


def _multidim_daskstack(stack):
"""
Recursively build a multidimensional stacked dask array.

The argument is an ndarray of dask arrays.
This is needed because dask.array.stack only accepts a 1-dimensional list.

"""
if stack.ndim == 0:
# A 0-d array cannot be merged.
result = stack.item()
elif stack.ndim == 1:
# Another base case: a simple 1-d list can be passed directly to dask.
result = da.stack(list(stack))
else:
# Recurse because dask.stack does not do multi-dimensional.
result = da.stack([_multidim_daskstack(subarray)
for subarray in stack])
return result


class ProtoCube(object):
"""
Framework for merging source-cubes into one or more higher
@@ -1192,10 +1214,10 @@ def merge(self, unique=True):
# Generate group-depth merged cubes from the source-cubes.
for level in range(group_depth):
# Stack up all the data from all of the relevant source
# cubes in a single biggus ArrayStack.
# cubes in a single dask "stacked" array.
# If it turns out that all the source cubes already had
# their data loaded then at the end we can convert the
# ArrayStack back to a numpy array.
# their data loaded then at the end we convert the stack back
# into a plain numpy array.
stack = np.empty(self._stack_shape, 'object')
all_have_data = True
for nd_index in nd_indexes:
@@ -1204,17 +1226,23 @@
group = group_by_nd_index[nd_index]
offset = min(level, len(group) - 1)
data = self._skeletons[group[offset]].data
# Ensure the data is represented as a biggus.Array and
# slot that Array into the stack.
if isinstance(data, biggus.Array):
# Ensure the data is represented as a dask array and
# slot that array into the stack.
if is_lazy_data(data):
all_have_data = False
else:
data = biggus.NumpyArrayAdapter(data)
if isinstance(data, ma.MaskedArray):
if ma.is_masked(data):
data = array_masked_to_nans(data)
data = data.data
data = da.from_array(data, chunks=data.shape)
stack[nd_index] = data

merged_data = biggus.ArrayStack(stack)
merged_data = _multidim_daskstack(stack)
if all_have_data:
merged_data = merged_data.masked_array()
# All inputs were concrete, so turn the result back into a
# normal array.
merged_data = merged_data.compute()
# Unmask the array only if it is filled.
if (ma.isMaskedArray(merged_data) and
ma.count_masked(merged_data) == 0):
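
For orientation only, a minimal sketch of the stacking pattern that `_multidim_daskstack` implements, using invented stand-in data and calling `dask.array.stack` directly rather than Iris itself:

    import dask.array as da
    import numpy as np

    # Build the same kind of object ndarray of dask arrays that merge() assembles.
    stack = np.empty((2, 2), dtype=object)
    for index in np.ndindex(stack.shape):
        stack[index] = da.from_array(np.full((3,), sum(index), dtype='f8'),
                                     chunks=(3,))

    # dask.array.stack only accepts a flat sequence, so stack the innermost
    # dimension first and then the outer one -- the same order as the recursion.
    merged = da.stack([da.stack(list(row)) for row in stack])

    print(merged.shape)      # (2, 2, 3)
    print(merged.compute())  # concrete numpy result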