SciTools · djkirkham · Oct 12, 2017 · Aug 17, 2017 · Aug 18, 2017 · Aug 18, 2017
diff --git a/lib/iris/analysis/__init__.py b/lib/iris/analysis/__init__.py
@@ -1419,36 +1419,41 @@ def interp_order(length):
 """
 
 
-def _build_dask_mdtol_function(dask_nanfunction):
+def _build_dask_mdtol_function(dask_stats_function):
     """
     Make a wrapped dask statistic function that supports the 'mdtol' keyword.
 
-    'dask_nanfunction' must be a statistical function that treats NaNs as
-    missing data, and which has the basic call signature
-    "dask_nanfunction(data, axis, **kwargs)".
+    'dask_function' must be a dask statistical function, compatible with the
+    call signature : "dask_stats_function(data, axis, **kwargs)".
+    It must be masked-data tolerant, i.e. it ignores masked input points and
+    performs a calculation on only the unmasked points.
+    For example, mean([1, --, 2]) = (1 + 2) / 2 = 1.5.
 
     The returned value is a new function operating on dask arrays.
-    It has the call signature "stat(data, axis=-1, mdtol=None, *kwargs)".
+    It has the call signature `stat(data, axis=-1, mdtol=None, **kwargs)`.
 
     """
-    @wraps(dask_nanfunction)
+    @wraps(dask_stats_function)
     def inner_stat(array, axis=-1, mdtol=None, **kwargs):
-        dask_result = dask_nanfunction(array, axis=axis, **kwargs)
-        if mdtol is None:
+        # Call the statistic to get the basic result (missing-data tolerant).
+        dask_result = dask_stats_function(array, axis=axis, **kwargs)
+        if mdtol is None or mdtol >= 1.0:
             result = dask_result
         else:
-            point_counts = np.sum(np.ones(array.shape), axis=axis)
-            point_mask_counts = da.sum(da.isnan(array), axis=axis)
-            masked_point_fractions = (point_mask_counts + 0.0) / point_counts
-            # Note: the +0.0 forces a floating-point divide.
+            # Build a lazy computation to compare the fraction of missing
+            # input points at each output point to the 'mdtol' threshold.
+            point_mask_counts = da.sum(da.ma.getmaskarray(array), axis=axis)
+            points_per_calc = array.size / dask_result.size
+            masked_point_fractions = point_mask_counts / points_per_calc
             boolean_mask = masked_point_fractions > mdtol
-            result = da.where(boolean_mask, np.nan, dask_result)
+            # Return an mdtol-masked version of the basic result.
+            result = da.ma.masked_array(da.ma.getdata(dask_result),
+                                        boolean_mask)
         return result
     return inner_stat
 
-
 MEAN = WeightedAggregator('mean', ma.average,
-                          lazy_func=_build_dask_mdtol_function(da.nanmean))
+                          lazy_func=_build_dask_mdtol_function(da.mean))
 """
 An :class:`~iris.analysis.Aggregator` instance that calculates
 the mean over a :class:`~iris.cube.Cube`, as computed by
@@ -1647,7 +1652,7 @@ def inner_stat(array, axis=-1, mdtol=None, **kwargs):
 
 
 STD_DEV = Aggregator('standard_deviation', ma.std, ddof=1,
-                     lazy_func=_build_dask_mdtol_function(da.nanstd))
+                     lazy_func=_build_dask_mdtol_function(da.std))
 """
 An :class:`~iris.analysis.Aggregator` instance that calculates
 the standard deviation over a :class:`~iris.cube.Cube`, as
@@ -1714,7 +1719,7 @@ def inner_stat(array, axis=-1, mdtol=None, **kwargs):
 VARIANCE = Aggregator('variance',
                       ma.var,
                       units_func=lambda units: units * units,
-                      lazy_func=_build_dask_mdtol_function(da.nanvar),
+                      lazy_func=_build_dask_mdtol_function(da.var),
                       ddof=1)
 """
 An :class:`~iris.analysis.Aggregator` instance that calculates

diff --git a/lib/iris/tests/unit/analysis/test_MEAN.py b/lib/iris/tests/unit/analysis/test_MEAN.py
@@ -33,31 +33,44 @@
 
 class Test_lazy_aggregate(tests.IrisTest):
     def setUp(self):
-        self.data = np.arange(12.0).reshape(3, 4)
-        self.data[2, 1:] = np.nan
+        self.data = ma.arange(12).reshape(3, 4)
+        self.data.mask = [[0, 0, 0, 1],
+                          [0, 0, 1, 1],
+                          [0, 1, 1, 1]]
+        # --> fractions of masked-points in columns = [0, 1/3, 2/3, 1]
         self.array = as_lazy_data(self.data)
-        masked_data = ma.masked_array(self.data,
-                                      mask=np.isnan(self.data))
         self.axis = 0
-        self.expected_masked = ma.mean(masked_data, axis=self.axis)
+        self.expected_masked = ma.mean(self.data, axis=self.axis)
 
     def test_mdtol_default(self):
+        # Default operation is "mdtol=1" --> unmasked if *any* valid points.
+        # --> output column masks = [0, 0, 0, 1]
         agg = MEAN.lazy_aggregate(self.array, axis=self.axis)
         masked_result = as_concrete_data(agg)
         self.assertMaskedArrayAlmostEqual(masked_result,
                                           self.expected_masked)
 
-    @tests.skip_dask_mask
-    def test_mdtol_below(self):
-        agg = MEAN.lazy_aggregate(self.array, axis=self.axis, mdtol=0.3)
+    def test_mdtol_belowall(self):
+        # Mdtol=0.25 --> masked columns = [0, 1, 1, 1]
+        agg = MEAN.lazy_aggregate(self.array, axis=self.axis, mdtol=0.25)
         masked_result = as_concrete_data(agg)
         expected_masked = self.expected_masked
         expected_masked.mask = [False, True, True, True]
         self.assertMaskedArrayAlmostEqual(masked_result,
                                           expected_masked)
 
-    def test_mdtol_above(self):
-        agg = MEAN.lazy_aggregate(self.array, axis=self.axis, mdtol=0.4)
+    def test_mdtol_intermediate(self):
+        # mdtol=0.5 --> masked columns = [0, 0, 1, 1]
+        agg = MEAN.lazy_aggregate(self.array, axis=self.axis, mdtol=0.5)
+        masked_result = as_concrete_data(agg)
+        expected_masked = self.expected_masked
+        expected_masked.mask = [False, False, True, True]
+        self.assertMaskedArrayAlmostEqual(masked_result, expected_masked)
+
+    def test_mdtol_aboveall(self):
+        # mdtol=0.75 --> masked columns = [0, 0, 0, 1]
+        # In this case, effectively the same as mdtol=None.
+        agg = MEAN.lazy_aggregate(self.array, axis=self.axis, mdtol=0.75)
         masked_result = as_concrete_data(agg)
         self.assertMaskedArrayAlmostEqual(masked_result,
                                           self.expected_masked)
@@ -71,6 +84,34 @@ def test_multi_axis(self):
         expected = np.mean(data, axis=collapse_axes)
         self.assertArrayAllClose(result, expected)
 
+    def test_last_axis(self):
+        # From setUp:
+        # self.data.mask = [[0, 0, 0, 1],
+        #                   [0, 0, 1, 1],
+        #                   [0, 1, 1, 1]]
+        # --> fractions of masked-points in ROWS = [1/4, 1/2, 3/4]
+        axis = -1
+        agg = MEAN.lazy_aggregate(self.array, axis=axis, mdtol=0.51)
+        expected_masked = ma.mean(self.data, axis=-1)
+        expected_masked = np.ma.masked_array(expected_masked, [0, 0, 1])
+        masked_result = as_concrete_data(agg)
+        self.assertMaskedArrayAlmostEqual(masked_result,
+                                          expected_masked)
+
+    def test_all_axes_belowtol(self):
+        agg = MEAN.lazy_aggregate(self.array, axis=None, mdtol=0.75)
+        expected_masked = ma.mean(self.data)
+        masked_result = as_concrete_data(agg)
+        self.assertMaskedArrayAlmostEqual(masked_result,
+                                          expected_masked)
+
+    def test_all_axes_abovetol(self):
+        agg = MEAN.lazy_aggregate(self.array, axis=None, mdtol=0.45)
+        expected_masked = ma.masked_less([0.0], 1)
+        masked_result = as_concrete_data(agg)
+        self.assertMaskedArrayAlmostEqual(masked_result,
+                                          expected_masked)
+
 
 class Test_name(tests.IrisTest):
     def test(self):

diff --git a/lib/iris/tests/unit/analysis/test_STD_DEV.py b/lib/iris/tests/unit/analysis/test_STD_DEV.py
@@ -30,15 +30,15 @@
 
 
 class Test_lazy_aggregate(tests.IrisTest):
-    @tests.skip_dask_mask
     def test_mdtol(self):
-        na = np.nan
-        array = np.array([[1., 2., 1., 2.],
-                          [1., 2., 3., na],
-                          [1., 2., na, na]])
+        na = -999.888
+        array = np.ma.masked_equal([[1., 2., 1., 2.],
+                                    [1., 2., 3., na],
+                                    [1., 2., na, na]],
+                                   na)
         array = as_lazy_data(array)
         var = STD_DEV.lazy_aggregate(array, axis=1, mdtol=0.3)
-        masked_result = as_concrete_data(var, nans_replacement=np.ma.masked)
+        masked_result = as_concrete_data(var)
         masked_expected = np.ma.masked_array([0.57735, 1., 0.707107],
                                              mask=[0, 0, 1])
         self.assertMaskedArrayAlmostEqual(masked_result, masked_expected)