Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 22 additions & 17 deletions lib/iris/analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1419,36 +1419,41 @@ def interp_order(length):
"""


def _build_dask_mdtol_function(dask_nanfunction):
def _build_dask_mdtol_function(dask_stats_function):
"""
Make a wrapped dask statistic function that supports the 'mdtol' keyword.

'dask_nanfunction' must be a statistical function that treats NaNs as
missing data, and which has the basic call signature
"dask_nanfunction(data, axis, **kwargs)".
'dask_function' must be a dask statistical function, compatible with the
call signature : "dask_stats_function(data, axis, **kwargs)".
It must be masked-data tolerant, i.e. it ignores masked input points and
performs a calculation on only the unmasked points.
For example, mean([1, --, 2]) = (1 + 2) / 2 = 1.5.

The returned value is a new function operating on dask arrays.
It has the call signature "stat(data, axis=-1, mdtol=None, *kwargs)".
It has the call signature `stat(data, axis=-1, mdtol=None, **kwargs)`.

"""
@wraps(dask_nanfunction)
@wraps(dask_stats_function)
def inner_stat(array, axis=-1, mdtol=None, **kwargs):
dask_result = dask_nanfunction(array, axis=axis, **kwargs)
if mdtol is None:
# Call the statistic to get the basic result (missing-data tolerant).
dask_result = dask_stats_function(array, axis=axis, **kwargs)
if mdtol is None or mdtol >= 1.0:
result = dask_result
else:
point_counts = np.sum(np.ones(array.shape), axis=axis)
point_mask_counts = da.sum(da.isnan(array), axis=axis)
masked_point_fractions = (point_mask_counts + 0.0) / point_counts
# Note: the +0.0 forces a floating-point divide.
# Build a lazy computation to compare the fraction of missing
# input points at each output point to the 'mdtol' threshold.
point_mask_counts = da.sum(da.ma.getmaskarray(array), axis=axis)
points_per_calc = array.size / dask_result.size
masked_point_fractions = point_mask_counts / points_per_calc
boolean_mask = masked_point_fractions > mdtol
result = da.where(boolean_mask, np.nan, dask_result)
# Return an mdtol-masked version of the basic result.
result = da.ma.masked_array(da.ma.getdata(dask_result),
boolean_mask)
return result
return inner_stat


MEAN = WeightedAggregator('mean', ma.average,
lazy_func=_build_dask_mdtol_function(da.nanmean))
lazy_func=_build_dask_mdtol_function(da.mean))
"""
An :class:`~iris.analysis.Aggregator` instance that calculates
the mean over a :class:`~iris.cube.Cube`, as computed by
Expand Down Expand Up @@ -1647,7 +1652,7 @@ def inner_stat(array, axis=-1, mdtol=None, **kwargs):


STD_DEV = Aggregator('standard_deviation', ma.std, ddof=1,
lazy_func=_build_dask_mdtol_function(da.nanstd))
lazy_func=_build_dask_mdtol_function(da.std))
"""
An :class:`~iris.analysis.Aggregator` instance that calculates
the standard deviation over a :class:`~iris.cube.Cube`, as
Expand Down Expand Up @@ -1714,7 +1719,7 @@ def inner_stat(array, axis=-1, mdtol=None, **kwargs):
VARIANCE = Aggregator('variance',
ma.var,
units_func=lambda units: units * units,
lazy_func=_build_dask_mdtol_function(da.nanvar),
lazy_func=_build_dask_mdtol_function(da.var),
ddof=1)
"""
An :class:`~iris.analysis.Aggregator` instance that calculates
Expand Down
61 changes: 51 additions & 10 deletions lib/iris/tests/unit/analysis/test_MEAN.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,31 +33,44 @@

class Test_lazy_aggregate(tests.IrisTest):
def setUp(self):
self.data = np.arange(12.0).reshape(3, 4)
self.data[2, 1:] = np.nan
self.data = ma.arange(12).reshape(3, 4)
self.data.mask = [[0, 0, 0, 1],
[0, 0, 1, 1],
[0, 1, 1, 1]]
# --> fractions of masked-points in columns = [0, 1/3, 2/3, 1]
self.array = as_lazy_data(self.data)
masked_data = ma.masked_array(self.data,
mask=np.isnan(self.data))
self.axis = 0
self.expected_masked = ma.mean(masked_data, axis=self.axis)
self.expected_masked = ma.mean(self.data, axis=self.axis)

def test_mdtol_default(self):
# Default operation is "mdtol=1" --> unmasked if *any* valid points.
# --> output column masks = [0, 0, 0, 1]
agg = MEAN.lazy_aggregate(self.array, axis=self.axis)
masked_result = as_concrete_data(agg)
self.assertMaskedArrayAlmostEqual(masked_result,
self.expected_masked)

@tests.skip_dask_mask
def test_mdtol_below(self):
agg = MEAN.lazy_aggregate(self.array, axis=self.axis, mdtol=0.3)
def test_mdtol_belowall(self):
# Mdtol=0.25 --> masked columns = [0, 1, 1, 1]
agg = MEAN.lazy_aggregate(self.array, axis=self.axis, mdtol=0.25)
masked_result = as_concrete_data(agg)
expected_masked = self.expected_masked
expected_masked.mask = [False, True, True, True]
self.assertMaskedArrayAlmostEqual(masked_result,
expected_masked)

def test_mdtol_above(self):
agg = MEAN.lazy_aggregate(self.array, axis=self.axis, mdtol=0.4)
def test_mdtol_intermediate(self):
# mdtol=0.5 --> masked columns = [0, 0, 1, 1]
agg = MEAN.lazy_aggregate(self.array, axis=self.axis, mdtol=0.5)
masked_result = as_concrete_data(agg)
expected_masked = self.expected_masked
expected_masked.mask = [False, False, True, True]
self.assertMaskedArrayAlmostEqual(masked_result, expected_masked)

def test_mdtol_aboveall(self):
# mdtol=0.75 --> masked columns = [0, 0, 0, 1]
# In this case, effectively the same as mdtol=None.
agg = MEAN.lazy_aggregate(self.array, axis=self.axis, mdtol=0.75)
masked_result = as_concrete_data(agg)
self.assertMaskedArrayAlmostEqual(masked_result,
self.expected_masked)
Expand All @@ -71,6 +84,34 @@ def test_multi_axis(self):
expected = np.mean(data, axis=collapse_axes)
self.assertArrayAllClose(result, expected)

def test_last_axis(self):
# From setUp:
# self.data.mask = [[0, 0, 0, 1],
# [0, 0, 1, 1],
# [0, 1, 1, 1]]
# --> fractions of masked-points in ROWS = [1/4, 1/2, 3/4]
axis = -1
agg = MEAN.lazy_aggregate(self.array, axis=axis, mdtol=0.51)
expected_masked = ma.mean(self.data, axis=-1)
expected_masked = np.ma.masked_array(expected_masked, [0, 0, 1])
masked_result = as_concrete_data(agg)
self.assertMaskedArrayAlmostEqual(masked_result,
expected_masked)

def test_all_axes_belowtol(self):
agg = MEAN.lazy_aggregate(self.array, axis=None, mdtol=0.75)
expected_masked = ma.mean(self.data)
masked_result = as_concrete_data(agg)
self.assertMaskedArrayAlmostEqual(masked_result,
expected_masked)

def test_all_axes_abovetol(self):
agg = MEAN.lazy_aggregate(self.array, axis=None, mdtol=0.45)
expected_masked = ma.masked_less([0.0], 1)
masked_result = as_concrete_data(agg)
self.assertMaskedArrayAlmostEqual(masked_result,
expected_masked)


class Test_name(tests.IrisTest):
def test(self):
Expand Down
12 changes: 6 additions & 6 deletions lib/iris/tests/unit/analysis/test_STD_DEV.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,15 @@


class Test_lazy_aggregate(tests.IrisTest):
@tests.skip_dask_mask
def test_mdtol(self):
na = np.nan
array = np.array([[1., 2., 1., 2.],
[1., 2., 3., na],
[1., 2., na, na]])
na = -999.888
array = np.ma.masked_equal([[1., 2., 1., 2.],
[1., 2., 3., na],
[1., 2., na, na]],
na)
array = as_lazy_data(array)
var = STD_DEV.lazy_aggregate(array, axis=1, mdtol=0.3)
masked_result = as_concrete_data(var, nans_replacement=np.ma.masked)
masked_result = as_concrete_data(var)
masked_expected = np.ma.masked_array([0.57735, 1., 0.707107],
mask=[0, 0, 1])
self.assertMaskedArrayAlmostEqual(masked_result, masked_expected)
Expand Down