diff --git a/lib/iris/analysis/__init__.py b/lib/iris/analysis/__init__.py index ec4d341d25..2dd62f12f9 100644 --- a/lib/iris/analysis/__init__.py +++ b/lib/iris/analysis/__init__.py @@ -1260,12 +1260,29 @@ def _proportion(array, function, axis, **kwargs): def _rms(array, axis, **kwargs): + # XXX due to the current limitations in `da.average` (see below), maintain + # an explicit non-lazy aggregation function for now. + # Note: retaining this function also means that if weights are passed to + # the lazy aggregator, the aggregation will fall back to using this + # non-lazy aggregator. rval = np.sqrt(ma.average(np.square(array), axis=axis, **kwargs)) if not ma.isMaskedArray(array): rval = np.asarray(rval) return rval +@_build_dask_mdtol_function +def _lazy_rms(array, axis, **kwargs): + # XXX This should use `da.average` and not `da.mean`, as does the above. + # However `da.average` current doesn't handle masked weights correctly + # (see https://github.com/dask/dask/issues/3846). + # To work around this we use da.mean, which doesn't support weights at + # all. Thus trying to use this aggregator with weights will currently + # raise an error in dask due to the unexpected keyword `weights`, + # rather than silently returning the wrong answer. + return da.sqrt(da.mean(array ** 2, axis=axis, **kwargs)) + + @_build_dask_mdtol_function def _lazy_sum(array, **kwargs): array = iris._lazy_data.as_lazy_data(array) @@ -1654,7 +1671,8 @@ def interp_order(length): """ -RMS = WeightedAggregator('root mean square', _rms) +RMS = WeightedAggregator('root mean square', _rms, + lazy_func=_build_dask_mdtol_function(_lazy_rms)) """ An :class:`~iris.analysis.Aggregator` instance that calculates the root mean square over a :class:`~iris.cube.Cube`, as computed by diff --git a/lib/iris/tests/unit/analysis/test_RMS.py b/lib/iris/tests/unit/analysis/test_RMS.py index 57d92cd3c6..18ef63222c 100644 --- a/lib/iris/tests/unit/analysis/test_RMS.py +++ b/lib/iris/tests/unit/analysis/test_RMS.py @@ -26,6 +26,7 @@ import numpy as np import numpy.ma as ma +from iris._lazy_data import as_lazy_data from iris.analysis import RMS @@ -88,6 +89,95 @@ def test_masked_weighted(self): self.assertAlmostEqual(rms, expected_rms) +class Test_lazy_aggregate(tests.IrisTest): + def test_1d(self): + # 1-dimensional input. + data = as_lazy_data(np.array([5, 2, 6, 4], dtype=np.float64), + chunks=-1) + rms = RMS.lazy_aggregate(data, 0) + expected_rms = 4.5 + self.assertAlmostEqual(rms, expected_rms) + + def test_2d(self): + # 2-dimensional input. + data = as_lazy_data(np.array([[5, 2, 6, 4], [12, 4, 10, 8]], + dtype=np.float64), + chunks=-1) + expected_rms = np.array([4.5, 9.0], dtype=np.float64) + rms = RMS.lazy_aggregate(data, 1) + self.assertArrayAlmostEqual(rms, expected_rms) + + def test_1d_weighted(self): + # 1-dimensional input with weights. + data = as_lazy_data(np.array([4, 7, 10, 8], dtype=np.float64), + chunks=-1) + weights = np.array([1, 4, 3, 2], dtype=np.float64) + expected_rms = 8.0 + # https://github.com/dask/dask/issues/3846. + with self.assertRaisesRegexp(TypeError, 'unexpected keyword argument'): + rms = RMS.lazy_aggregate(data, 0, weights=weights) + self.assertAlmostEqual(rms, expected_rms) + + def test_1d_lazy_weighted(self): + # 1-dimensional input with lazy weights. + data = as_lazy_data(np.array([4, 7, 10, 8], dtype=np.float64), + chunks=-1) + weights = as_lazy_data(np.array([1, 4, 3, 2], dtype=np.float64), + chunks=-1) + expected_rms = 8.0 + # https://github.com/dask/dask/issues/3846. + with self.assertRaisesRegexp(TypeError, 'unexpected keyword argument'): + rms = RMS.lazy_aggregate(data, 0, weights=weights) + self.assertAlmostEqual(rms, expected_rms) + + def test_2d_weighted(self): + # 2-dimensional input with weights. + data = as_lazy_data(np.array([[4, 7, 10, 8], [14, 16, 20, 8]], + dtype=np.float64), + chunks=-1) + weights = np.array([[1, 4, 3, 2], [2, 1, 1.5, 0.5]], dtype=np.float64) + expected_rms = np.array([8.0, 16.0], dtype=np.float64) + # https://github.com/dask/dask/issues/3846. + with self.assertRaisesRegexp(TypeError, 'unexpected keyword argument'): + rms = RMS.lazy_aggregate(data, 1, weights=weights) + self.assertArrayAlmostEqual(rms, expected_rms) + + def test_unit_weighted(self): + # Unit weights should be the same as no weights. + data = as_lazy_data(np.array([5, 2, 6, 4], dtype=np.float64), + chunks=-1) + weights = np.ones_like(data) + expected_rms = 4.5 + # https://github.com/dask/dask/issues/3846. + with self.assertRaisesRegexp(TypeError, 'unexpected keyword argument'): + rms = RMS.lazy_aggregate(data, 0, weights=weights) + self.assertAlmostEqual(rms, expected_rms) + + def test_masked(self): + # Masked entries should be completely ignored. + data = as_lazy_data(ma.array([5, 10, 2, 11, 6, 4], + mask=[False, True, False, True, False, False], + dtype=np.float64), + chunks=-1) + expected_rms = 4.5 + rms = RMS.lazy_aggregate(data, 0) + self.assertAlmostEqual(rms, expected_rms) + + def test_masked_weighted(self): + # Weights should work properly with masked arrays, but currently don't + # (see https://github.com/dask/dask/issues/3846). + # For now, masked weights are simply not supported. + data = as_lazy_data(ma.array([4, 7, 18, 10, 11, 8], + mask=[False, False, True, False, True, False], + dtype=np.float64), + chunks=-1) + weights = np.array([1, 4, 5, 3, 8, 2]) + expected_rms = 8.0 + with self.assertRaisesRegexp(TypeError, 'unexpected keyword argument'): + rms = RMS.lazy_aggregate(data, 0, weights=weights) + self.assertAlmostEqual(rms, expected_rms) + + class Test_name(tests.IrisTest): def test(self): self.assertEqual(RMS.name(), 'root_mean_square')