From 1eed3dd4250d11484b4e16c0902ecc98c614e1b0 Mon Sep 17 00:00:00 2001 From: Peter Killick Date: Fri, 3 Aug 2018 09:48:07 +0100 Subject: [PATCH 1/6] WIP: lazy aggregator --- lib/iris/analysis/__init__.py | 8 ++- lib/iris/tests/unit/analysis/test_RMS.py | 67 ++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/lib/iris/analysis/__init__.py b/lib/iris/analysis/__init__.py index ec4d341d25..eed52aecfd 100644 --- a/lib/iris/analysis/__init__.py +++ b/lib/iris/analysis/__init__.py @@ -1266,6 +1266,11 @@ def _rms(array, axis, **kwargs): return rval +@_build_dask_mdtol_function +def _lazy_rms(array, axis, **kwargs): + return da.sqrt(da.average(array ** 2, axis=axis, **kwargs)) + + @_build_dask_mdtol_function def _lazy_sum(array, **kwargs): array = iris._lazy_data.as_lazy_data(array) @@ -1654,7 +1659,8 @@ def interp_order(length): """ -RMS = WeightedAggregator('root mean square', _rms) +RMS = WeightedAggregator('root mean square', _rms, + lazy_func=_build_dask_mdtol_function(_lazy_rms)) """ An :class:`~iris.analysis.Aggregator` instance that calculates the root mean square over a :class:`~iris.cube.Cube`, as computed by diff --git a/lib/iris/tests/unit/analysis/test_RMS.py b/lib/iris/tests/unit/analysis/test_RMS.py index 57d92cd3c6..17d69c3386 100644 --- a/lib/iris/tests/unit/analysis/test_RMS.py +++ b/lib/iris/tests/unit/analysis/test_RMS.py @@ -26,6 +26,7 @@ import numpy as np import numpy.ma as ma +from iris._lazy_data import as_lazy_data from iris.analysis import RMS @@ -88,6 +89,72 @@ def test_masked_weighted(self): self.assertAlmostEqual(rms, expected_rms) +class Test_lazy_aggregate(tests.IrisTest): + def test_1d(self): + # 1-dimensional input + data = as_lazy_data(np.array([5, 2, 6, 4], dtype=np.float64), + chunks=-1) + rms = RMS.aggregate(data, 0) + expected_rms = 4.5 + self.assertAlmostEqual(rms, expected_rms) + + def test_2d(self): + # 2-dimensional input + data = as_lazy_data(np.array([[5, 2, 6, 4], [12, 4, 10, 8]], + dtype=np.float64), + chunks=-1) + expected_rms = np.array([4.5, 9.0], dtype=np.float64) + rms = RMS.aggregate(data, 1) + self.assertArrayAlmostEqual(rms, expected_rms) + + def test_1d_weighted(self): + # 1-dimensional input with weights + data = as_lazy_data(np.array([4, 7, 10, 8], dtype=np.float64), + chunks=-1) + weights = np.array([1, 4, 3, 2], dtype=np.float64) + expected_rms = 8.0 + rms = RMS.aggregate(data, 0, weights=weights) + self.assertAlmostEqual(rms, expected_rms) + + def test_2d_weighted(self): + # 2-dimensional input with weights + data = as_lazy_data(np.array([[4, 7, 10, 8], [14, 16, 20, 8]], + dtype=np.float64), + chunks=-1) + weights = np.array([[1, 4, 3, 2], [2, 1, 1.5, 0.5]], dtype=np.float64) + expected_rms = np.array([8.0, 16.0], dtype=np.float64) + rms = RMS.aggregate(data, 1, weights=weights) + self.assertArrayAlmostEqual(rms, expected_rms) + + def test_unit_weighted(self): + # unit weights should be the same as no weights + data = as_lazy_data(np.array([5, 2, 6, 4], dtype=np.float64), + chunks=-1) + weights = np.ones_like(data) + rms = RMS.aggregate(data, 0, weights=weights) + expected_rms = 4.5 + self.assertAlmostEqual(rms, expected_rms) + + def test_masked(self): + # masked entries should be completely ignored + data = ma.array([5, 10, 2, 11, 6, 4], + mask=[False, True, False, True, False, False], + dtype=np.float64) + expected_rms = 4.5 + rms = RMS.aggregate(data, 0) + self.assertAlmostEqual(rms, expected_rms) + + def test_masked_weighted(self): + # weights should work properly with masked arrays + data = ma.array([4, 7, 18, 10, 11, 8], + mask=[False, False, True, False, True, False], + dtype=np.float64) + weights = np.array([1, 4, 5, 3, 8, 2], dtype=np.float64) + expected_rms = 8.0 + rms = RMS.aggregate(data, 0, weights=weights) + self.assertAlmostEqual(rms, expected_rms) + + class Test_name(tests.IrisTest): def test(self): self.assertEqual(RMS.name(), 'root_mean_square') From 5df411262ead8b4d196c29cfcd4ce97fa3569420 Mon Sep 17 00:00:00 2001 From: Niall Robinson Date: Fri, 3 Aug 2018 10:41:13 +0100 Subject: [PATCH 2/6] WIP --- lib/iris/tests/unit/analysis/test_RMS.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/iris/tests/unit/analysis/test_RMS.py b/lib/iris/tests/unit/analysis/test_RMS.py index 17d69c3386..eb9d1a70cc 100644 --- a/lib/iris/tests/unit/analysis/test_RMS.py +++ b/lib/iris/tests/unit/analysis/test_RMS.py @@ -137,18 +137,20 @@ def test_unit_weighted(self): def test_masked(self): # masked entries should be completely ignored - data = ma.array([5, 10, 2, 11, 6, 4], + data = as_lazy_data(ma.array([5, 10, 2, 11, 6, 4], mask=[False, True, False, True, False, False], - dtype=np.float64) + dtype=np.float64), + chunks=-1) expected_rms = 4.5 rms = RMS.aggregate(data, 0) self.assertAlmostEqual(rms, expected_rms) def test_masked_weighted(self): # weights should work properly with masked arrays - data = ma.array([4, 7, 18, 10, 11, 8], + data = as_lazy_data(ma.array([4, 7, 18, 10, 11, 8], mask=[False, False, True, False, True, False], - dtype=np.float64) + dtype=np.float64), + chunks=-1) weights = np.array([1, 4, 5, 3, 8, 2], dtype=np.float64) expected_rms = 8.0 rms = RMS.aggregate(data, 0, weights=weights) From b8f7de9ed6996f1ecd6a13982546b711c1a629dd Mon Sep 17 00:00:00 2001 From: Peter Killick Date: Fri, 3 Aug 2018 16:05:52 +0100 Subject: [PATCH 3/6] Lazy RMS aggregator --- lib/iris/analysis/__init__.py | 7 ++- lib/iris/tests/unit/analysis/test_RMS.py | 63 ++++++++++++++++-------- 2 files changed, 48 insertions(+), 22 deletions(-) diff --git a/lib/iris/analysis/__init__.py b/lib/iris/analysis/__init__.py index eed52aecfd..890a854e24 100644 --- a/lib/iris/analysis/__init__.py +++ b/lib/iris/analysis/__init__.py @@ -1268,7 +1268,12 @@ def _rms(array, axis, **kwargs): @_build_dask_mdtol_function def _lazy_rms(array, axis, **kwargs): - return da.sqrt(da.average(array ** 2, axis=axis, **kwargs)) + # XXX This should use `da.average` and not `da.mean`, as does the above. + # However `da.average` current doesn't handle masked weights correctly + # (see https://github.com/dask/dask/issues/3846). + # To work around this we use da.mean, which doesn't support weights at + # all, so we raise an error rather than silently giving the wrong answer. + return da.sqrt(da.mean(array ** 2, axis=axis, **kwargs)) @_build_dask_mdtol_function diff --git a/lib/iris/tests/unit/analysis/test_RMS.py b/lib/iris/tests/unit/analysis/test_RMS.py index eb9d1a70cc..f58d71f8b9 100644 --- a/lib/iris/tests/unit/analysis/test_RMS.py +++ b/lib/iris/tests/unit/analysis/test_RMS.py @@ -91,70 +91,91 @@ def test_masked_weighted(self): class Test_lazy_aggregate(tests.IrisTest): def test_1d(self): - # 1-dimensional input + # 1-dimensional input. data = as_lazy_data(np.array([5, 2, 6, 4], dtype=np.float64), chunks=-1) - rms = RMS.aggregate(data, 0) + rms = RMS.lazy_aggregate(data, 0) expected_rms = 4.5 self.assertAlmostEqual(rms, expected_rms) def test_2d(self): - # 2-dimensional input + # 2-dimensional input. data = as_lazy_data(np.array([[5, 2, 6, 4], [12, 4, 10, 8]], dtype=np.float64), chunks=-1) expected_rms = np.array([4.5, 9.0], dtype=np.float64) - rms = RMS.aggregate(data, 1) + rms = RMS.lazy_aggregate(data, 1) self.assertArrayAlmostEqual(rms, expected_rms) def test_1d_weighted(self): - # 1-dimensional input with weights + # 1-dimensional input with weights. data = as_lazy_data(np.array([4, 7, 10, 8], dtype=np.float64), chunks=-1) weights = np.array([1, 4, 3, 2], dtype=np.float64) expected_rms = 8.0 - rms = RMS.aggregate(data, 0, weights=weights) - self.assertAlmostEqual(rms, expected_rms) + # https://github.com/dask/dask/issues/3846. + with self.assertRaises(TypeError): + rms = RMS.lazy_aggregate(data, 0, weights=weights) + self.assertAlmostEqual(rms, expected_rms) + + def test_1d_lazy_weighted(self): + # 1-dimensional input with lazy weights. + data = as_lazy_data(np.array([4, 7, 10, 8], dtype=np.float64), + chunks=-1) + weights = as_lazy_data(np.array([1, 4, 3, 2], dtype=np.float64), + chunks=-1) + expected_rms = 8.0 + # https://github.com/dask/dask/issues/3846. + with self.assertRaises(TypeError): + rms = RMS.lazy_aggregate(data, 0, weights=weights) + self.assertAlmostEqual(rms, expected_rms) def test_2d_weighted(self): - # 2-dimensional input with weights + # 2-dimensional input with weights. data = as_lazy_data(np.array([[4, 7, 10, 8], [14, 16, 20, 8]], dtype=np.float64), chunks=-1) weights = np.array([[1, 4, 3, 2], [2, 1, 1.5, 0.5]], dtype=np.float64) expected_rms = np.array([8.0, 16.0], dtype=np.float64) - rms = RMS.aggregate(data, 1, weights=weights) - self.assertArrayAlmostEqual(rms, expected_rms) + # https://github.com/dask/dask/issues/3846. + with self.assertRaises(TypeError): + rms = RMS.lazy_aggregate(data, 1, weights=weights) + self.assertArrayAlmostEqual(rms, expected_rms) def test_unit_weighted(self): - # unit weights should be the same as no weights + # Unit weights should be the same as no weights. data = as_lazy_data(np.array([5, 2, 6, 4], dtype=np.float64), chunks=-1) weights = np.ones_like(data) - rms = RMS.aggregate(data, 0, weights=weights) expected_rms = 4.5 - self.assertAlmostEqual(rms, expected_rms) + # https://github.com/dask/dask/issues/3846. + with self.assertRaises(TypeError): + rms = RMS.lazy_aggregate(data, 0, weights=weights) + self.assertAlmostEqual(rms, expected_rms) def test_masked(self): - # masked entries should be completely ignored + # Masked entries should be completely ignored. data = as_lazy_data(ma.array([5, 10, 2, 11, 6, 4], mask=[False, True, False, True, False, False], dtype=np.float64), chunks=-1) expected_rms = 4.5 - rms = RMS.aggregate(data, 0) + rms = RMS.lazy_aggregate(data, 0) self.assertAlmostEqual(rms, expected_rms) def test_masked_weighted(self): - # weights should work properly with masked arrays + # Weights should work properly with masked arrays, but currently don't + # (see https://github.com/dask/dask/issues/3846). + # For now, masked weights are simply not supported. data = as_lazy_data(ma.array([4, 7, 18, 10, 11, 8], - mask=[False, False, True, False, True, False], - dtype=np.float64), + mask=[False, False, True, False, True, False], + dtype=np.float64), chunks=-1) - weights = np.array([1, 4, 5, 3, 8, 2], dtype=np.float64) + weights = np.array([1, 4, 5, 3, 8, 2]) expected_rms = 8.0 - rms = RMS.aggregate(data, 0, weights=weights) - self.assertAlmostEqual(rms, expected_rms) + with self.assertRaisesRegexp(TypeError, 'unexpected keyword argument'): + rms = RMS.lazy_aggregate(data, 0, weights=weights) + self.assertAlmostEqual(rms, expected_rms) class Test_name(tests.IrisTest): From 5870c642fb690dd9a6616d38f25345e8e7bac169 Mon Sep 17 00:00:00 2001 From: Peter Killick Date: Fri, 3 Aug 2018 16:27:12 +0100 Subject: [PATCH 4/6] pep8 --- lib/iris/tests/unit/analysis/test_RMS.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/iris/tests/unit/analysis/test_RMS.py b/lib/iris/tests/unit/analysis/test_RMS.py index f58d71f8b9..210a7ad058 100644 --- a/lib/iris/tests/unit/analysis/test_RMS.py +++ b/lib/iris/tests/unit/analysis/test_RMS.py @@ -156,8 +156,8 @@ def test_unit_weighted(self): def test_masked(self): # Masked entries should be completely ignored. data = as_lazy_data(ma.array([5, 10, 2, 11, 6, 4], - mask=[False, True, False, True, False, False], - dtype=np.float64), + mask=[False, True, False, True, False, False], + dtype=np.float64), chunks=-1) expected_rms = 4.5 rms = RMS.lazy_aggregate(data, 0) From 4c7adcf8c8115362f3c774bc8374865beb1e0c71 Mon Sep 17 00:00:00 2001 From: Peter Killick Date: Tue, 14 Aug 2018 14:59:32 +0100 Subject: [PATCH 5/6] Improve comment wording to clarify the error that's raised --- lib/iris/analysis/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/iris/analysis/__init__.py b/lib/iris/analysis/__init__.py index 890a854e24..a3294cc291 100644 --- a/lib/iris/analysis/__init__.py +++ b/lib/iris/analysis/__init__.py @@ -1272,7 +1272,9 @@ def _lazy_rms(array, axis, **kwargs): # However `da.average` current doesn't handle masked weights correctly # (see https://github.com/dask/dask/issues/3846). # To work around this we use da.mean, which doesn't support weights at - # all, so we raise an error rather than silently giving the wrong answer. + # all. Thus trying to use this aggregator with weights will currently + # raise an error in dask due to the unexpected keyword `weights`, + # rather than silently returning the wrong answer. return da.sqrt(da.mean(array ** 2, axis=axis, **kwargs)) From 81680391aa59cb1ca8c6a17bc7ce67af2ae1b88d Mon Sep 17 00:00:00 2001 From: Peter Killick Date: Wed, 24 Oct 2018 16:05:49 +0100 Subject: [PATCH 6/6] Rebase and testing updates --- lib/iris/analysis/__init__.py | 5 +++++ lib/iris/tests/unit/analysis/test_RMS.py | 8 ++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/lib/iris/analysis/__init__.py b/lib/iris/analysis/__init__.py index a3294cc291..2dd62f12f9 100644 --- a/lib/iris/analysis/__init__.py +++ b/lib/iris/analysis/__init__.py @@ -1260,6 +1260,11 @@ def _proportion(array, function, axis, **kwargs): def _rms(array, axis, **kwargs): + # XXX due to the current limitations in `da.average` (see below), maintain + # an explicit non-lazy aggregation function for now. + # Note: retaining this function also means that if weights are passed to + # the lazy aggregator, the aggregation will fall back to using this + # non-lazy aggregator. rval = np.sqrt(ma.average(np.square(array), axis=axis, **kwargs)) if not ma.isMaskedArray(array): rval = np.asarray(rval) diff --git a/lib/iris/tests/unit/analysis/test_RMS.py b/lib/iris/tests/unit/analysis/test_RMS.py index 210a7ad058..18ef63222c 100644 --- a/lib/iris/tests/unit/analysis/test_RMS.py +++ b/lib/iris/tests/unit/analysis/test_RMS.py @@ -114,7 +114,7 @@ def test_1d_weighted(self): weights = np.array([1, 4, 3, 2], dtype=np.float64) expected_rms = 8.0 # https://github.com/dask/dask/issues/3846. - with self.assertRaises(TypeError): + with self.assertRaisesRegexp(TypeError, 'unexpected keyword argument'): rms = RMS.lazy_aggregate(data, 0, weights=weights) self.assertAlmostEqual(rms, expected_rms) @@ -126,7 +126,7 @@ def test_1d_lazy_weighted(self): chunks=-1) expected_rms = 8.0 # https://github.com/dask/dask/issues/3846. - with self.assertRaises(TypeError): + with self.assertRaisesRegexp(TypeError, 'unexpected keyword argument'): rms = RMS.lazy_aggregate(data, 0, weights=weights) self.assertAlmostEqual(rms, expected_rms) @@ -138,7 +138,7 @@ def test_2d_weighted(self): weights = np.array([[1, 4, 3, 2], [2, 1, 1.5, 0.5]], dtype=np.float64) expected_rms = np.array([8.0, 16.0], dtype=np.float64) # https://github.com/dask/dask/issues/3846. - with self.assertRaises(TypeError): + with self.assertRaisesRegexp(TypeError, 'unexpected keyword argument'): rms = RMS.lazy_aggregate(data, 1, weights=weights) self.assertArrayAlmostEqual(rms, expected_rms) @@ -149,7 +149,7 @@ def test_unit_weighted(self): weights = np.ones_like(data) expected_rms = 4.5 # https://github.com/dask/dask/issues/3846. - with self.assertRaises(TypeError): + with self.assertRaisesRegexp(TypeError, 'unexpected keyword argument'): rms = RMS.lazy_aggregate(data, 0, weights=weights) self.assertAlmostEqual(rms, expected_rms)