-
Notifications
You must be signed in to change notification settings - Fork 300
[ENH] More lazy aggregators #3168
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
5bd0d15
6186081
2663a5a
b4bf847
b1b4b59
00ee159
76b819b
9908eba
e4e07ad
50fad00
aa0fc4e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1193,11 +1193,22 @@ def _weighted_percentile(data, axis, weights, percent, returned=False, | |
|
|
||
| def _count(array, function, axis, **kwargs): | ||
| if not callable(function): | ||
| raise ValueError('function must be a callable. Got %s.' | ||
| % type(function)) | ||
| emsg = 'function must be a callable. Got {}.' | ||
| raise TypeError(emsg.format(type(function))) | ||
| return ma.sum(function(array), axis=axis, **kwargs) | ||
|
|
||
|
|
||
| def _lazy_count(array, axis, **kwargs): | ||
| try: | ||
| func = kwargs.pop('function') | ||
| except KeyError: | ||
| raise KeyError('no selection function supplied.') | ||
| if not callable(func): | ||
| emsg = 'function must be a callable. Got {}.' | ||
| raise TypeError(emsg.format(type(func))) | ||
| return da.sum(func(array), axis=axis, **kwargs) | ||
|
|
||
|
|
||
| def _proportion(array, function, axis, **kwargs): | ||
| # if the incoming array is masked use that to count the total number of | ||
| # values | ||
|
|
@@ -1248,6 +1259,25 @@ def _sum(array, **kwargs): | |
| return rvalue | ||
|
|
||
|
|
||
| def _lazy_sum(array, axis, **kwargs): | ||
|
||
| # weighted or scaled sum | ||
| weights_in = kwargs.pop('weights', None) | ||
| returned_in = kwargs.pop('returned', False) | ||
| if weights_in is not None: | ||
| wsum = da.sum(weights_in * array, axis=axis, **kwargs) | ||
| else: | ||
| wsum = da.sum(array, axis=axis, **kwargs) | ||
| if returned_in: | ||
| if weights_in is None: | ||
| weights = iris_lazy_data.as_lazy_data(np.ones_like(array)) | ||
| else: | ||
| weights = weights_in | ||
| rvalue = (wsum, da.sum(weights, axis=axis)) | ||
| else: | ||
| rvalue = wsum | ||
| return rvalue | ||
|
|
||
|
|
||
| def _peak(array, **kwargs): | ||
| def column_segments(column): | ||
| nan_indices = np.where(np.isnan(column))[0] | ||
|
|
@@ -1353,7 +1383,8 @@ def interp_order(length): | |
| # Common partial Aggregation class constructors. | ||
| # | ||
| COUNT = Aggregator('count', _count, | ||
| units_func=lambda units: 1) | ||
| units_func=lambda units: 1, | ||
| lazy_func=_lazy_count) | ||
| """ | ||
| An :class:`~iris.analysis.Aggregator` instance that counts the number | ||
| of :class:`~iris.cube.Cube` data occurrences that satisfy a particular | ||
|
|
@@ -1419,23 +1450,6 @@ def interp_order(length): | |
| """ | ||
|
|
||
|
|
||
| MAX = Aggregator('maximum', ma.max) | ||
| """ | ||
| An :class:`~iris.analysis.Aggregator` instance that calculates | ||
| the maximum over a :class:`~iris.cube.Cube`, as computed by | ||
| :func:`numpy.ma.max`. | ||
|
|
||
| **For example**: | ||
|
|
||
| To compute zonal maximums over the *longitude* axis of a cube:: | ||
|
|
||
| result = cube.collapsed('longitude', iris.analysis.MAX) | ||
|
|
||
| This aggregator handles masked data. | ||
|
|
||
| """ | ||
|
|
||
|
|
||
| def _build_dask_mdtol_function(dask_stats_function): | ||
| """ | ||
| Make a wrapped dask statistic function that supports the 'mdtol' keyword. | ||
|
|
@@ -1469,6 +1483,7 @@ def inner_stat(array, axis=-1, mdtol=None, **kwargs): | |
| return result | ||
| return inner_stat | ||
|
|
||
|
|
||
| MEAN = WeightedAggregator('mean', ma.average, | ||
| lazy_func=_build_dask_mdtol_function(da.mean)) | ||
| """ | ||
|
|
@@ -1534,7 +1549,8 @@ def inner_stat(array, axis=-1, mdtol=None, **kwargs): | |
| """ | ||
|
|
||
|
|
||
| MIN = Aggregator('minimum', ma.min) | ||
| MIN = Aggregator('minimum', ma.min, | ||
| lazy_func=_build_dask_mdtol_function(da.min)) | ||
| """ | ||
| An :class:`~iris.analysis.Aggregator` instance that calculates | ||
| the minimum over a :class:`~iris.cube.Cube`, as computed by | ||
|
|
@@ -1551,6 +1567,24 @@ def inner_stat(array, axis=-1, mdtol=None, **kwargs): | |
| """ | ||
|
|
||
|
|
||
| MAX = Aggregator('maximum', ma.max, | ||
| lazy_func=_build_dask_mdtol_function(da.max)) | ||
| """ | ||
| An :class:`~iris.analysis.Aggregator` instance that calculates | ||
| the maximum over a :class:`~iris.cube.Cube`, as computed by | ||
| :func:`numpy.ma.max`. | ||
|
|
||
| **For example**: | ||
|
|
||
| To compute zonal maximums over the *longitude* axis of a cube:: | ||
|
|
||
| result = cube.collapsed('longitude', iris.analysis.MAX) | ||
|
|
||
| This aggregator handles masked data. | ||
|
|
||
| """ | ||
|
|
||
|
|
||
| PEAK = Aggregator('peak', _peak) | ||
| """ | ||
| An :class:`~iris.analysis.Aggregator` instance that calculates | ||
|
|
@@ -1700,7 +1734,7 @@ def inner_stat(array, axis=-1, mdtol=None, **kwargs): | |
| """ | ||
|
|
||
|
|
||
| SUM = WeightedAggregator('sum', _sum) | ||
| SUM = WeightedAggregator('sum', _sum, lazy_func=_lazy_sum) | ||
| """ | ||
| An :class:`~iris.analysis.Aggregator` instance that calculates | ||
| the sum over a :class:`~iris.cube.Cube`, as computed by :func:`numpy.ma.sum`. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,4 @@ | ||
| # (C) British Crown Copyright 2013 - 2017, Met Office | ||
| # (C) British Crown Copyright 2013 - 2018, Met Office | ||
| # | ||
| # This file is part of Iris. | ||
| # | ||
|
|
@@ -23,11 +23,51 @@ | |
| # importing anything else. | ||
| import iris.tests as tests | ||
|
|
||
| import numpy as np | ||
| import numpy.ma as ma | ||
|
|
||
| from iris.analysis import COUNT | ||
| import iris.cube | ||
| from iris.cube import Cube | ||
| from iris.coords import DimCoord | ||
| from iris._lazy_data import as_lazy_data | ||
|
|
||
|
|
||
| class Test_basics(tests.IrisTest): | ||
| def setUp(self): | ||
| data = np.array([1, 2, 3, 4, 5]) | ||
| coord = DimCoord([6, 7, 8, 9, 10], long_name='foo') | ||
| self.cube = Cube(data) | ||
| self.cube.add_dim_coord(coord, 0) | ||
| self.lazy_cube = Cube(as_lazy_data(data)) | ||
| self.lazy_cube.add_dim_coord(coord, 0) | ||
| self.func = lambda x: x >= 3 | ||
|
|
||
| def test_name(self): | ||
| self.assertEqual(COUNT.name(), 'count') | ||
|
|
||
| def test_no_function(self): | ||
| with self.assertRaisesRegexp(KeyError, 'no selection function'): | ||
| self.lazy_cube.collapsed("foo", COUNT) | ||
|
||
|
|
||
| def test_not_callable(self): | ||
| with self.assertRaisesRegexp(TypeError, 'function must be a callable'): | ||
| self.cube.collapsed("foo", COUNT, function='wibble') | ||
|
|
||
| def test_lazy_not_callable(self): | ||
| with self.assertRaisesRegexp(TypeError, 'function must be a callable'): | ||
| self.lazy_cube.collapsed("foo", COUNT, function='wibble') | ||
|
|
||
| def test_collapse(self): | ||
| cube = self.cube.collapsed("foo", COUNT, function=self.func) | ||
| self.assertArrayEqual(cube.data, [3]) | ||
|
|
||
| def test_lazy(self): | ||
| cube = self.lazy_cube.collapsed("foo", COUNT, function=self.func) | ||
| self.assertTrue(cube.has_lazy_data()) | ||
|
|
||
| def test_lazy_collapse(self): | ||
| cube = self.lazy_cube.collapsed("foo", COUNT, function=self.func) | ||
| self.assertArrayEqual(cube.data, [3]) | ||
|
|
||
|
|
||
| class Test_units_func(tests.IrisTest): | ||
|
|
@@ -39,7 +79,7 @@ def test(self): | |
|
|
||
| class Test_masked(tests.IrisTest): | ||
| def setUp(self): | ||
| self.cube = iris.cube.Cube(ma.masked_equal([1, 2, 3, 4, 5], 3)) | ||
| self.cube = Cube(ma.masked_equal([1, 2, 3, 4, 5], 3)) | ||
| self.cube.add_dim_coord(DimCoord([6, 7, 8, 9, 10], long_name='foo'), 0) | ||
| self.func = lambda x: x >= 3 | ||
|
|
||
|
|
@@ -48,9 +88,37 @@ def test_ma(self): | |
| self.assertArrayEqual(cube.data, [2]) | ||
|
|
||
|
|
||
| class Test_name(tests.IrisTest): | ||
| def test(self): | ||
| self.assertEqual(COUNT.name(), 'count') | ||
| class Test_lazy(tests.IrisTest): | ||
| def setUp(self): | ||
| data = np.array([1, 2, 3, 4, 5]) | ||
| self.cube = Cube(as_lazy_data(data)) | ||
| self.cube.add_dim_coord(DimCoord([6, 7, 8, 9, 10], long_name='foo'), 0) | ||
| self.func = lambda x: x >= 3 | ||
|
|
||
| def test_lazy_oper(self): | ||
| cube = self.cube.collapsed("foo", COUNT, function=self.func) | ||
| self.assertTrue(cube.has_lazy_data()) | ||
|
|
||
| def test_collapse(self): | ||
| result = self.cube.collapsed("foo", COUNT, function=self.func) | ||
| self.cube.data | ||
| expected = self.cube.collapsed("foo", COUNT, function=self.func) | ||
| self.assertArrayEqual(result.data, expected.data) | ||
|
|
||
|
|
||
| class Test_lazy_masked(tests.IrisTest): | ||
| def setUp(self): | ||
| lazy_data = as_lazy_data(ma.masked_equal([1, 2, 3, 4, 5], 3)) | ||
| self.lazy_cube = Cube(lazy_data) | ||
| self.lazy_cube.add_dim_coord(DimCoord([6, 7, 8, 9, 10], | ||
| long_name='foo'), | ||
| 0) | ||
| self.func = lambda x: x >= 3 | ||
|
|
||
| def test_ma(self): | ||
| cube = self.lazy_cube.collapsed("foo", COUNT, function=self.func) | ||
| self.assertTrue(cube.has_lazy_data()) | ||
| self.assertArrayEqual(cube.data, [2]) | ||
|
|
||
|
|
||
| class Test_aggregate_shape(tests.IrisTest): | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,92 @@ | ||
| # (C) British Crown Copyright 2018, Met Office | ||
| # | ||
| # This file is part of Iris. | ||
| # | ||
| # Iris is free software: you can redistribute it and/or modify it under | ||
| # the terms of the GNU Lesser General Public License as published by the | ||
| # Free Software Foundation, either version 3 of the License, or | ||
| # (at your option) any later version. | ||
| # | ||
| # Iris is distributed in the hope that it will be useful, | ||
| # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| # GNU Lesser General Public License for more details. | ||
| # | ||
| # You should have received a copy of the GNU Lesser General Public License | ||
| # along with Iris. If not, see <http://www.gnu.org/licenses/>. | ||
| """Unit tests for the :data:`iris.analysis.MAX` aggregator.""" | ||
|
|
||
| from __future__ import (absolute_import, division, print_function) | ||
| from six.moves import (filter, input, map, range, zip) # noqa | ||
|
|
||
| # Import iris.tests first so that some things can be initialised before | ||
| # importing anything else. | ||
| import iris.tests as tests | ||
|
|
||
| import numpy as np | ||
| import numpy.ma as ma | ||
|
|
||
| from iris.analysis import MAX | ||
| from iris.cube import Cube | ||
| from iris.coords import DimCoord | ||
| from iris._lazy_data import as_lazy_data | ||
|
|
||
|
|
||
| class Test_basics(tests.IrisTest): | ||
| def setUp(self): | ||
| data = np.array([1, 2, 3, 4, 5]) | ||
| coord = DimCoord([6, 7, 8, 9, 10], long_name='foo') | ||
| self.cube = Cube(data) | ||
| self.cube.add_dim_coord(coord, 0) | ||
| self.lazy_cube = Cube(as_lazy_data(data)) | ||
| self.lazy_cube.add_dim_coord(coord, 0) | ||
|
|
||
| def test_name(self): | ||
| self.assertEqual(MAX.name(), 'maximum') | ||
|
|
||
| def test_collapse(self): | ||
| cube = self.cube.collapsed("foo", MAX) | ||
| self.assertArrayEqual(cube.data, [5]) | ||
|
|
||
| def test_lazy(self): | ||
| cube = self.lazy_cube.collapsed("foo", MAX) | ||
| self.assertTrue(cube.has_lazy_data()) | ||
|
|
||
| def test_lazy_collapse(self): | ||
| cube = self.lazy_cube.collapsed("foo", MAX) | ||
| self.assertArrayEqual(cube.data, [5]) | ||
|
|
||
|
|
||
| class Test_masked(tests.IrisTest): | ||
| def setUp(self): | ||
| self.cube = Cube(ma.masked_greater([1, 2, 3, 4, 5], 3)) | ||
| self.cube.add_dim_coord(DimCoord([6, 7, 8, 9, 10], long_name='foo'), 0) | ||
|
|
||
| def test_ma(self): | ||
| cube = self.cube.collapsed("foo", MAX) | ||
| self.assertArrayEqual(cube.data, [3]) | ||
|
|
||
|
|
||
| class Test_lazy_masked(tests.IrisTest): | ||
| def setUp(self): | ||
| masked_data = ma.masked_greater([1, 2, 3, 4, 5], 3) | ||
| self.cube = Cube(as_lazy_data(masked_data)) | ||
| self.cube.add_dim_coord(DimCoord([6, 7, 8, 9, 10], long_name='foo'), 0) | ||
|
|
||
| def test_lazy_ma(self): | ||
| cube = self.cube.collapsed("foo", MAX) | ||
| self.assertTrue(cube.has_lazy_data()) | ||
| self.assertArrayEqual(cube.data, [3]) | ||
|
|
||
|
|
||
| class Test_aggregate_shape(tests.IrisTest): | ||
| def test(self): | ||
| shape = () | ||
| kwargs = dict() | ||
| self.assertTupleEqual(MAX.aggregate_shape(**kwargs), shape) | ||
| kwargs = dict(wibble='wobble') | ||
| self.assertTupleEqual(MAX.aggregate_shape(**kwargs), shape) | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| tests.main() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm +0 on this extra exception. Without it (assuming you do
func = kwargs.pop('function', None)we get a TypeError, which is good enough for me (I accept that KeyError give you even more exception fidelity, but my argument is that the fidelity isn't worth the extra lines).