Skip to content
78 changes: 56 additions & 22 deletions lib/iris/analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1193,11 +1193,22 @@ def _weighted_percentile(data, axis, weights, percent, returned=False,

def _count(array, function, axis, **kwargs):
if not callable(function):
raise ValueError('function must be a callable. Got %s.'
% type(function))
emsg = 'function must be a callable. Got {}.'
raise TypeError(emsg.format(type(function)))
return ma.sum(function(array), axis=axis, **kwargs)


def _lazy_count(array, axis, **kwargs):
try:
func = kwargs.pop('function')
except KeyError:
raise KeyError('no selection function supplied.')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm +0 on this extra exception. Without it (assuming you do func = kwargs.pop('function', None) we get a TypeError, which is good enough for me (I accept that KeyError give you even more exception fidelity, but my argument is that the fidelity isn't worth the extra lines).

if not callable(func):
emsg = 'function must be a callable. Got {}.'
raise TypeError(emsg.format(type(func)))
return da.sum(func(array), axis=axis, **kwargs)


def _proportion(array, function, axis, **kwargs):
# if the incoming array is masked use that to count the total number of
# values
Expand Down Expand Up @@ -1248,6 +1259,25 @@ def _sum(array, **kwargs):
return rvalue


def _lazy_sum(array, axis, **kwargs):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious: why are we duplicating all of this logic, and not just implementing it once lazily? If we want a non-lazy form, we can just compute the result (as in, lambda _sum: _lazy_sum(*args, **kwargs).compute()), right?

# weighted or scaled sum
weights_in = kwargs.pop('weights', None)
returned_in = kwargs.pop('returned', False)
if weights_in is not None:
wsum = da.sum(weights_in * array, axis=axis, **kwargs)
else:
wsum = da.sum(array, axis=axis, **kwargs)
if returned_in:
if weights_in is None:
weights = iris_lazy_data.as_lazy_data(np.ones_like(array))
else:
weights = weights_in
rvalue = (wsum, da.sum(weights, axis=axis))
else:
rvalue = wsum
return rvalue


def _peak(array, **kwargs):
def column_segments(column):
nan_indices = np.where(np.isnan(column))[0]
Expand Down Expand Up @@ -1353,7 +1383,8 @@ def interp_order(length):
# Common partial Aggregation class constructors.
#
COUNT = Aggregator('count', _count,
units_func=lambda units: 1)
units_func=lambda units: 1,
lazy_func=_lazy_count)
"""
An :class:`~iris.analysis.Aggregator` instance that counts the number
of :class:`~iris.cube.Cube` data occurrences that satisfy a particular
Expand Down Expand Up @@ -1419,23 +1450,6 @@ def interp_order(length):
"""


MAX = Aggregator('maximum', ma.max)
"""
An :class:`~iris.analysis.Aggregator` instance that calculates
the maximum over a :class:`~iris.cube.Cube`, as computed by
:func:`numpy.ma.max`.

**For example**:

To compute zonal maximums over the *longitude* axis of a cube::

result = cube.collapsed('longitude', iris.analysis.MAX)

This aggregator handles masked data.

"""


def _build_dask_mdtol_function(dask_stats_function):
"""
Make a wrapped dask statistic function that supports the 'mdtol' keyword.
Expand Down Expand Up @@ -1469,6 +1483,7 @@ def inner_stat(array, axis=-1, mdtol=None, **kwargs):
return result
return inner_stat


MEAN = WeightedAggregator('mean', ma.average,
lazy_func=_build_dask_mdtol_function(da.mean))
"""
Expand Down Expand Up @@ -1534,7 +1549,8 @@ def inner_stat(array, axis=-1, mdtol=None, **kwargs):
"""


MIN = Aggregator('minimum', ma.min)
MIN = Aggregator('minimum', ma.min,
lazy_func=_build_dask_mdtol_function(da.min))
"""
An :class:`~iris.analysis.Aggregator` instance that calculates
the minimum over a :class:`~iris.cube.Cube`, as computed by
Expand All @@ -1551,6 +1567,24 @@ def inner_stat(array, axis=-1, mdtol=None, **kwargs):
"""


MAX = Aggregator('maximum', ma.max,
lazy_func=_build_dask_mdtol_function(da.max))
"""
An :class:`~iris.analysis.Aggregator` instance that calculates
the maximum over a :class:`~iris.cube.Cube`, as computed by
:func:`numpy.ma.max`.

**For example**:

To compute zonal maximums over the *longitude* axis of a cube::

result = cube.collapsed('longitude', iris.analysis.MAX)

This aggregator handles masked data.

"""


PEAK = Aggregator('peak', _peak)
"""
An :class:`~iris.analysis.Aggregator` instance that calculates
Expand Down Expand Up @@ -1700,7 +1734,7 @@ def inner_stat(array, axis=-1, mdtol=None, **kwargs):
"""


SUM = WeightedAggregator('sum', _sum)
SUM = WeightedAggregator('sum', _sum, lazy_func=_lazy_sum)
"""
An :class:`~iris.analysis.Aggregator` instance that calculates
the sum over a :class:`~iris.cube.Cube`, as computed by :func:`numpy.ma.sum`.
Expand Down
80 changes: 74 additions & 6 deletions lib/iris/tests/unit/analysis/test_COUNT.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# (C) British Crown Copyright 2013 - 2017, Met Office
# (C) British Crown Copyright 2013 - 2018, Met Office
#
# This file is part of Iris.
#
Expand All @@ -23,11 +23,51 @@
# importing anything else.
import iris.tests as tests

import numpy as np
import numpy.ma as ma

from iris.analysis import COUNT
import iris.cube
from iris.cube import Cube
from iris.coords import DimCoord
from iris._lazy_data import as_lazy_data


class Test_basics(tests.IrisTest):
def setUp(self):
data = np.array([1, 2, 3, 4, 5])
coord = DimCoord([6, 7, 8, 9, 10], long_name='foo')
self.cube = Cube(data)
self.cube.add_dim_coord(coord, 0)
self.lazy_cube = Cube(as_lazy_data(data))
self.lazy_cube.add_dim_coord(coord, 0)
self.func = lambda x: x >= 3

def test_name(self):
self.assertEqual(COUNT.name(), 'count')

def test_no_function(self):
with self.assertRaisesRegexp(KeyError, 'no selection function'):
self.lazy_cube.collapsed("foo", COUNT)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not that happy that the unit test is calling cube.collapsed. Could you just test that the aggregator produces the right numbers when you pass a numpy array through, and thus rely on the testing of the Aggregator class to ensure that the integration is good?


def test_not_callable(self):
with self.assertRaisesRegexp(TypeError, 'function must be a callable'):
self.cube.collapsed("foo", COUNT, function='wibble')

def test_lazy_not_callable(self):
with self.assertRaisesRegexp(TypeError, 'function must be a callable'):
self.lazy_cube.collapsed("foo", COUNT, function='wibble')

def test_collapse(self):
cube = self.cube.collapsed("foo", COUNT, function=self.func)
self.assertArrayEqual(cube.data, [3])

def test_lazy(self):
cube = self.lazy_cube.collapsed("foo", COUNT, function=self.func)
self.assertTrue(cube.has_lazy_data())

def test_lazy_collapse(self):
cube = self.lazy_cube.collapsed("foo", COUNT, function=self.func)
self.assertArrayEqual(cube.data, [3])


class Test_units_func(tests.IrisTest):
Expand All @@ -39,7 +79,7 @@ def test(self):

class Test_masked(tests.IrisTest):
def setUp(self):
self.cube = iris.cube.Cube(ma.masked_equal([1, 2, 3, 4, 5], 3))
self.cube = Cube(ma.masked_equal([1, 2, 3, 4, 5], 3))
self.cube.add_dim_coord(DimCoord([6, 7, 8, 9, 10], long_name='foo'), 0)
self.func = lambda x: x >= 3

Expand All @@ -48,9 +88,37 @@ def test_ma(self):
self.assertArrayEqual(cube.data, [2])


class Test_name(tests.IrisTest):
def test(self):
self.assertEqual(COUNT.name(), 'count')
class Test_lazy(tests.IrisTest):
def setUp(self):
data = np.array([1, 2, 3, 4, 5])
self.cube = Cube(as_lazy_data(data))
self.cube.add_dim_coord(DimCoord([6, 7, 8, 9, 10], long_name='foo'), 0)
self.func = lambda x: x >= 3

def test_lazy_oper(self):
cube = self.cube.collapsed("foo", COUNT, function=self.func)
self.assertTrue(cube.has_lazy_data())

def test_collapse(self):
result = self.cube.collapsed("foo", COUNT, function=self.func)
self.cube.data
expected = self.cube.collapsed("foo", COUNT, function=self.func)
self.assertArrayEqual(result.data, expected.data)


class Test_lazy_masked(tests.IrisTest):
def setUp(self):
lazy_data = as_lazy_data(ma.masked_equal([1, 2, 3, 4, 5], 3))
self.lazy_cube = Cube(lazy_data)
self.lazy_cube.add_dim_coord(DimCoord([6, 7, 8, 9, 10],
long_name='foo'),
0)
self.func = lambda x: x >= 3

def test_ma(self):
cube = self.lazy_cube.collapsed("foo", COUNT, function=self.func)
self.assertTrue(cube.has_lazy_data())
self.assertArrayEqual(cube.data, [2])


class Test_aggregate_shape(tests.IrisTest):
Expand Down
92 changes: 92 additions & 0 deletions lib/iris/tests/unit/analysis/test_MAX.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# (C) British Crown Copyright 2018, Met Office
#
# This file is part of Iris.
#
# Iris is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the
# Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Iris is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Iris. If not, see <http://www.gnu.org/licenses/>.
"""Unit tests for the :data:`iris.analysis.MAX` aggregator."""

from __future__ import (absolute_import, division, print_function)
from six.moves import (filter, input, map, range, zip) # noqa

# Import iris.tests first so that some things can be initialised before
# importing anything else.
import iris.tests as tests

import numpy as np
import numpy.ma as ma

from iris.analysis import MAX
from iris.cube import Cube
from iris.coords import DimCoord
from iris._lazy_data import as_lazy_data


class Test_basics(tests.IrisTest):
def setUp(self):
data = np.array([1, 2, 3, 4, 5])
coord = DimCoord([6, 7, 8, 9, 10], long_name='foo')
self.cube = Cube(data)
self.cube.add_dim_coord(coord, 0)
self.lazy_cube = Cube(as_lazy_data(data))
self.lazy_cube.add_dim_coord(coord, 0)

def test_name(self):
self.assertEqual(MAX.name(), 'maximum')

def test_collapse(self):
cube = self.cube.collapsed("foo", MAX)
self.assertArrayEqual(cube.data, [5])

def test_lazy(self):
cube = self.lazy_cube.collapsed("foo", MAX)
self.assertTrue(cube.has_lazy_data())

def test_lazy_collapse(self):
cube = self.lazy_cube.collapsed("foo", MAX)
self.assertArrayEqual(cube.data, [5])


class Test_masked(tests.IrisTest):
def setUp(self):
self.cube = Cube(ma.masked_greater([1, 2, 3, 4, 5], 3))
self.cube.add_dim_coord(DimCoord([6, 7, 8, 9, 10], long_name='foo'), 0)

def test_ma(self):
cube = self.cube.collapsed("foo", MAX)
self.assertArrayEqual(cube.data, [3])


class Test_lazy_masked(tests.IrisTest):
def setUp(self):
masked_data = ma.masked_greater([1, 2, 3, 4, 5], 3)
self.cube = Cube(as_lazy_data(masked_data))
self.cube.add_dim_coord(DimCoord([6, 7, 8, 9, 10], long_name='foo'), 0)

def test_lazy_ma(self):
cube = self.cube.collapsed("foo", MAX)
self.assertTrue(cube.has_lazy_data())
self.assertArrayEqual(cube.data, [3])


class Test_aggregate_shape(tests.IrisTest):
def test(self):
shape = ()
kwargs = dict()
self.assertTupleEqual(MAX.aggregate_shape(**kwargs), shape)
kwargs = dict(wibble='wobble')
self.assertTupleEqual(MAX.aggregate_shape(**kwargs), shape)


if __name__ == "__main__":
tests.main()
Loading