Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions lib/iris/analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1016,7 +1016,8 @@ def post_process(self, collapsed_cube, data_result, coords, **kwargs):
return result


def _percentile(data, axis, percent, **kwargs):
def _percentile(data, axis, percent, fast_percentile_method=False,
**kwargs):
"""
The percentile aggregator is an additive operation. This means that
it *may* introduce a new dimension to the data for the statistic being
Expand All @@ -1025,18 +1026,34 @@ def _percentile(data, axis, percent, **kwargs):
If a new additive dimension is formed, then it will always be the last
dimension of the resulting percentile data payload.

Kwargs:

* fast_percentile_method (boolean) :
When set to True, uses the numpy.percentile function as a faster
alternative to the scipy.stats.mstats.mquantiles function. Does not
handle masked arrays: a TypeError is raised if the data is masked.

"""
# Ensure that the target axis is the last dimension.
data = np.rollaxis(data, axis, start=data.ndim)
quantiles = np.array(percent) / 100.
shape = data.shape[:-1]
# Flatten any leading dimensions.
if shape:
data = data.reshape([np.prod(shape), data.shape[-1]])
# Perform the percentile calculation.
result = scipy.stats.mstats.mquantiles(data, quantiles, axis=-1, **kwargs)
if fast_percentile_method:
msg = 'Cannot use fast np.percentile method with masked array.'
if ma.isMaskedArray(data):
raise TypeError(msg)
result = np.percentile(data, percent, axis=-1)
result = result.T
else:
quantiles = np.array(percent) / 100.
result = scipy.stats.mstats.mquantiles(data, quantiles, axis=-1,
**kwargs)
if not ma.isMaskedArray(data) and not ma.is_masked(result):
result = np.asarray(result)

# Ensure to unflatten any leading dimensions.
if shape:
if not isinstance(percent, collections.Iterable):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?xml version="1.0" ?>
<cubes xmlns="urn:x-iris:cubeml-0.2">
<cube core-dtype="float64" dtype="float64" long_name="thingness" units="1">
<coords>
<coord>
<dimCoord bounds="[[0, 11]]" id="b0d35dcf" long_name="foo" points="[5]" shape="(1,)" units="Unit('1')" value_type="int32"/>
</coord>
<coord>
<dimCoord id="3d9231d0" long_name="percentile_over_foo" points="[25]" shape="(1,)" units="Unit('1')" value_type="int64"/>
</coord>
</coords>
<cellMethods/>
<data dtype="float64" shape="()" state="loaded"/>
</cube>
</cubes>
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<?xml version="1.0" ?>
<cubes xmlns="urn:x-iris:cubeml-0.2">
<cube core-dtype="float64" dtype="float64" long_name="thingness" units="1">
<coords>
<coord datadims="[0]">
<dimCoord bounds="[[0, 5],
[5, 10],
[10, 15]]" id="434cbbd8" long_name="bar" points="[2.5, 7.5, 12.5]" shape="(3,)" units="Unit('1')" value_type="float64"/>
</coord>
<coord>
<dimCoord bounds="[[-15, 45]]" id="b0d35dcf" long_name="foo" points="[15.0]" shape="(1,)" units="Unit('1')" value_type="float64"/>
</coord>
<coord>
<dimCoord id="3d9231d0" long_name="percentile_over_foo" points="[25]" shape="(1,)" units="Unit('1')" value_type="int64"/>
</coord>
</coords>
<cellMethods/>
<data dtype="float64" shape="(3,)" state="loaded"/>
</cube>
</cubes>
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<?xml version="1.0" ?>
<cubes xmlns="urn:x-iris:cubeml-0.2">
<cube core-dtype="float64" dtype="float64" long_name="thingness" units="1">
<coords>
<coord>
<dimCoord bounds="[[0, 15]]" id="434cbbd8" long_name="bar" points="[7.5]" shape="(1,)" units="Unit('1')" value_type="float64"/>
</coord>
<coord>
<dimCoord bounds="[[-15, 45]]" id="b0d35dcf" long_name="foo" points="[15.0]" shape="(1,)" units="Unit('1')" value_type="float64"/>
</coord>
<coord>
<dimCoord id="3a1fa26e" long_name="percentile_over_foo_bar" points="[25]" shape="(1,)" units="Unit('1')" value_type="int64"/>
</coord>
</coords>
<cellMethods/>
<data dtype="float64" shape="()" state="loaded"/>
</cube>
</cubes>
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<?xml version="1.0" ?>
<cubes xmlns="urn:x-iris:cubeml-0.2">
<cube core-dtype="float64" dtype="float64" long_name="thingness" units="1">
<coords>
<coord datadims="[0]">
<dimCoord id="4a0cb9d8" points="[90, 0, -90]" shape="(3,)" standard_name="latitude" units="Unit('degrees')" value_type="int64"/>
</coord>
<coord datadims="[1]">
<dimCoord circular="True" id="62e940e0" points="[-180, -90, 0, 90]" shape="(4,)" standard_name="longitude" units="Unit('degrees')" value_type="int64"/>
</coord>
<coord>
<dimCoord id="cf515091" long_name="percentile_over_wibble" points="[75]" shape="(1,)" units="Unit('1')" value_type="int64"/>
</coord>
<coord>
<dimCoord bounds="[[10.0, 30.0]]" id="10b8e1fc" long_name="wibble" points="[20.0]" shape="(1,)" units="Unit('1')" value_type="float32"/>
</coord>
</coords>
<cellMethods/>
<data dtype="float64" shape="(3, 4)" state="loaded"/>
</cube>
</cubes>
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?xml version="1.0" ?>
<cubes xmlns="urn:x-iris:cubeml-0.2">
<cube core-dtype="float64" dtype="float64" long_name="thingness" units="1">
<coords>
<coord>
<dimCoord bounds="[[0, 11]]" id="b0d35dcf" long_name="foo" points="[5]" shape="(1,)" units="Unit('1')" value_type="int32"/>
</coord>
<coord>
<dimCoord id="3d9231d0" long_name="percentile_over_foo" points="[75]" shape="(1,)" units="Unit('1')" value_type="int64"/>
</coord>
</coords>
<cellMethods/>
<data dtype="float64" shape="()" state="loaded"/>
</cube>
</cubes>
193 changes: 124 additions & 69 deletions lib/iris/tests/test_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,102 +351,157 @@ def test_multi_coord_mdtol(self):


class TestAggregators(tests.IrisTest):
def test_percentile_1d(self):

def _check_collapsed_percentile(self, cube, percents, collapse_coord,
                                expected_result, CML_filename=None,
                                **kwargs):
    """
    Collapse a cube with the PERCENTILE aggregator and check the outcome.

    Args:

    * cube:
        The cube to collapse.
    * percents:
        Passed to the aggregator as its ``percent`` keyword.
    * collapse_coord:
        The coordinate(s) handed to ``cube.collapsed``.
    * expected_result:
        Expected data values; converted to a float32 array before the
        comparison.

    Kwargs:

    * CML_filename:
        When not None, the collapsed cube is also compared against the
        reference CML ``('analysis', CML_filename)``, without checksum.

    Any further keyword arguments are forwarded to ``cube.collapsed``.

    """
    # Build the reference array first, exactly as the data will be compared.
    reference = np.array(expected_result, dtype=np.float32)
    collapsed = cube.collapsed(collapse_coord, iris.analysis.PERCENTILE,
                               percent=percents, **kwargs)
    np.testing.assert_array_almost_equal(collapsed.data, reference)
    if CML_filename is None:
        # No reference CML requested: the data check is all that is needed.
        return
    cml_path = ('analysis', CML_filename)
    self.assertCML(collapsed, cml_path, checksum=False)

def _check_percentile(self, data, axis, percents, expected_result,
                      **kwargs):
    """
    Call ``iris.analysis._percentile`` directly and check its output.

    ``data``, ``axis`` and ``percents`` are passed positionally and any
    extra keyword arguments are forwarded unchanged. The returned array
    must be almost equal to ``expected_result``.

    """
    percentile_func = iris.analysis._percentile
    actual = percentile_func(data, axis, percents, **kwargs)
    np.testing.assert_array_almost_equal(actual, expected_result)

def test_percentile_1d_25_percent(self):
cube = tests.stock.simple_1d()
self._check_collapsed_percentile(
cube, 25, 'foo', 2.5, CML_filename='first_quartile_foo_1d.cml')

first_quartile = cube.collapsed('foo', iris.analysis.PERCENTILE,
percent=25)
np.testing.assert_array_almost_equal(first_quartile.data,
np.array([2.5], dtype=np.float32))
self.assertCML(first_quartile, ('analysis',
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see you've dropped all the CML checks from these tests. Can you re-implement them, please? They're important for checking that each resultant cube, as a whole entity, is as expected following a collapse operation. You will need to add new CML results for the new tests you've added as well.

I wouldn't expect the CML to change for the existing tests (though you've renamed the tests). If the CML is changing after accounting for the renamed tests, that may well be cause for concern.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've put these back in and added new results files for the fast_percentile_method tests.

'first_quartile_foo_1d.cml'),
checksum=False)
def test_percentile_1d_75_percent(self):
cube = tests.stock.simple_1d()
self._check_collapsed_percentile(
cube, 75, 'foo', 7.5, CML_filename='third_quartile_foo_1d.cml')

third_quartile = cube.collapsed('foo', iris.analysis.PERCENTILE,
percent=75)
np.testing.assert_array_almost_equal(third_quartile.data,
np.array([7.5],
dtype=np.float32))
self.assertCML(third_quartile,
('analysis', 'third_quartile_foo_1d.cml'),
checksum=False)
def test_fast_percentile_1d_25_percent(self):
    """25th percentile over 'foo' of the 1d stock cube, fast method."""
    simple_cube = tests.stock.simple_1d()
    self._check_collapsed_percentile(
        simple_cube,
        percents=25,
        collapse_coord='foo',
        expected_result=2.5,
        CML_filename='first_quartile_foo_1d_fast_percentile.cml',
        fast_percentile_method=True)

def test_fast_percentile_1d_75_percent(self):
    """75th percentile over 'foo' of the 1d stock cube, fast method."""
    simple_cube = tests.stock.simple_1d()
    self._check_collapsed_percentile(
        simple_cube,
        percents=75,
        collapse_coord='foo',
        expected_result=7.5,
        CML_filename='third_quartile_foo_1d_fast_percentile.cml',
        fast_percentile_method=True)

def test_percentile_2d(self):
def test_percentile_2d_single_coord(self):
cube = tests.stock.simple_2d()
self._check_collapsed_percentile(
cube, 25, 'foo', [0.75, 4.75, 8.75],
CML_filename='first_quartile_foo_2d.cml')

first_quartile = cube.collapsed('foo', iris.analysis.PERCENTILE,
percent=25)
np.testing.assert_array_almost_equal(first_quartile.data,
np.array([0.75, 4.75, 8.75],
dtype=np.float32))
self.assertCML(first_quartile, ('analysis',
'first_quartile_foo_2d.cml'),
checksum=False)
def test_percentile_2d_two_coords(self):
cube = tests.stock.simple_2d()
self._check_collapsed_percentile(
cube, 25, ['foo', 'bar'], [2.75],
CML_filename='first_quartile_foo_bar_2d.cml')

first_quartile = cube.collapsed(('foo', 'bar'),
iris.analysis.PERCENTILE, percent=25)
np.testing.assert_array_almost_equal(first_quartile.data,
np.array([2.75],
dtype=np.float32))
self.assertCML(first_quartile, ('analysis',
'first_quartile_foo_bar_2d.cml'),
checksum=False)
def test_fast_percentile_2d_single_coord(self):
    """25th percentile over 'foo' of the 2d stock cube, fast method."""
    simple_cube = tests.stock.simple_2d()
    expected_values = [0.75, 4.75, 8.75]
    self._check_collapsed_percentile(
        simple_cube,
        percents=25,
        collapse_coord='foo',
        expected_result=expected_values,
        CML_filename='first_quartile_foo_2d_fast_percentile.cml',
        fast_percentile_method=True)

def test_fast_percentile_2d_two_coords(self):
    """25th percentile over 'foo' and 'bar' together, fast method."""
    simple_cube = tests.stock.simple_2d()
    coords_to_collapse = ['foo', 'bar']
    self._check_collapsed_percentile(
        simple_cube,
        percents=25,
        collapse_coord=coords_to_collapse,
        expected_result=[2.75],
        CML_filename='first_quartile_foo_bar_2d_fast_percentile.cml',
        fast_percentile_method=True)

def test_percentile_3d(self):
array_3d = np.arange(24, dtype=np.int32).reshape((2, 3, 4))
expected_result = np.array([[6., 7., 8., 9.],
[10., 11., 12., 13.],
[14., 15., 16., 17.]],
dtype=np.float32)
self._check_percentile(array_3d, 0, 50, expected_result)

last_quartile = iris.analysis._percentile(array_3d, 0, 50)
np.testing.assert_array_almost_equal(last_quartile,
np.array([[6., 7., 8., 9.],
[10., 11., 12., 13.],
[14., 15., 16., 17.]],
dtype=np.float32))
def test_fast_percentile_3d(self):
    """50th percentile along axis 0 of a (2, 3, 4) array, fast method."""
    values = np.arange(24, dtype=np.int32).reshape((2, 3, 4))
    expected_rows = [[6., 7., 8., 9.],
                     [10., 11., 12., 13.],
                     [14., 15., 16., 17.]]
    expected = np.array(expected_rows, dtype=np.float32)
    self._check_percentile(values, axis=0, percents=50,
                           expected_result=expected,
                           fast_percentile_method=True)

def test_percentile_3d_axis_one(self):
array_3d = np.arange(24, dtype=np.int32).reshape((2, 3, 4))
expected_result = np.array([[4., 5., 6., 7.],
[16., 17., 18., 19.]],
dtype=np.float32)

last_quartile = iris.analysis._percentile(array_3d, 1, 50)
np.testing.assert_array_almost_equal(last_quartile,
np.array([[4., 5., 6., 7.],
[16., 17., 18., 19.]],
dtype=np.float32))
self._check_percentile(array_3d, 1, 50, expected_result)

def test_fast_percentile_3d_axis_one(self):
    """50th percentile along axis 1 of a (2, 3, 4) array, fast method."""
    values = np.arange(24, dtype=np.int32).reshape((2, 3, 4))
    expected_rows = [[4., 5., 6., 7.],
                     [16., 17., 18., 19.]]
    expected = np.array(expected_rows, dtype=np.float32)
    self._check_percentile(values, axis=1, percents=50,
                           expected_result=expected,
                           fast_percentile_method=True)

def test_percentile_3d_axis_two(self):
array_3d = np.arange(24, dtype=np.int32).reshape((2, 3, 4))
expected_result = np.array([[1.5, 5.5, 9.5],
[13.5, 17.5, 21.5]],
dtype=np.float32)

last_quartile = iris.analysis._percentile(array_3d, 2, 50)
np.testing.assert_array_almost_equal(last_quartile,
np.array([[1.5, 5.5, 9.5],
[13.5, 17.5, 21.5]],
dtype=np.float32))
self._check_percentile(array_3d, 2, 50, expected_result)

def test_fast_percentile_3d_axis_two(self):
    """50th percentile along axis 2 of a (2, 3, 4) array, fast method."""
    values = np.arange(24, dtype=np.int32).reshape((2, 3, 4))
    expected_rows = [[1.5, 5.5, 9.5],
                     [13.5, 17.5, 21.5]]
    expected = np.array(expected_rows, dtype=np.float32)
    self._check_percentile(values, axis=2, percents=50,
                           expected_result=expected,
                           fast_percentile_method=True)

def test_percentile_3d_masked(self):
cube = tests.stock.simple_3d_mask()
expected_result = [[12., 13., 14., 15.],
[16., 17., 18., 19.],
[20., 18., 19., 20.]]

last_quartile = cube.collapsed('wibble',
iris.analysis.PERCENTILE, percent=75)
np.testing.assert_array_almost_equal(last_quartile.data,
np.array([[12., 13., 14., 15.],
[16., 17., 18., 19.],
[20., 18., 19., 20.]],
dtype=np.float32))
self.assertCML(last_quartile, ('analysis',
'last_quartile_foo_3d_masked.cml'),
checksum=False)
self._check_collapsed_percentile(
cube, 75, 'wibble', expected_result,
CML_filename='last_quartile_foo_3d_masked.cml')

def test_fast_percentile_3d_masked(self):
    """Collapsing the masked stock cube with the fast method must fail."""
    masked_cube = tests.stock.simple_3d_mask()
    expected_message = ('Cannot use fast np.percentile method with '
                        'masked array.')
    collapse_kwargs = dict(percent=75, fast_percentile_method=True)

    # The collapse is expected to raise a TypeError carrying this message.
    with self.assertRaisesRegexp(TypeError, expected_message):
        masked_cube.collapsed('wibble', iris.analysis.PERCENTILE,
                              **collapse_kwargs)

def test_percentile_3d_notmasked(self):
cube = tests.stock.simple_3d()
expected_result = [[9., 10., 11., 12.],
[13., 14., 15., 16.],
[17., 18., 19., 20.]]

last_quartile = cube.collapsed('wibble',
iris.analysis.PERCENTILE, percent=75)
np.testing.assert_array_almost_equal(last_quartile.data,
np.array([[9., 10., 11., 12.],
[13., 14., 15., 16.],
[17., 18., 19., 20.]],
dtype=np.float32))
self.assertCML(last_quartile, ('analysis',
'last_quartile_foo_3d_notmasked.cml'),
checksum=False)
self._check_collapsed_percentile(
cube, 75, 'wibble', expected_result,
CML_filename='last_quartile_foo_3d_notmasked.cml')

def test_fast_percentile_3d_notmasked(self):
    """75th percentile over 'wibble' of the 3d stock cube, fast method."""
    plain_cube = tests.stock.simple_3d()
    expected_values = [[9., 10., 11., 12.],
                       [13., 14., 15., 16.],
                       [17., 18., 19., 20.]]
    reference_cml = 'last_quartile_foo_3d_notmasked_fast_percentile.cml'

    self._check_collapsed_percentile(
        plain_cube,
        percents=75,
        collapse_coord='wibble',
        expected_result=expected_values,
        CML_filename=reference_cml,
        fast_percentile_method=True)

def test_proportion(self):
cube = tests.stock.simple_1d()
Expand Down