diff --git a/lib/iris/analysis/__init__.py b/lib/iris/analysis/__init__.py index 818bf2ac91..b54cabc3ef 100644 --- a/lib/iris/analysis/__init__.py +++ b/lib/iris/analysis/__init__.py @@ -1016,7 +1016,8 @@ def post_process(self, collapsed_cube, data_result, coords, **kwargs): return result -def _percentile(data, axis, percent, **kwargs): +def _percentile(data, axis, percent, fast_percentile_method=False, + **kwargs): """ The percentile aggregator is an additive operation. This means that it *may* introduce a new dimension to the data for the statistic being @@ -1025,18 +1026,34 @@ def _percentile(data, axis, percent, **kwargs): If a new additive dimension is formed, then it will always be the last dimension of the resulting percentile data payload. + Kwargs: + + * fast_percentile_method (boolean) : + When set to True, uses the numpy.percentiles method as a faster + alternative to the scipy.mstats.mquantiles method. Does not handle + masked arrays. + """ # Ensure that the target axis is the last dimension. data = np.rollaxis(data, axis, start=data.ndim) - quantiles = np.array(percent) / 100. shape = data.shape[:-1] # Flatten any leading dimensions. if shape: data = data.reshape([np.prod(shape), data.shape[-1]]) # Perform the percentile calculation. - result = scipy.stats.mstats.mquantiles(data, quantiles, axis=-1, **kwargs) + if fast_percentile_method: + msg = 'Cannot use fast np.percentile method with masked array.' + if ma.isMaskedArray(data): + raise TypeError(msg) + result = np.percentile(data, percent, axis=-1) + result = result.T + else: + quantiles = np.array(percent) / 100. + result = scipy.stats.mstats.mquantiles(data, quantiles, axis=-1, + **kwargs) if not ma.isMaskedArray(data) and not ma.is_masked(result): result = np.asarray(result) + # Ensure to unflatten any leading dimensions. if shape: if not isinstance(percent, collections.Iterable): diff --git a/lib/iris/tests/results/analysis/first_quartile_foo_1d_fast_percentile.cml b/lib/iris/tests/results/analysis/first_quartile_foo_1d_fast_percentile.cml new file mode 100644 index 0000000000..13dff8ffe7 --- /dev/null +++ b/lib/iris/tests/results/analysis/first_quartile_foo_1d_fast_percentile.cml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/lib/iris/tests/results/analysis/first_quartile_foo_2d_fast_percentile.cml b/lib/iris/tests/results/analysis/first_quartile_foo_2d_fast_percentile.cml new file mode 100644 index 0000000000..8d04577937 --- /dev/null +++ b/lib/iris/tests/results/analysis/first_quartile_foo_2d_fast_percentile.cml @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + diff --git a/lib/iris/tests/results/analysis/first_quartile_foo_bar_2d_fast_percentile.cml b/lib/iris/tests/results/analysis/first_quartile_foo_bar_2d_fast_percentile.cml new file mode 100644 index 0000000000..c9e86317bd --- /dev/null +++ b/lib/iris/tests/results/analysis/first_quartile_foo_bar_2d_fast_percentile.cml @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + + diff --git a/lib/iris/tests/results/analysis/last_quartile_foo_3d_notmasked_fast_percentile.cml b/lib/iris/tests/results/analysis/last_quartile_foo_3d_notmasked_fast_percentile.cml new file mode 100644 index 0000000000..9eaf7d59b7 --- /dev/null +++ b/lib/iris/tests/results/analysis/last_quartile_foo_3d_notmasked_fast_percentile.cml @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/lib/iris/tests/results/analysis/third_quartile_foo_1d_fast_percentile.cml b/lib/iris/tests/results/analysis/third_quartile_foo_1d_fast_percentile.cml new file mode 100644 index 0000000000..8a0f1e479d --- /dev/null +++ b/lib/iris/tests/results/analysis/third_quartile_foo_1d_fast_percentile.cml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/lib/iris/tests/test_analysis.py b/lib/iris/tests/test_analysis.py index 0ffd7e18ab..24ba178511 100644 --- a/lib/iris/tests/test_analysis.py +++ b/lib/iris/tests/test_analysis.py @@ -351,102 +351,157 @@ def test_multi_coord_mdtol(self): class TestAggregators(tests.IrisTest): - def test_percentile_1d(self): + + def _check_collapsed_percentile(self, cube, percents, collapse_coord, + expected_result, CML_filename=None, + **kwargs): + expected_result = np.array(expected_result, dtype=np.float32) + result = cube.collapsed(collapse_coord, iris.analysis.PERCENTILE, + percent=percents, **kwargs) + np.testing.assert_array_almost_equal(result.data, expected_result) + if CML_filename is not None: + self.assertCML(result, ('analysis', CML_filename), checksum=False) + + def _check_percentile(self, data, axis, percents, expected_result, + **kwargs): + result = iris.analysis._percentile(data, axis, percents, **kwargs) + np.testing.assert_array_almost_equal(result, expected_result) + + def test_percentile_1d_25_percent(self): cube = tests.stock.simple_1d() + self._check_collapsed_percentile( + cube, 25, 'foo', 2.5, CML_filename='first_quartile_foo_1d.cml') - first_quartile = cube.collapsed('foo', iris.analysis.PERCENTILE, - percent=25) - np.testing.assert_array_almost_equal(first_quartile.data, - np.array([2.5], dtype=np.float32)) - self.assertCML(first_quartile, ('analysis', - 'first_quartile_foo_1d.cml'), - checksum=False) + def test_percentile_1d_75_percent(self): + cube = tests.stock.simple_1d() + self._check_collapsed_percentile( + cube, 75, 'foo', 7.5, CML_filename='third_quartile_foo_1d.cml') - third_quartile = cube.collapsed('foo', iris.analysis.PERCENTILE, - percent=75) - np.testing.assert_array_almost_equal(third_quartile.data, - np.array([7.5], - dtype=np.float32)) - self.assertCML(third_quartile, - ('analysis', 'third_quartile_foo_1d.cml'), - checksum=False) + def test_fast_percentile_1d_25_percent(self): + cube = tests.stock.simple_1d() + self._check_collapsed_percentile( + cube, 25, 'foo', 2.5, fast_percentile_method=True, + CML_filename='first_quartile_foo_1d_fast_percentile.cml') + + def test_fast_percentile_1d_75_percent(self): + cube = tests.stock.simple_1d() + self._check_collapsed_percentile( + cube, 75, 'foo', 7.5, fast_percentile_method=True, + CML_filename='third_quartile_foo_1d_fast_percentile.cml') - def test_percentile_2d(self): + def test_percentile_2d_single_coord(self): cube = tests.stock.simple_2d() + self._check_collapsed_percentile( + cube, 25, 'foo', [0.75, 4.75, 8.75], + CML_filename='first_quartile_foo_2d.cml') - first_quartile = cube.collapsed('foo', iris.analysis.PERCENTILE, - percent=25) - np.testing.assert_array_almost_equal(first_quartile.data, - np.array([0.75, 4.75, 8.75], - dtype=np.float32)) - self.assertCML(first_quartile, ('analysis', - 'first_quartile_foo_2d.cml'), - checksum=False) + def test_percentile_2d_two_coords(self): + cube = tests.stock.simple_2d() + self._check_collapsed_percentile( + cube, 25, ['foo', 'bar'], [2.75], + CML_filename='first_quartile_foo_bar_2d.cml') - first_quartile = cube.collapsed(('foo', 'bar'), - iris.analysis.PERCENTILE, percent=25) - np.testing.assert_array_almost_equal(first_quartile.data, - np.array([2.75], - dtype=np.float32)) - self.assertCML(first_quartile, ('analysis', - 'first_quartile_foo_bar_2d.cml'), - checksum=False) + def test_fast_percentile_2d_single_coord(self): + cube = tests.stock.simple_2d() + self._check_collapsed_percentile( + cube, 25, 'foo', [0.75, 4.75, 8.75], fast_percentile_method=True, + CML_filename='first_quartile_foo_2d_fast_percentile.cml') + + def test_fast_percentile_2d_two_coords(self): + cube = tests.stock.simple_2d() + self._check_collapsed_percentile( + cube, 25, ['foo', 'bar'], [2.75], fast_percentile_method=True, + CML_filename='first_quartile_foo_bar_2d_fast_percentile.cml') def test_percentile_3d(self): array_3d = np.arange(24, dtype=np.int32).reshape((2, 3, 4)) + expected_result = np.array([[6., 7., 8., 9.], + [10., 11., 12., 13.], + [14., 15., 16., 17.]], + dtype=np.float32) + self._check_percentile(array_3d, 0, 50, expected_result) - last_quartile = iris.analysis._percentile(array_3d, 0, 50) - np.testing.assert_array_almost_equal(last_quartile, - np.array([[6., 7., 8., 9.], - [10., 11., 12., 13.], - [14., 15., 16., 17.]], - dtype=np.float32)) + def test_fast_percentile_3d(self): + array_3d = np.arange(24, dtype=np.int32).reshape((2, 3, 4)) + expected_result = np.array([[6., 7., 8., 9.], + [10., 11., 12., 13.], + [14., 15., 16., 17.]], + dtype=np.float32) + self._check_percentile(array_3d, 0, 50, expected_result, + fast_percentile_method=True) def test_percentile_3d_axis_one(self): array_3d = np.arange(24, dtype=np.int32).reshape((2, 3, 4)) + expected_result = np.array([[4., 5., 6., 7.], + [16., 17., 18., 19.]], + dtype=np.float32) - last_quartile = iris.analysis._percentile(array_3d, 1, 50) - np.testing.assert_array_almost_equal(last_quartile, - np.array([[4., 5., 6., 7.], - [16., 17., 18., 19.]], - dtype=np.float32)) + self._check_percentile(array_3d, 1, 50, expected_result) + + def test_fast_percentile_3d_axis_one(self): + array_3d = np.arange(24, dtype=np.int32).reshape((2, 3, 4)) + expected_result = np.array([[4., 5., 6., 7.], + [16., 17., 18., 19.]], + dtype=np.float32) + + self._check_percentile(array_3d, 1, 50, expected_result, + fast_percentile_method=True) def test_percentile_3d_axis_two(self): array_3d = np.arange(24, dtype=np.int32).reshape((2, 3, 4)) + expected_result = np.array([[1.5, 5.5, 9.5], + [13.5, 17.5, 21.5]], + dtype=np.float32) - last_quartile = iris.analysis._percentile(array_3d, 2, 50) - np.testing.assert_array_almost_equal(last_quartile, - np.array([[1.5, 5.5, 9.5], - [13.5, 17.5, 21.5]], - dtype=np.float32)) + self._check_percentile(array_3d, 2, 50, expected_result) + + def test_fast_percentile_3d_axis_two(self): + array_3d = np.arange(24, dtype=np.int32).reshape((2, 3, 4)) + expected_result = np.array([[1.5, 5.5, 9.5], + [13.5, 17.5, 21.5]], + dtype=np.float32) + + self._check_percentile(array_3d, 2, 50, expected_result, + fast_percentile_method=True) def test_percentile_3d_masked(self): cube = tests.stock.simple_3d_mask() + expected_result = [[12., 13., 14., 15.], + [16., 17., 18., 19.], + [20., 18., 19., 20.]] - last_quartile = cube.collapsed('wibble', - iris.analysis.PERCENTILE, percent=75) - np.testing.assert_array_almost_equal(last_quartile.data, - np.array([[12., 13., 14., 15.], - [16., 17., 18., 19.], - [20., 18., 19., 20.]], - dtype=np.float32)) - self.assertCML(last_quartile, ('analysis', - 'last_quartile_foo_3d_masked.cml'), - checksum=False) + self._check_collapsed_percentile( + cube, 75, 'wibble', expected_result, + CML_filename='last_quartile_foo_3d_masked.cml') + + def test_fast_percentile_3d_masked(self): + cube = tests.stock.simple_3d_mask() + msg = 'Cannot use fast np.percentile method with masked array.' + + with self.assertRaisesRegexp(TypeError, msg): + cube.collapsed('wibble', + iris.analysis.PERCENTILE, percent=75, + fast_percentile_method=True) def test_percentile_3d_notmasked(self): cube = tests.stock.simple_3d() + expected_result = [[9., 10., 11., 12.], + [13., 14., 15., 16.], + [17., 18., 19., 20.]] - last_quartile = cube.collapsed('wibble', - iris.analysis.PERCENTILE, percent=75) - np.testing.assert_array_almost_equal(last_quartile.data, - np.array([[9., 10., 11., 12.], - [13., 14., 15., 16.], - [17., 18., 19., 20.]], - dtype=np.float32)) - self.assertCML(last_quartile, ('analysis', - 'last_quartile_foo_3d_notmasked.cml'), - checksum=False) + self._check_collapsed_percentile( + cube, 75, 'wibble', expected_result, + CML_filename='last_quartile_foo_3d_notmasked.cml') + + def test_fast_percentile_3d_notmasked(self): + cube = tests.stock.simple_3d() + expected_result = [[9., 10., 11., 12.], + [13., 14., 15., 16.], + [17., 18., 19., 20.]] + + self._check_collapsed_percentile( + cube, 75, 'wibble', expected_result, fast_percentile_method=True, + CML_filename='last_quartile_foo_3d_notmasked_fast_percentile.cml') def test_proportion(self): cube = tests.stock.simple_1d()