-
Notifications
You must be signed in to change notification settings - Fork 300
Rework lazy_masked_fill_value. #2729
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -131,14 +131,14 @@ def as_concrete_data(data): | |
| return data | ||
|
|
||
|
|
||
| def lazy_masked_fill_value(data): | ||
| def get_fill_value(data): | ||
| """ | ||
| Return the fill value of a lazy masked array. | ||
| Return the fill value of a concrete or lazy masked array. | ||
|
|
||
| Args: | ||
|
|
||
| * data: | ||
| A dask array, NumPy `ndarray` or masked array | ||
| A dask array, NumPy `ndarray` or masked array. | ||
|
|
||
| Returns: | ||
| The fill value of `data` if `data` represents a masked array, or None. | ||
|
|
@@ -147,9 +147,14 @@ def lazy_masked_fill_value(data): | |
| # If lazy, get the smallest slice of the data from which we can retrieve | ||
| # the fill_value. | ||
| if is_lazy_data(data): | ||
| inds = tuple([0] * (data.ndim-1)) | ||
| smallest_slice = data[inds][:0] | ||
| data = as_concrete_data(smallest_slice) | ||
| inds = [0] * data.ndim | ||
| if inds: | ||
| inds[-1] = slice(0, 1) | ||
| data = data[tuple(inds)] | ||
| data = as_concrete_data(data) | ||
| else: | ||
| if isinstance(data, ma.core.MaskedConstant): | ||
| data = ma.array(data.data, mask=data.mask) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Worth a comment why you need to do this ? |
||
|
|
||
| # Now get the fill value. | ||
| fill_value = None | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -56,7 +56,7 @@ | |
| import iris.fileformats._pyke_rules | ||
| import iris.io | ||
| import iris.util | ||
| from iris._lazy_data import as_lazy_data, lazy_masked_fill_value | ||
| from iris._lazy_data import as_lazy_data, get_fill_value | ||
|
|
||
| # Show Pyke inference engine statistics. | ||
| DEBUG = False | ||
|
|
@@ -1926,10 +1926,7 @@ def set_packing_ncattrs(cfvar): | |
|
|
||
| if packing is None: | ||
| # Determine whether there is a cube MDI value. | ||
| if ma.isMaskedArray(cube.data): | ||
| fill_value = cube.data.fill_value | ||
| else: | ||
| fill_value = None | ||
| fill_value = get_fill_value(cube.core_data()) | ||
|
|
||
| # Get the values in a form which is valid for the file format. | ||
| data = self._ensure_valid_dtype(cube.data, 'cube', cube) | ||
|
|
@@ -1946,16 +1943,8 @@ def set_packing_ncattrs(cfvar): | |
|
|
||
| else: | ||
| # Create the cube CF-netCDF data variable. | ||
| # Set `fill_value` if the data array is masked. If the data array | ||
| # is lazy masked, we realise the smallest possible slice of the | ||
| # array and retrieve the fill value from that. | ||
| if packing is None: | ||
| if not cube.has_lazy_data() and ma.isMaskedArray(cube.data): | ||
| fill_value = cube.data.fill_value | ||
| elif cube.has_lazy_data(): | ||
| fill_value = lazy_masked_fill_value(cube.lazy_data()) | ||
| else: | ||
| fill_value = None | ||
| fill_value = get_fill_value(cube.core_data()) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 for simplification here ! |
||
| dtype = cube.dtype.newbyteorder('=') | ||
|
|
||
| cf_var = self._dataset.createVariable( | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,75 @@ | ||
| # (C) British Crown Copyright 2017, Met Office | ||
| # | ||
| # This file is part of Iris. | ||
| # | ||
| # Iris is free software: you can redistribute it and/or modify it under | ||
| # the terms of the GNU Lesser General Public License as published by the | ||
| # Free Software Foundation, either version 3 of the License, or | ||
| # (at your option) any later version. | ||
| # | ||
| # Iris is distributed in the hope that it will be useful, | ||
| # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| # GNU Lesser General Public License for more details. | ||
| # | ||
| # You should have received a copy of the GNU Lesser General Public License | ||
| # along with Iris. If not, see <http://www.gnu.org/licenses/>. | ||
| """Test the function :func:`iris._lazy data.get_fill_value`.""" | ||
|
|
||
| from __future__ import (absolute_import, division, print_function) | ||
| from six.moves import (filter, input, map, range, zip) # noqa | ||
|
|
||
| # Import iris.tests first so that some things can be initialised before | ||
| # importing anything else. | ||
| import iris.tests as tests | ||
|
|
||
| import dask.array as da | ||
| import numpy as np | ||
| import numpy.ma as ma | ||
|
|
||
| from iris._lazy_data import as_lazy_data, get_fill_value | ||
|
|
||
|
|
||
| class Test_get_fill_value(tests.IrisTest): | ||
| def setUp(self): | ||
| # Array shape and fill-value. | ||
| spec = [((2, 3, 4, 5), -1), # 4d array | ||
| ((2, 3, 4), -2), # 3d array | ||
| ((2, 3), -3), # 2d array | ||
| ((2,), -4), # 1d array | ||
| ((), -5)] # 0d array | ||
| self.arrays = [np.empty(shape) for (shape, _) in spec] | ||
| self.masked = [ma.empty(shape, fill_value=fv) for (shape, fv) in spec] | ||
| self.lazy_arrays = [as_lazy_data(array) for array in self.arrays] | ||
| self.lazy_masked = [as_lazy_data(array) for array in self.masked] | ||
| # Add the masked constant case. | ||
| mc = ma.array([0], mask=True)[0] | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it would be better to specify a definite, non-default fill-value here, like all the other cases. |
||
| self.masked.append(mc) | ||
| self.lazy_masked.append(as_lazy_data(mc)) | ||
| # Collect the expected fill-values. | ||
| self.expected_fill_values = [fv for (_, fv) in spec] | ||
| mc_fill_value = ma.masked_array(0, dtype=mc.dtype).fill_value | ||
| self.expected_fill_values.append(mc_fill_value) | ||
|
|
||
| def test_arrays(self): | ||
| for array in self.arrays: | ||
| self.assertIsNone(get_fill_value(array)) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As you're testing multiple cases, it would be nice to have a custom context message so if one fails the output shows which one it was.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this the moment to use test parametrisation from pytest? It's the future 😉
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Meanwhile, back in the past... nose has poor man's parametrisation in the form of test generators: http://nose.readthedocs.io/en/latest/writing_tests.html#test-generators |
||
|
|
||
| def test_masked(self): | ||
| for array, expected in zip(self.masked, self.expected_fill_values): | ||
| result = get_fill_value(array) | ||
| self.assertEqual(result, expected) | ||
|
|
||
| def test_lazy_arrays(self): | ||
| for array in self.lazy_arrays: | ||
| self.assertIsNone(get_fill_value(array)) | ||
|
|
||
| def test_lazy_masked(self): | ||
| for array, expected in zip(self.lazy_masked, | ||
| self.expected_fill_values): | ||
| result = get_fill_value(array) | ||
| self.assertEqual(result, expected) | ||
|
|
||
|
|
||
| if __name__ == '__main__': | ||
| tests.main() | ||
This file was deleted.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I would use
data = data.flatten()[0:0]instead of all this : The intent would be clearer, no ?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Functionally quite different though - for NetCDF that could mean loading the entire dataset and then discarding all the actual values.