diff --git a/docs/iris/src/whatsnew/contributions_2.1/newfeature_2018-Feb-04_lazy_convert_units.txt b/docs/iris/src/whatsnew/contributions_2.1/newfeature_2018-Feb-04_lazy_convert_units.txt new file mode 100644 index 0000000000..ac30589dd5 --- /dev/null +++ b/docs/iris/src/whatsnew/contributions_2.1/newfeature_2018-Feb-04_lazy_convert_units.txt @@ -0,0 +1,4 @@ +* The methods :meth:`iris.cube.Cube.convert_units` and + :meth:`iris.coords.Coord.convert_units` no longer forcibly realise the cube + data or coordinate points/bounds : The converted values are now lazy arrays + if the originals were. diff --git a/lib/iris/_lazy_data.py b/lib/iris/_lazy_data.py index 285aa8d9b1..fba9b1f586 100644 --- a/lib/iris/_lazy_data.py +++ b/lib/iris/_lazy_data.py @@ -220,3 +220,36 @@ def co_realise_cubes(*cubes): results = _co_realise_lazy_arrays([cube.core_data() for cube in cubes]) for cube, result in zip(cubes, results): cube.data = result + + +def lazy_elementwise(lazy_array, elementwise_op): + """ + Apply a (numpy-style) elementwise array operation to a lazy array. + + Elementwise means that it performs an independent calculation at each point + of the input, producing a result array of the same shape. + + Args: + + * lazy_array: + The lazy array object to operate on. + * elementwise_op: + The elementwise operation, a function operating on numpy arrays. + + .. note:: + + A single-point "dummy" call is made to the operation function, to + determine dtype of the result. + This return dtype must be stable in actual operation (!) + + """ + # This is just a wrapper to provide an Iris-specific abstraction for a + # lazy operation in Dask (map_blocks). + + # Explicitly determine the return type with a dummy call. + # This makes good practical sense for unit conversions, as a Unit.convert + # call may cast to float, or not, depending on unit equality : Thus, it's + # much safer to get udunits to decide that for us.
+ dtype = elementwise_op(np.zeros(1, lazy_array.dtype)).dtype + + return da.map_blocks(elementwise_op, lazy_array, dtype=dtype) diff --git a/lib/iris/coords.py b/lib/iris/coords.py index 94926776d4..16701ad27c 100644 --- a/lib/iris/coords.py +++ b/lib/iris/coords.py @@ -38,7 +38,8 @@ from iris._data_manager import DataManager from iris._deprecation import warn_deprecated -from iris._lazy_data import as_concrete_data, is_lazy_data, multidim_lazy_stack +from iris._lazy_data import (as_concrete_data, is_lazy_data, + multidim_lazy_stack, lazy_elementwise) import iris.aux_factory import iris.exceptions import iris.time @@ -908,9 +909,28 @@ def convert_units(self, unit): raise iris.exceptions.UnitConversionError( 'Cannot convert from unknown units. ' 'The "coord.units" attribute may be set directly.') - self.points = self.units.convert(self.points, unit) + if self.has_lazy_points() or self.has_lazy_bounds(): + # Make fixed copies of old + new units for a delayed conversion. + old_unit = self.units + new_unit = unit + + # Define a delayed conversion operation (i.e. a callback). 
+ def pointwise_convert(values): + return old_unit.convert(values, new_unit) + + if self.has_lazy_points(): + new_points = lazy_elementwise(self.lazy_points(), + pointwise_convert) + else: + new_points = self.units.convert(self.points, unit) + self.points = new_points if self.has_bounds(): - self.bounds = self.units.convert(self.bounds, unit) + if self.has_lazy_bounds(): + new_bounds = lazy_elementwise(self.lazy_bounds(), + pointwise_convert) + else: + new_bounds = self.units.convert(self.bounds, unit) + self.bounds = new_bounds self.units = unit def cells(self): diff --git a/lib/iris/cube.py b/lib/iris/cube.py index 7ffb235c6d..ca6e099b7a 100644 --- a/lib/iris/cube.py +++ b/lib/iris/cube.py @@ -41,6 +41,7 @@ import iris._concatenate import iris._constraints from iris._data_manager import DataManager +from iris._lazy_data import lazy_elementwise import iris._merge import iris.analysis @@ -873,7 +874,19 @@ def convert_units(self, unit): raise iris.exceptions.UnitConversionError( 'Cannot convert from unknown units. ' 'The "cube.units" attribute may be set directly.') - self.data = self.units.convert(self.data, unit) + if self.has_lazy_data(): + # Make fixed copies of old + new units for a delayed conversion. + old_unit = self.units + new_unit = unit + + # Define a delayed conversion operation (i.e. a callback). 
+ def pointwise_convert(values): + return old_unit.convert(values, new_unit) + + new_data = lazy_elementwise(self.lazy_data(), pointwise_convert) + else: + new_data = self.units.convert(self.data, unit) + self.data = new_data self.units = unit def add_cell_method(self, cell_method): diff --git a/lib/iris/tests/unit/coords/test_AuxCoord.py b/lib/iris/tests/unit/coords/test_AuxCoord.py index d9b7c09598..39e5048b14 100644 --- a/lib/iris/tests/unit/coords/test_AuxCoord.py +++ b/lib/iris/tests/unit/coords/test_AuxCoord.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2017, Met Office +# (C) British Crown Copyright 2017 - 2018, Met Office # # This file is part of Iris. # @@ -35,7 +35,9 @@ lazyness_string, coords_all_dtypes_and_lazynesses) +from cf_units import Unit from iris.coords import AuxCoord +from iris._lazy_data import as_lazy_data class AuxCoordTestMixin(CoordTestMixin): @@ -603,5 +605,24 @@ def test_set_bounds_with_lazy_points(self): self.assertTrue(coord.has_lazy_points()) +class Test_convert_units(tests.IrisTest): + def test_preserves_lazy(self): + test_bounds = np.array([[[11.0, 12.0], [12.0, 13.0], [13.0, 14.0]], + [[21.0, 22.0], [22.0, 23.0], [23.0, 24.0]]]) + test_points = np.array([[11.1, 12.2, 13.3], + [21.4, 22.5, 23.6]]) + lazy_points = as_lazy_data(test_points) + lazy_bounds = as_lazy_data(test_bounds) + coord = AuxCoord(points=lazy_points, bounds=lazy_bounds, + units='m') + coord.convert_units('ft') + self.assertTrue(coord.has_lazy_points()) + self.assertTrue(coord.has_lazy_bounds()) + test_points_ft = Unit('m').convert(test_points, 'ft') + test_bounds_ft = Unit('m').convert(test_bounds, 'ft') + self.assertArrayAllClose(coord.points, test_points_ft) + self.assertArrayAllClose(coord.bounds, test_bounds_ft) + + if __name__ == '__main__': tests.main() diff --git a/lib/iris/tests/unit/cube/test_Cube.py b/lib/iris/tests/unit/cube/test_Cube.py index 89aaef355d..9c96cf6b48 100644 --- a/lib/iris/tests/unit/cube/test_Cube.py +++ 
b/lib/iris/tests/unit/cube/test_Cube.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2013 - 2017, Met Office +# (C) British Crown Copyright 2013 - 2018, Met Office # # This file is part of Iris. # @@ -28,6 +28,8 @@ import numpy as np import numpy.ma as ma +from cf_units import Unit + import iris.analysis import iris.aux_factory import iris.coords @@ -39,9 +41,9 @@ from iris.coords import AuxCoord, DimCoord, CellMeasure from iris.exceptions import (CoordinateNotFoundError, CellMeasureNotFoundError, UnitConversionError) +from iris._lazy_data import as_lazy_data from iris.tests import mock import iris.tests.stock as stock -from iris._lazy_data import as_lazy_data class Test___init___data(tests.IrisTest): @@ -1710,6 +1712,15 @@ def test_convert_unknown_units(self): with self.assertRaisesRegexp(UnitConversionError, emsg): cube.convert_units('mm day-1') + def test_preserves_lazy(self): + real_data = np.arange(12.).reshape((3, 4)) + lazy_data = as_lazy_data(real_data) + cube = iris.cube.Cube(lazy_data, units='m') + real_data_ft = Unit('m').convert(real_data, 'ft') + cube.convert_units('ft') + self.assertTrue(cube.has_lazy_data()) + self.assertArrayAllClose(cube.data, real_data_ft) + if __name__ == '__main__': tests.main() diff --git a/lib/iris/tests/unit/lazy_data/test_lazy_elementwise.py b/lib/iris/tests/unit/lazy_data/test_lazy_elementwise.py new file mode 100644 index 0000000000..e813320758 --- /dev/null +++ b/lib/iris/tests/unit/lazy_data/test_lazy_elementwise.py @@ -0,0 +1,65 @@ +# (C) British Crown Copyright 2018, Met Office +# +# This file is part of Iris. +# +# Iris is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# Iris is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with Iris. If not, see <http://www.gnu.org/licenses/>. +"""Test function :func:`iris._lazy_data.lazy_elementwise`.""" + +from __future__ import (absolute_import, division, print_function) +from six.moves import (filter, input, map, range, zip) # noqa + +# Import iris.tests first so that some things can be initialised before +# importing anything else. +import iris.tests as tests + +import numpy as np + +from iris._lazy_data import as_lazy_data, is_lazy_data + +from iris._lazy_data import lazy_elementwise + + +def _test_elementwise_op(array): + # Promotes the type of a bool argument, but not a float. + return array + 1 + + +class Test_lazy_elementwise(tests.IrisTest): + def test_basic(self): + concrete_array = np.arange(30).reshape((2, 5, 3)) + lazy_array = as_lazy_data(concrete_array) + wrapped = lazy_elementwise(lazy_array, _test_elementwise_op) + self.assertTrue(is_lazy_data(wrapped)) + self.assertArrayAllClose(wrapped.compute(), + _test_elementwise_op(concrete_array)) + + def test_dtype_same(self): + concrete_array = np.array([3.], dtype=np.float16) + lazy_array = as_lazy_data(concrete_array) + wrapped = lazy_elementwise(lazy_array, _test_elementwise_op) + self.assertTrue(is_lazy_data(wrapped)) + self.assertEqual(wrapped.dtype, np.float16) + self.assertEqual(wrapped.compute().dtype, np.float16) + + def test_dtype_change(self): + concrete_array = np.array([True, False]) + lazy_array = as_lazy_data(concrete_array) + wrapped = lazy_elementwise(lazy_array, _test_elementwise_op) + self.assertTrue(is_lazy_data(wrapped)) + self.assertEqual(wrapped.dtype, np.int) + self.assertEqual(wrapped.compute().dtype, wrapped.dtype) + + +if __name__ == '__main__': + tests.main()