diff --git a/conda-requirements.txt b/conda-requirements.txt
index 3324c5fbc4..cd89693e57 100644
--- a/conda-requirements.txt
+++ b/conda-requirements.txt
@@ -10,6 +10,7 @@ numpy
 pyke
 udunits2
 cf_units
+dask
 
 # Iris build dependencies
 setuptools
diff --git a/lib/iris/_lazy_data.py b/lib/iris/_lazy_data.py
new file mode 100644
index 0000000000..1433e66620
--- /dev/null
+++ b/lib/iris/_lazy_data.py
@@ -0,0 +1,75 @@
+# (C) British Crown Copyright 2017, Met Office
+#
+# This file is part of Iris.
+#
+# Iris is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Iris is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with Iris. If not, see <http://www.gnu.org/licenses/>.
+"""
+Routines for lazy data handling.
+
+To avoid replicating implementation-dependent test and conversion code.
+
+"""
+from __future__ import (absolute_import, division, print_function)
+from six.moves import (filter, input, map, range, zip)  # noqa
+
+import dask.array as da
+
+
+def is_lazy_data(data):
+    """
+    Return whether the argument is an Iris 'lazy' data array.
+
+    At present a lazy array simply means a Dask array.  This is detected by
+    duck-typing: any object with a "compute" attribute counts as lazy.
+
+    """
+    has_compute = hasattr(data, 'compute')
+    return has_compute
+
+
+def as_concrete_data(data):
+    """
+    Return the actual content of the argument, as a numpy array.
+
+    Lazy data is realised by calling its "compute" method; anything else is
+    returned unchanged (the very same object).
+
+    """
+    if not is_lazy_data(data):
+        return data
+    return data.compute()
+
+
+# A magic value, borrowed from biggus.
+# NOTE(review): passed to dask as ``chunks=``, i.e. a block length applied to
+# every dimension -- confirm this is the intended meaning of the value.
+_MAX_CHUNK_SIZE = 8 * 1024 * 1024 * 2
+
+
+def as_lazy_data(data):
+    """
+    Return a lazy equivalent of the argument, as a lazy array.
+
+    An existing lazy (dask) array is returned unchanged.
+    Anything else is wrapped with dask.array.from_array, which assumes that
+    the underlying object has numpy-array-like properties.
+
+    .. Note::
+
+        For now at least, chunksize is set to an arbitrary fixed value.
+
+    """
+    if is_lazy_data(data):
+        return data
+    return da.from_array(data, chunks=_MAX_CHUNK_SIZE)
diff --git a/lib/iris/tests/unit/lazy_data/__init__.py b/lib/iris/tests/unit/lazy_data/__init__.py
new file mode 100644
index 0000000000..9eed1ff4c0
--- /dev/null
+++ b/lib/iris/tests/unit/lazy_data/__init__.py
@@ -0,0 +1,20 @@
+# (C) British Crown Copyright 2017, Met Office
+#
+# This file is part of Iris.
+#
+# Iris is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Iris is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with Iris. If not, see <http://www.gnu.org/licenses/>.
+"""Unit tests for the :mod:`iris._lazy_data` module."""
+
+from __future__ import (absolute_import, division, print_function)
+from six.moves import (filter, input, map, range, zip)  # noqa
diff --git a/lib/iris/tests/unit/lazy_data/test_as_concrete_data.py b/lib/iris/tests/unit/lazy_data/test_as_concrete_data.py
new file mode 100644
index 0000000000..760af08872
--- /dev/null
+++ b/lib/iris/tests/unit/lazy_data/test_as_concrete_data.py
@@ -0,0 +1,48 @@
+# (C) British Crown Copyright 2017, Met Office
+#
+# This file is part of Iris.
+#
+# Iris is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Iris is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with Iris. If not, see <http://www.gnu.org/licenses/>.
+"""Test function :func:`iris._lazy_data.as_concrete_data`."""
+
+from __future__ import (absolute_import, division, print_function)
+from six.moves import (filter, input, map, range, zip)  # noqa
+
+# Import iris.tests first so that some things can be initialised before
+# importing anything else.
+import iris.tests as tests
+
+import numpy as np
+import dask.array as da
+
+from iris._lazy_data import is_lazy_data, as_concrete_data
+
+
+class Test_as_concrete_data(tests.IrisTest):
+    def test_lazy(self):  # lazy input is realised to equal, non-lazy data
+        lazy_values = np.arange(30).reshape((2, 5, 3))
+        lazy_array = da.from_array(lazy_values, chunks=10 ** 6)
+        result = as_concrete_data(lazy_array)
+        self.assertFalse(is_lazy_data(result))
+        self.assertArrayAllClose(result, lazy_values)
+
+    def test_real(self):  # real input is handed back as the same object
+        real_array = np.arange(24).reshape((2, 3, 4))
+        result = as_concrete_data(real_array)
+        self.assertFalse(is_lazy_data(result))
+        self.assertIs(result, real_array)
+
+
+if __name__ == '__main__':
+    tests.main()
diff --git a/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py b/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py
new file mode 100644
index 0000000000..8400c66c4f
--- /dev/null
+++ b/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py
@@ -0,0 +1,50 @@
+# (C) British Crown Copyright 2017, Met Office
+#
+# This file is part of Iris.
+#
+# Iris is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Iris is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with Iris. If not, see <http://www.gnu.org/licenses/>.
+"""Test function :func:`iris._lazy_data.as_lazy_data`."""
+
+from __future__ import (absolute_import, division, print_function)
+from six.moves import (filter, input, map, range, zip)  # noqa
+
+# Import iris.tests first so that some things can be initialised before
+# importing anything else.
+import iris.tests as tests
+
+
+import numpy as np
+import dask.array as da
+
+from iris._lazy_data import as_lazy_data, as_concrete_data, is_lazy_data
+
+
+class Test_as_lazy_data(tests.IrisTest):
+    def test_lazy(self):  # lazy input is handed back as the same object
+        lazy_values = np.arange(30).reshape((2, 5, 3))
+        lazy_array = da.from_array(lazy_values, chunks=10 ** 6)
+        result = as_lazy_data(lazy_array)
+        self.assertTrue(is_lazy_data(result))
+        self.assertIs(result, lazy_array)
+
+    def test_real(self):  # real input is wrapped; realising it round-trips
+        real_array = np.arange(24).reshape((2, 3, 4))
+        result = as_lazy_data(real_array)
+        self.assertTrue(is_lazy_data(result))
+        self.assertArrayAllClose(as_concrete_data(result),
+                                 real_array)
+
+
+if __name__ == '__main__':
+    tests.main()
diff --git a/lib/iris/tests/unit/lazy_data/test_is_lazy_data.py b/lib/iris/tests/unit/lazy_data/test_is_lazy_data.py
new file mode 100644
index 0000000000..90e0ff6aff
--- /dev/null
+++ b/lib/iris/tests/unit/lazy_data/test_is_lazy_data.py
@@ -0,0 +1,44 @@
+# (C) British Crown Copyright 2017, Met Office
+#
+# This file is part of Iris.
+#
+# Iris is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Iris is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with Iris. If not, see <http://www.gnu.org/licenses/>.
+"""Test function :func:`iris._lazy_data.is_lazy_data`."""
+
+from __future__ import (absolute_import, division, print_function)
+from six.moves import (filter, input, map, range, zip)  # noqa
+
+# Import iris.tests first so that some things can be initialised before
+# importing anything else.
+import iris.tests as tests
+
+import numpy as np
+import dask.array as da
+
+from iris._lazy_data import is_lazy_data
+
+
+class Test_is_lazy_data(tests.IrisTest):
+    def test_lazy(self):  # a dask array is reported as lazy
+        lazy_values = np.arange(30).reshape((2, 5, 3))
+        lazy_array = da.from_array(lazy_values, chunks=10 ** 6)
+        self.assertTrue(is_lazy_data(lazy_array))
+
+    def test_real(self):  # a numpy array is reported as not lazy
+        real_array = np.arange(24).reshape((2, 3, 4))
+        self.assertFalse(is_lazy_data(real_array))
+
+
+if __name__ == '__main__':
+    tests.main()
diff --git a/minimal-conda-requirements.txt b/minimal-conda-requirements.txt
index 5299e438e9..a87c787ec9 100644
--- a/minimal-conda-requirements.txt
+++ b/minimal-conda-requirements.txt
@@ -10,6 +10,7 @@ numpy
 pyke
 udunits2
 cf_units
+dask
 
 # Iris build dependencies
 setuptools