diff --git a/docs/iris/src/whatsnew/1.6.rst b/docs/iris/src/whatsnew/1.6.rst index 33aaa50d51..b830975643 100644 --- a/docs/iris/src/whatsnew/1.6.rst +++ b/docs/iris/src/whatsnew/1.6.rst @@ -7,7 +7,9 @@ What's new in Iris 1.6 Iris 1.6 features ================= -* N/A +* A new utility function makes it easy to "shortcut" slow calculations or + file loads by caching stored results. See + :func:`iris.util.file_is_newer_than`. Bugs fixed ---------- diff --git a/lib/iris/io/__init__.py b/lib/iris/io/__init__.py index 4937314ec9..196fa1e98f 100644 --- a/lib/iris/io/__init__.py +++ b/lib/iris/io/__init__.py @@ -143,31 +143,53 @@ def decode_uri(uri, default='file'): return scheme, part -def load_files(filenames, callback): +def expand_filespecs(file_specs): """ - Takes a list of filenames which may also be globs, and optionally a - callback function, and returns a generator of Cubes from the given files. + Find all matching file paths from a list of file-specs. - .. note:: + Args: - Typically, this function should not be called directly; instead, the - intended interface for loading is :func:`iris.load`. + * file_specs (iterable of string): + File paths which may contain '~' elements or wildcards. + + Returns: + A list of matching file paths. If any of the file-specs matches no + existing files, an exception is raised. 
""" # Remove any hostname component - currently unused - filenames = [os.path.expanduser(fn[2:] if fn.startswith('//') else fn) for fn in filenames] + filenames = [os.path.expanduser(fn[2:] if fn.startswith('//') else fn) + for fn in file_specs] # Try to expand all filenames as globs glob_expanded = {fn : sorted(glob.glob(fn)) for fn in filenames} - # If any of the filenames or globs expanded to an empty list then raise an error - if not all(glob_expanded.viewvalues()): + # If any of the specs expanded to an empty list then raise an error + value_lists = glob_expanded.viewvalues() + if not all(value_lists): raise IOError("One or more of the files specified did not exist %s." % - ["%s expanded to %s" % (pattern, expanded if expanded else "empty") for pattern, expanded in glob_expanded.iteritems()]) + ["%s expanded to %s" % (pattern, expanded if expanded else "empty") + for pattern, expanded in glob_expanded.iteritems()]) + + return sum(value_lists, []) + + +def load_files(filenames, callback): + """ + Takes a list of filenames which may also be globs, and optionally a + callback function, and returns a generator of Cubes from the given files. + + .. note:: + + Typically, this function should not be called directly; instead, the + intended interface for loading is :func:`iris.load`. 
+ + """ + all_file_paths = expand_filespecs(filenames) # Create default dict mapping iris format handler to its associated filenames handler_map = collections.defaultdict(list) - for fn in sum([x for x in glob_expanded.viewvalues()], []): + for fn in all_file_paths: with open(fn) as fh: handling_format_spec = iris.fileformats.FORMAT_AGENT.get_spec(os.path.basename(fn), fh) handler_map[handling_format_spec].append(fn) diff --git a/lib/iris/tests/test_load.py b/lib/iris/tests/test_load.py index b3f1a0d71f..a52b4c9683 100644 --- a/lib/iris/tests/test_load.py +++ b/lib/iris/tests/test_load.py @@ -34,6 +34,26 @@ def test_normal(self): cubes = iris.load(paths) self.assertEqual(len(cubes), 1) + def test_nonexist(self): + paths = ( + tests.get_data_path(['PP', 'aPPglob1', 'global.pp']), + tests.get_data_path(['PP', '_guaranteed_non_exist.pp']), + ) + with self.assertRaises(IOError) as error_trap: + cubes = iris.load(paths) + self.assertTrue(error_trap.exception.message.startswith( + 'One or more of the files specified did not exist')) + + def test_nonexist_wild(self): + paths = ( + tests.get_data_path(['PP', 'aPPglob1', 'global.pp']), + tests.get_data_path(['PP', '_guaranteed_non_exist_*.pp']), + ) + with self.assertRaises(IOError) as error_trap: + cubes = iris.load(paths) + self.assertTrue(error_trap.exception.message.startswith( + 'One or more of the files specified did not exist')) + def test_bogus(self): paths = ( tests.get_data_path(['PP', 'aPPglob1', 'global.pp']), diff --git a/lib/iris/tests/unit/__init__.py b/lib/iris/tests/unit/__init__.py index 6356851d91..c9ee5609da 100644 --- a/lib/iris/tests/unit/__init__.py +++ b/lib/iris/tests/unit/__init__.py @@ -14,4 +14,4 @@ # # You should have received a copy of the GNU Lesser General Public License # along with Iris. If not, see <http://www.gnu.org/licenses/>. 
-"""Unit tests for the `iris` package.""" +"""Unit tests for the :mod:`iris` package.""" diff --git a/lib/iris/tests/unit/cube/__init__.py b/lib/iris/tests/unit/cube/__init__.py index 96c1f40348..2ac2d0ee09 100644 --- a/lib/iris/tests/unit/cube/__init__.py +++ b/lib/iris/tests/unit/cube/__init__.py @@ -14,4 +14,4 @@ # # You should have received a copy of the GNU Lesser General Public License # along with Iris. If not, see <http://www.gnu.org/licenses/>. -"""Unit tests for the iris.cube module.""" +"""Unit tests for the :mod:`iris.cube` module.""" diff --git a/lib/iris/tests/unit/experimental/__init__.py b/lib/iris/tests/unit/experimental/__init__.py index e4db36ef1f..d29d4170e7 100644 --- a/lib/iris/tests/unit/experimental/__init__.py +++ b/lib/iris/tests/unit/experimental/__init__.py @@ -14,4 +14,4 @@ # # You should have received a copy of the GNU Lesser General Public License # along with Iris. If not, see <http://www.gnu.org/licenses/>. -"""Unit tests for the `iris.experimental` package.""" +"""Unit tests for the :mod:`iris.experimental` package.""" diff --git a/lib/iris/tests/unit/experimental/raster/__init__.py b/lib/iris/tests/unit/experimental/raster/__init__.py index ea2c3c8292..b41a305d1f 100644 --- a/lib/iris/tests/unit/experimental/raster/__init__.py +++ b/lib/iris/tests/unit/experimental/raster/__init__.py @@ -14,4 +14,4 @@ # # You should have received a copy of the GNU Lesser General Public License # along with Iris. If not, see <http://www.gnu.org/licenses/>. -"""Unit tests for the `iris.experimental.raster` module.""" +"""Unit tests for the :mod:`iris.experimental.raster` module.""" diff --git a/lib/iris/tests/unit/fileformats/__init__.py b/lib/iris/tests/unit/fileformats/__init__.py index d25a9c8dd2..13d895007a 100644 --- a/lib/iris/tests/unit/fileformats/__init__.py +++ b/lib/iris/tests/unit/fileformats/__init__.py @@ -14,4 +14,4 @@ # # You should have received a copy of the GNU Lesser General Public License # along with Iris. If not, see <http://www.gnu.org/licenses/>. 
-"""Unit tests for the `iris.fileformats` package.""" +"""Unit tests for the :mod:`iris.fileformats` package.""" diff --git a/lib/iris/tests/unit/fileformats/grib/__init__.py b/lib/iris/tests/unit/fileformats/grib/__init__.py index 3aba330427..3e362eb635 100644 --- a/lib/iris/tests/unit/fileformats/grib/__init__.py +++ b/lib/iris/tests/unit/fileformats/grib/__init__.py @@ -14,4 +14,4 @@ # # You should have received a copy of the GNU Lesser General Public License # along with Iris. If not, see <http://www.gnu.org/licenses/>. -"""Unit tests for the `iris.fileformats.grib` package.""" +"""Unit tests for the :mod:`iris.fileformats.grib` package.""" diff --git a/lib/iris/tests/unit/fileformats/grib/grib_save_rules/__init__.py b/lib/iris/tests/unit/fileformats/grib/grib_save_rules/__init__.py index 43fdbec7ca..8e711f3626 100644 --- a/lib/iris/tests/unit/fileformats/grib/grib_save_rules/__init__.py +++ b/lib/iris/tests/unit/fileformats/grib/grib_save_rules/__init__.py @@ -14,4 +14,4 @@ # # You should have received a copy of the GNU Lesser General Public License # along with Iris. If not, see <http://www.gnu.org/licenses/>. -"""Unit tests for the `iris.fileformats.grib.grib_save_rules` module.""" +"""Unit tests for the :mod:`iris.fileformats.grib.grib_save_rules` module.""" diff --git a/lib/iris/tests/unit/fileformats/netcdf/__init__.py b/lib/iris/tests/unit/fileformats/netcdf/__init__.py index b74bd578d7..87521d0779 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/__init__.py +++ b/lib/iris/tests/unit/fileformats/netcdf/__init__.py @@ -14,4 +14,4 @@ # # You should have received a copy of the GNU Lesser General Public License # along with Iris. If not, see <http://www.gnu.org/licenses/>. 
-"""Unit tests for the `iris.fileformats.netcdf` module.""" +"""Unit tests for the :mod:`iris.fileformats.netcdf` module.""" diff --git a/lib/iris/tests/unit/util/__init__.py b/lib/iris/tests/unit/util/__init__.py new file mode 100644 index 0000000000..ad11c3d325 --- /dev/null +++ b/lib/iris/tests/unit/util/__init__.py @@ -0,0 +1,17 @@ +# (C) British Crown Copyright 2013, Met Office +# +# This file is part of Iris. +# +# Iris is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Iris is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with Iris. If not, see <http://www.gnu.org/licenses/>. +"""Unit tests for the :mod:`iris.util` module.""" diff --git a/lib/iris/tests/unit/util/test_file_is_newer_than.py b/lib/iris/tests/unit/util/test_file_is_newer_than.py new file mode 100644 index 0000000000..9d56efbbdc --- /dev/null +++ b/lib/iris/tests/unit/util/test_file_is_newer_than.py @@ -0,0 +1,129 @@ +# (C) British Crown Copyright 2010 - 2013, Met Office +# +# This file is part of Iris. +# +# Iris is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Iris is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with Iris. If not, see <http://www.gnu.org/licenses/>. +""" +Test function :func:`iris.util.file_is_newer_than`. + +""" +# import iris tests first so that some things can be initialised before +# importing anything else +import iris.tests as tests + +import os.path +import shutil +import tempfile +import time +import unittest + +from iris.util import file_is_newer_than + + +class TestFileIsNewer(tests.IrisTest): + """Test the :func:`iris.util.file_is_newer_than` function.""" + + def _name2path(self, filename): + """Add the temporary dirpath to a filename to make a full path.""" + return os.path.join(self.temp_dir, filename) + + def setUp(self): + # make a temporary directory with testfiles of known timestamp order. + self.temp_dir = tempfile.mkdtemp('_testfiles_tempdir') + # define the names of some files to create + create_file_names = ['older_source_1', 'older_source_2', + 'example_result', + 'newer_source_1', 'newer_source_2'] + # create test files in given name order (!important!) + for file_name in create_file_names: + file_path = self._name2path(file_name) + with open(file_path, 'w') as file: + file.write('..content..') + # Needs a tiny pause to prevent possibly equal timestamps + time.sleep(0.002) + + def tearDown(self): + # destroy whole contents of temporary directory + shutil.rmtree(self.temp_dir) + + def _test(self, boolean_result, result_name, source_names): + """Test expected result of executing with given args.""" + # Make args into full paths + result_path = self._name2path(result_name) + if isinstance(source_names, basestring): + source_paths = self._name2path(source_names) + else: + source_paths = [self._name2path(name) + for name in source_names] + # Check result is as expected. 
+ self.assertEqual( + boolean_result, + file_is_newer_than(result_path, source_paths)) + + def test_no_sources(self): + self._test(True, 'example_result', []) + + def test_string_ok(self): + self._test(True, 'example_result', 'older_source_1') + + def test_string_fail(self): + self._test(False, 'example_result', 'newer_source_1') + + def test_self_result(self): + # This fails, because same-timestamp is *not* acceptable. + self._test(False, 'example_result', 'example_result') + + def test_single_ok(self): + self._test(True, 'example_result', ['older_source_2']) + + def test_single_fail(self): + self._test(False, 'example_result', ['newer_source_2']) + + def test_multiple_ok(self): + self._test(True, 'example_result', ['older_source_1', + 'older_source_2']) + + def test_multiple_fail(self): + self._test(False, 'example_result', ['older_source_1', + 'older_source_2', + 'newer_source_1']) + + def test_wild_ok(self): + self._test(True, 'example_result', ['older_sour*_*']) + + def test_wild_fail(self): + self._test(False, 'example_result', ['older_sour*', 'newer_sour*']) + + def test_error_missing_result(self): + with self.assertRaises(OSError) as error_trap: + self._test(False, 'non_exist', ['older_sour*']) + error = error_trap.exception + self.assertEqual(error.strerror, 'No such file or directory') + self.assertEqual(error.filename, self._name2path('non_exist')) + + def test_error_missing_source(self): + with self.assertRaises(IOError) as error_trap: + self._test(False, 'example_result', ['older_sour*', 'non_exist']) + self.assertTrue(error_trap.exception.message.startswith( + 'One or more of the files specified did not exist')) + + def test_error_missing_wild(self): + with self.assertRaises(IOError) as error_trap: + self._test(False, 'example_result', ['older_sour*', 'unknown_*']) + self.assertTrue(error_trap.exception.message.startswith( + 'One or more of the files specified did not exist')) + + +if __name__ == '__main__': + unittest.main() diff --git 
a/lib/iris/util.py b/lib/iris/util.py index e2ef400270..e599357609 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -24,6 +24,7 @@ import copy import inspect import os +import os.path import sys import tempfile import time @@ -1039,3 +1040,60 @@ def add_coord(coord): new_cube.add_aux_factory(factory.updated(coord_mapping)) return new_cube + + +def file_is_newer_than(result_path, source_paths): + """ + Return whether the 'result' file has a later modification time than all of + the 'source' files. + + If a stored result depends entirely on known 'sources', it need only be + re-built when one of them changes. This function can be used to test that + by comparing file timestamps. + + Args: + + * result_path (string): + The filepath of a file containing some derived result data. + * source_paths (string or iterable of strings): + The path(s) to the original datafiles used to make the result. May + include wildcards and '~' expansions (like Iris load paths), but not + URIs. + + Returns: + True if all the sources are older than the result, else False. + + If any of the file paths describes no existing files, an exception will + be raised. + + .. note:: + There are obvious caveats to using file timestamps for this, as correct + usage depends on how the sources might change. For example, a file + could be replaced by one of the same name, but an older timestamp. + + If wildcards and '~' expansions are used, this introduces even more + uncertainty, as then you cannot even be sure that the resulting list of + file names is the same as the originals. For example, some files may + have been deleted or others added. + + .. note:: + The result file may often be a :mod:`pickle` file. In that case, it + also depends on the relevant module sources, so extra caution is + required. Ideally, an additional check on iris.__version__ is advised. 
+ + """ + # Accept a string as a single source path + if isinstance(source_paths, basestring): + source_paths = [source_paths] + # Fix our chosen timestamp function + file_date = os.path.getmtime + # Get the 'result file' time + result_timestamp = file_date(result_path) + # Get all source filepaths, with normal Iris.io load helper function + source_file_paths = iris.io.expand_filespecs(source_paths) + # Compare each filetime, for each spec, with the 'result time' + for path in source_file_paths: + source_timestamp = file_date(path) + if source_timestamp >= result_timestamp: + return False + return True