SciTools · rhattersley · Oct 9, 2013 · Oct 8, 2013 · Oct 9, 2013 · Oct 9, 2013
diff --git a/docs/iris/src/whatsnew/1.6.rst b/docs/iris/src/whatsnew/1.6.rst
@@ -7,7 +7,9 @@ What's new in Iris 1.6
 
 Iris 1.6 features
 =================
-* N/A
+* A new utility function makes it easy to "shortcut" slow calculations or
+  file loads by checking whether a stored result is still up to date. See
+  :func:`iris.util.file_is_newer_than`.
 
 Bugs fixed
 ----------

diff --git a/lib/iris/io/__init__.py b/lib/iris/io/__init__.py
@@ -143,31 +143,53 @@ def decode_uri(uri, default='file'):
     return scheme, part
 
 
-def load_files(filenames, callback):
+def expand_filespecs(file_specs):
     """
-    Takes a list of filenames which may also be globs, and optionally a
-    callback function, and returns a generator of Cubes from the given files.
+    Find all matching file paths from a list of file-specs.
 
-    .. note::
+    Args:
 
-        Typically, this function should not be called directly; instead, the
-        intended interface for loading is :func:`iris.load`.
+    * file_specs (iterable of string):
+        File paths which may contain '~' elements or wildcards.
+
+    Returns:
+        A list of matching file paths.  If any of the file-specs matches no
+        existing files, an exception is raised.
 
     """
     # Remove any hostname component - currently unused
-    filenames = [os.path.expanduser(fn[2:] if fn.startswith('//') else fn) for fn in filenames]
+    filenames = [os.path.expanduser(fn[2:] if fn.startswith('//') else fn)
+                 for fn in file_specs]
 
     # Try to expand all filenames as globs
     glob_expanded = {fn : sorted(glob.glob(fn)) for fn in filenames}
 
-    # If any of the filenames or globs expanded to an empty list then raise an error
-    if not all(glob_expanded.viewvalues()):
+    # If any of the specs expanded to an empty list then raise an error
+    value_lists = glob_expanded.viewvalues()
+    if not all(value_lists):
         raise IOError("One or more of the files specified did not exist %s." %
-        ["%s expanded to %s" % (pattern, expanded if expanded else "empty") for pattern, expanded in glob_expanded.iteritems()])
+        ["%s expanded to %s" % (pattern, expanded if expanded else "empty")
+         for pattern, expanded in glob_expanded.iteritems()])
+
+    return sum(value_lists, [])
+
+
+def load_files(filenames, callback):
+    """
+    Takes a list of filenames which may also be globs, and optionally a
+    callback function, and returns a generator of Cubes from the given files.
+
+    .. note::
+
+        Typically, this function should not be called directly; instead, the
+        intended interface for loading is :func:`iris.load`.
+
+    """
+    all_file_paths = expand_filespecs(filenames)
 
     # Create default dict mapping iris format handler to its associated filenames
     handler_map = collections.defaultdict(list)
-    for fn in sum([x for x in glob_expanded.viewvalues()], []):
+    for fn in all_file_paths:
         with open(fn) as fh:
             handling_format_spec = iris.fileformats.FORMAT_AGENT.get_spec(os.path.basename(fn), fh)
             handler_map[handling_format_spec].append(fn)

diff --git a/lib/iris/tests/test_load.py b/lib/iris/tests/test_load.py
@@ -34,6 +34,26 @@ def test_normal(self):
         cubes = iris.load(paths)
         self.assertEqual(len(cubes), 1)
 
+    def test_nonexist(self):
+        paths = (
+            tests.get_data_path(['PP', 'aPPglob1', 'global.pp']),
+            tests.get_data_path(['PP', '_guaranteed_non_exist.pp']),
+        )
+        with self.assertRaises(IOError) as error_trap:
+            cubes = iris.load(paths)
+        self.assertTrue(error_trap.exception.message.startswith(
+            'One or more of the files specified did not exist'))
+
+    def test_nonexist_wild(self):
+        paths = (
+            tests.get_data_path(['PP', 'aPPglob1', 'global.pp']),
+            tests.get_data_path(['PP', '_guaranteed_non_exist_*.pp']),
+        )
+        with self.assertRaises(IOError) as error_trap:
+            cubes = iris.load(paths)
+        self.assertTrue(error_trap.exception.message.startswith(
+            'One or more of the files specified did not exist'))
+
     def test_bogus(self):
         paths = (
             tests.get_data_path(['PP', 'aPPglob1', 'global.pp']),

diff --git a/lib/iris/tests/unit/__init__.py b/lib/iris/tests/unit/__init__.py
@@ -14,4 +14,4 @@
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with Iris.  If not, see <http://www.gnu.org/licenses/>.
-"""Unit tests for the `iris` package."""
+"""Unit tests for the :mod:`iris` package."""
diff --git a/lib/iris/tests/unit/cube/__init__.py b/lib/iris/tests/unit/cube/__init__.py
@@ -14,4 +14,4 @@
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with Iris.  If not, see <http://www.gnu.org/licenses/>.
-"""Unit tests for the iris.cube module."""
+"""Unit tests for the :mod:`iris.cube` module."""
diff --git a/lib/iris/tests/unit/experimental/__init__.py b/lib/iris/tests/unit/experimental/__init__.py
@@ -14,4 +14,4 @@
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with Iris.  If not, see <http://www.gnu.org/licenses/>.
-"""Unit tests for the `iris.experimental` package."""
+"""Unit tests for the :mod:`iris.experimental` package."""
diff --git a/lib/iris/tests/unit/experimental/raster/__init__.py b/lib/iris/tests/unit/experimental/raster/__init__.py
@@ -14,4 +14,4 @@
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with Iris.  If not, see <http://www.gnu.org/licenses/>.
-"""Unit tests for the `iris.experimental.raster` module."""
+"""Unit tests for the :mod:`iris.experimental.raster` module."""
diff --git a/lib/iris/tests/unit/fileformats/__init__.py b/lib/iris/tests/unit/fileformats/__init__.py
@@ -14,4 +14,4 @@
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with Iris.  If not, see <http://www.gnu.org/licenses/>.
-"""Unit tests for the `iris.fileformats` package."""
+"""Unit tests for the :mod:`iris.fileformats` package."""
diff --git a/lib/iris/tests/unit/fileformats/grib/__init__.py b/lib/iris/tests/unit/fileformats/grib/__init__.py
@@ -14,4 +14,4 @@
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with Iris.  If not, see <http://www.gnu.org/licenses/>.
-"""Unit tests for the `iris.fileformats.grib` package."""
+"""Unit tests for the :mod:`iris.fileformats.grib` package."""
diff --git a/lib/iris/tests/unit/fileformats/grib/grib_save_rules/__init__.py b/lib/iris/tests/unit/fileformats/grib/grib_save_rules/__init__.py
@@ -14,4 +14,4 @@
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with Iris.  If not, see <http://www.gnu.org/licenses/>.
-"""Unit tests for the `iris.fileformats.grib.grib_save_rules` module."""
+"""Unit tests for the :mod:`iris.fileformats.grib.grib_save_rules` module."""
diff --git a/lib/iris/tests/unit/fileformats/netcdf/__init__.py b/lib/iris/tests/unit/fileformats/netcdf/__init__.py
@@ -14,4 +14,4 @@
 #
 # You should have received a copy of the GNU Lesser General Public License
 # along with Iris.  If not, see <http://www.gnu.org/licenses/>.
-"""Unit tests for the `iris.fileformats.netcdf` module."""
+"""Unit tests for the :mod:`iris.fileformats.netcdf` module."""
diff --git a/lib/iris/tests/unit/util/__init__.py b/lib/iris/tests/unit/util/__init__.py
@@ -0,0 +1,17 @@
+# (C) British Crown Copyright 2013, Met Office
+#
+# This file is part of Iris.
+#
+# Iris is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Iris is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with Iris.  If not, see <http://www.gnu.org/licenses/>.
+"""Unit tests for the :mod:`iris.util` module."""
diff --git a/lib/iris/tests/unit/util/test_file_is_newer_than.py b/lib/iris/tests/unit/util/test_file_is_newer_than.py
@@ -0,0 +1,129 @@
+# (C) British Crown Copyright 2010 - 2013, Met Office
+#
+# This file is part of Iris.
+#
+# Iris is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Iris is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with Iris.  If not, see <http://www.gnu.org/licenses/>.
+"""
+Test function :func:`iris.util.file_is_newer_than`.
+
+"""
+# import iris tests first so that some things can be initialised before
+# importing anything else
+import iris.tests as tests
+
+import os.path
+import shutil
+import tempfile
+import time
+import unittest
+
+from iris.util import file_is_newer_than
+
+
+class TestFileIsNewer(tests.IrisTest):
+    """Test the :func:`iris.util.file_is_newer_than` function."""
+
+    def _name2path(self, filename):
+        """Add the temporary dirpath to a filename to make a full path."""
+        return os.path.join(self.temp_dir, filename)
+
+    def setUp(self):
+        # make a temporary directory with testfiles of known timestamp order.
+        self.temp_dir = tempfile.mkdtemp('_testfiles_tempdir')
+        # define the names of some files to create
+        create_file_names = ['older_source_1', 'older_source_2',
+                             'example_result',
+                             'newer_source_1', 'newer_source_2']
+        # create test files in given name order (!important!)
+        for file_name in create_file_names:
+            file_path = self._name2path(file_name)
+            with open(file_path, 'w') as file:
+                file.write('..content..')
+            # Needs a tiny pause to prevent possibly equal timestamps
+            time.sleep(0.002)
+
+    def tearDown(self):
+        # destroy whole contents of temporary directory
+        shutil.rmtree(self.temp_dir)
+
+    def _test(self, boolean_result, result_name, source_names):
+        """Test expected result of executing with given args."""
+        # Make args into full paths
+        result_path = self._name2path(result_name)
+        if isinstance(source_names, basestring):
+            source_paths = self._name2path(source_names)
+        else:
+            source_paths = [self._name2path(name)
+                            for name in source_names]
+        # Check result is as expected.
+        self.assertEqual(
+            boolean_result,
+            file_is_newer_than(result_path, source_paths))
+
+    def test_no_sources(self):
+        self._test(True, 'example_result', [])
+
+    def test_string_ok(self):
+        self._test(True, 'example_result', 'older_source_1')
+
+    def test_string_fail(self):
+        self._test(False, 'example_result', 'newer_source_1')
+
+    def test_self_result(self):
+        # This fails, because same-timestamp is *not* acceptable.
+        self._test(False, 'example_result', 'example_result')
+
+    def test_single_ok(self):
+        self._test(True, 'example_result', ['older_source_2'])
+
+    def test_single_fail(self):
+        self._test(False, 'example_result', ['newer_source_2'])
+
+    def test_multiple_ok(self):
+        self._test(True, 'example_result', ['older_source_1',
+                                            'older_source_2'])
+
+    def test_multiple_fail(self):
+        self._test(False, 'example_result', ['older_source_1',
+                                             'older_source_2',
+                                             'newer_source_1'])
+
+    def test_wild_ok(self):
+        self._test(True, 'example_result', ['older_sour*_*'])
+
+    def test_wild_fail(self):
+        self._test(False, 'example_result', ['older_sour*', 'newer_sour*'])
+
+    def test_error_missing_result(self):
+        with self.assertRaises(OSError) as error_trap:
+            self._test(False, 'non_exist', ['older_sour*'])
+        error = error_trap.exception
+        self.assertEqual(error.strerror, 'No such file or directory')
+        self.assertEqual(error.filename, self._name2path('non_exist'))
+
+    def test_error_missing_source(self):
+        with self.assertRaises(IOError) as error_trap:
+            self._test(False, 'example_result', ['older_sour*', 'non_exist'])
+        self.assertTrue(error_trap.exception.message.startswith(
+            'One or more of the files specified did not exist'))
+
+    def test_error_missing_wild(self):
+        with self.assertRaises(IOError) as error_trap:
+            self._test(False, 'example_result', ['older_sour*', 'unknown_*'])
+        self.assertTrue(error_trap.exception.message.startswith(
+            'One or more of the files specified did not exist'))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/lib/iris/util.py b/lib/iris/util.py
@@ -24,6 +24,7 @@
 import copy
 import inspect
 import os
+import os.path
 import sys
 import tempfile
 import time
@@ -1039,3 +1040,60 @@ def add_coord(coord):
         new_cube.add_aux_factory(factory.updated(coord_mapping))
 
     return new_cube
+
+
+def file_is_newer_than(result_path, source_paths):
+    """
+    Return whether the 'result' file has a later modification time than all of
+    the 'source' files.
+
+    If a stored result depends entirely on known 'sources', it need only be
+    re-built when one of them changes.  This function can be used to test that
+    by comparing file timestamps.
+
+    Args:
+
+    * result_path (string):
+        The filepath of a file containing some derived result data.
+    * source_paths (string or iterable of strings):
+        The path(s) to the original datafiles used to make the result.  May
+        include wildcards and '~' expansions (like Iris load paths), but not
+        URIs.
+
+    Returns:
+        True if all the sources are older than the result, else False.
+
+        If any of the file paths describes no existing files, an exception will
+        be raised.
+
+    .. note::
+        There are obvious caveats to using file timestamps for this, as correct
+        usage depends on how the sources might change.  For example, a file
+        could be replaced by one of the same name, but an older timestamp.
+
+        If wildcards and '~' expansions are used, this introduces even more
+        uncertainty, as then you cannot even be sure that the resulting list of
+        file names is the same as the originals.  For example, some files may
+        have been deleted or others added.
+
+    .. note::
+        The result file may often be a :mod:`pickle` file.  In that case, it
+        also depends on the relevant module sources, so extra caution is
+        required.  Ideally, an additional check on iris.__version__ is advised.
+
+    """
+    # Accept a string as a single source path
+    if isinstance(source_paths, basestring):
+        source_paths = [source_paths]
+    # Fix our chosen timestamp function
+    file_date = os.path.getmtime
+    # Get the 'result file' time
+    result_timestamp = file_date(result_path)
+    # Get all source filepaths, with the normal iris.io load helper function
+    source_file_paths = iris.io.expand_filespecs(source_paths)
+    # Compare each filetime, for each spec, with the 'result time'
+    for path in source_file_paths:
+        source_timestamp = file_date(path)
+        if source_timestamp >= result_timestamp:
+            return False
+    return True