diff --git a/docs/iris/src/whatsnew/contributions_2.1/newfeature_2018-Apr-12_fast_load_file_indices.txt b/docs/iris/src/whatsnew/contributions_2.1/newfeature_2018-Apr-12_fast_load_file_indices.txt new file mode 100644 index 0000000000..c7d74fe88f --- /dev/null +++ b/docs/iris/src/whatsnew/contributions_2.1/newfeature_2018-Apr-12_fast_load_file_indices.txt @@ -0,0 +1,8 @@ +* The :class:`iris.fileformats.um.FieldCollation` objects, which are passed + into load callbacks when using + :func:`iris.fileformats.um.structured_um_loading`, now + have the additional properties : + :data:`iris.fileformats.um.FieldCollation.data_filepath` and + :data:`iris.fileformats.um.FieldCollation.data_field_indices`. + These provide the file locations of the original data fields, which are + otherwise lost in the structured loading process. diff --git a/lib/iris/fileformats/pp.py b/lib/iris/fileformats/pp.py index f9b3a3a23a..04bd9e952d 100644 --- a/lib/iris/fileformats/pp.py +++ b/lib/iris/fileformats/pp.py @@ -831,7 +831,7 @@ def _pp_attribute_names(header_defn): special_headers = list('_' + name for name in _SPECIAL_HEADERS) extra_data = list(EXTRA_DATA.values()) special_attributes = ['_raw_header', 'raw_lbtim', 'raw_lbpack', - 'boundary_packing'] + 'boundary_packing', '_index_in_structured_load_file'] return normal_headers + special_headers + extra_data + special_attributes @@ -864,6 +864,7 @@ def __init__(self, header=None): self.raw_lbtim = None self.raw_lbpack = None self.boundary_packing = None + self._index_in_structured_load_file = None if header is not None: self.raw_lbtim = header[self.HEADER_DICT['lbtim'][0]] self.raw_lbpack = header[self.HEADER_DICT['lbpack'][0]] diff --git a/lib/iris/fileformats/um/__init__.py b/lib/iris/fileformats/um/__init__.py index 493f022977..746087b2f6 100644 --- a/lib/iris/fileformats/um/__init__.py +++ b/lib/iris/fileformats/um/__init__.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2014 - 2017, Met Office +# (C) British Crown Copyright 2014 - 
2018, Met Office # # This file is part of Iris. # @@ -27,7 +27,6 @@ # Publish the FF-replacement features here, and include documentation. from ._ff_replacement import um_to_pp, load_cubes, load_cubes_32bit_ieee -from ._fast_load import structured_um_loading -from ._fast_load_structured_fields import FieldCollation +from ._fast_load import structured_um_loading, FieldCollation __all__ = ['um_to_pp', 'load_cubes', 'load_cubes_32bit_ieee', 'structured_um_loading', 'FieldCollation'] diff --git a/lib/iris/fileformats/um/_fast_load.py b/lib/iris/fileformats/um/_fast_load.py index 94255e8f18..bb0bb933d6 100644 --- a/lib/iris/fileformats/um/_fast_load.py +++ b/lib/iris/fileformats/um/_fast_load.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2016 - 2017, Met Office +# (C) British Crown Copyright 2016 - 2018, Met Office # # This file is part of Iris. # @@ -40,19 +40,67 @@ import threading import os.path +import numpy as np + # Be minimal about what we import from iris, to avoid circular imports. # Below, other parts of iris.fileformats are accessed via deferred imports. import iris from iris.coords import DimCoord from iris.cube import CubeList from iris.exceptions import TranslationError - +from iris.fileformats.um._fast_load_structured_fields import \ + BasicFieldCollation, group_structured_fields # Strings to identify the PP and FF file format handler specs. _FF_SPEC_NAME = 'UM Fieldsfile' _PP_SPEC_NAME = 'UM Post Processing file' +class FieldCollation(BasicFieldCollation): + # This class specialises the BasicFieldCollation by adding the file-index + # and file-path concepts. + # This preserves the more abstract scope of the original 'FieldCollation' + # class, now renamed 'BasicFieldCollation'. + + def __init__(self, fields, filepath): + """ + Args: + + * fields (iterable of :class:`iris.fileformats.pp.PPField`): + The fields in the collation. + + * filepath (string): + The path of the file the collation is loaded from. 
+ + """ + super(FieldCollation, self).__init__(fields) + self._load_filepath = filepath + + @property + def data_filepath(self): + return self._load_filepath + + @property + def data_field_indices(self): + """ + Field indices of the contained PPFields in the input file. + + This records the original file location of the individual data fields + contained, within the input datafile. + + Returns: + An integer array of shape `self.vector_dims_shape`. + + """ + # Get shape : N.B. this calculates (and caches) the structure. + vector_dims_shape = self.vector_dims_shape + # Get index-in-file of each contained field. + indices = np.array([field._index_in_structured_load_file + for field in self._fields], + dtype=np.int64) + return indices.reshape(vector_dims_shape) + + def _basic_load_function(filename, pp_filter=None, **kwargs): # The low-level 'fields from filename' loader. # @@ -71,8 +119,6 @@ def _basic_load_function(filename, pp_filter=None, **kwargs): # Therefore, the actual loader will pass this as the 'pp_filter' keyword, # when it is present. # Additional load keywords are 'passed on' to the lower-level function. - from iris.fileformats.um._fast_load_structured_fields import \ - group_structured_fields # Helper function to select the correct fields loader call. 
def _select_raw_fields_loader(fname): @@ -98,10 +144,20 @@ def _select_raw_fields_loader(fname): return loader loader = _select_raw_fields_loader(filename) - fields = iter(field - for field in loader(filename, **kwargs) - if pp_filter is None or pp_filter(field)) - return group_structured_fields(fields) + + def iter_fields_decorated_with_load_indices(fields_iter): + for i_field, field in enumerate(fields_iter): + field._index_in_structured_load_file = i_field + yield field + + fields = iter_fields_decorated_with_load_indices( + field + for field in loader(filename, **kwargs) + if pp_filter is None or pp_filter(field)) + + return group_structured_fields(fields, + collation_class=FieldCollation, + filepath=filename) # Define the preferred order of candidate dimension coordinates, as used by @@ -342,7 +398,9 @@ def structured_um_loading(): which is normally the whole of one phenomenon from a single input file. In particular, the callback's "field" argument is a :class:`~iris.fileformats.um.FieldCollation`, from which "field.fields" - gives a *list* of PPFields from which that cube was built. + gives a *list* of PPFields from which that cube was built, and the + properties "field.data_filepath" and "field.data_field_indices" + reference the original file locations of the cube data. The code required is therefore different from a 'normal' callback. For an example of this, see `this example in the Iris test code . +""" +Unit tests for the class +:class:`iris.fileformats.um._fast_load.FieldCollation`. + +This only tests the additional functionality for recording file locations of +PPFields that make loaded cubes. +The original class is the baseclass of this, now renamed 'BasicFieldCollation'. + +""" + +from __future__ import (absolute_import, division, print_function) +from six.moves import (filter, input, map, range, zip) # noqa + +# import iris tests first so that some things can be initialised +# before importing anything else. 
+import iris.tests as tests + +import numpy as np + +import iris + +from iris.tests.integration.fast_load.test_fast_load import Mixin_FieldTest + + +class TestFastCallbackLocationInfo(Mixin_FieldTest, tests.IrisTest): + do_fast_loads = True + + def setUp(self): + # Call parent setup. + super(TestFastCallbackLocationInfo, self).setUp() + + # Create a basic load test case. + self.callback_collations = [] + self.callback_filepaths = [] + + def fast_load_callback(cube, collation, filename): + self.callback_collations.append(collation) + self.callback_filepaths.append(filename) + + flds = self.fields(c_t='11112222', c_h='11221122', phn='01010101') + self.test_filepath = self.save_fieldcubes(flds) + iris.load(self.test_filepath, callback=fast_load_callback) + + def test_callback_collations_filepaths(self): + self.assertEqual(len(self.callback_collations), 2) + self.assertEqual(self.callback_collations[0].data_filepath, + self.test_filepath) + self.assertEqual(self.callback_collations[1].data_filepath, + self.test_filepath) + + def test_callback_collations_field_indices(self): + self.assertEqual( + self.callback_collations[0].data_field_indices.dtype, np.int64) + self.assertArrayEqual( + self.callback_collations[0].data_field_indices, + [[1, 3], [5, 7]]) + + self.assertEqual( + self.callback_collations[1].data_field_indices.dtype, np.int64) + self.assertArrayEqual( + self.callback_collations[1].data_field_indices, + [[0, 2], [4, 6]]) + + +if __name__ == '__main__': + tests.main() diff --git a/lib/iris/tests/unit/fileformats/um/fast_load_structured_fields/test_FieldCollation.py b/lib/iris/tests/unit/fileformats/um/fast_load_structured_fields/test_BasicFieldCollation.py similarity index 84% rename from lib/iris/tests/unit/fileformats/um/fast_load_structured_fields/test_FieldCollation.py rename to lib/iris/tests/unit/fileformats/um/fast_load_structured_fields/test_BasicFieldCollation.py index 12168a85d1..087ec53446 100644 --- 
a/lib/iris/tests/unit/fileformats/um/fast_load_structured_fields/test_FieldCollation.py +++ b/lib/iris/tests/unit/fileformats/um/fast_load_structured_fields/test_BasicFieldCollation.py @@ -16,7 +16,7 @@ # along with Iris. If not, see . """ Unit tests for the class -:class:`iris.fileformats.um._fast_load_structured_fields.FieldCollation`. +:class:`iris.fileformats.um._fast_load_structured_fields.BasicFieldCollation`. """ @@ -31,20 +31,21 @@ from cftime import datetime import numpy as np -from iris.fileformats.um._fast_load_structured_fields import FieldCollation +from iris.fileformats.um._fast_load_structured_fields \ + import BasicFieldCollation import iris.fileformats.pp class Test___init__(tests.IrisTest): def test_no_fields(self): with self.assertRaises(AssertionError): - FieldCollation([]) + BasicFieldCollation([]) class Test_fields(tests.IrisTest): def test_preserve_members(self): fields = ('foo', 'bar', 'wibble') - collation = FieldCollation(fields) + collation = BasicFieldCollation(fields) self.assertEqual(collation.fields, fields) @@ -80,7 +81,7 @@ def _make_data(fill_value): class Test_data(tests.IrisTest): # Test order of the data attribute when fastest-varying element is changed. 
def test_t1_varies_faster(self): - collation = FieldCollation( + collation = BasicFieldCollation( [_make_field(lbyr=2013, lbyrd=2000, data=0), _make_field(lbyr=2014, lbyrd=2000, data=1), _make_field(lbyr=2015, lbyrd=2000, data=2), @@ -92,7 +93,7 @@ def test_t1_varies_faster(self): self.assertArrayEqual(result, expected) def test_t2_varies_faster(self): - collation = FieldCollation( + collation = BasicFieldCollation( [_make_field(lbyr=2013, lbyrd=2000, data=0), _make_field(lbyr=2013, lbyrd=2001, data=1), _make_field(lbyr=2013, lbyrd=2002, data=2), @@ -107,12 +108,12 @@ def test_t2_varies_faster(self): class Test_element_arrays_and_dims(tests.IrisTest): def test_single_field(self): field = _make_field(2013) - collation = FieldCollation([field]) + collation = BasicFieldCollation([field]) self.assertEqual(collation.element_arrays_and_dims, {}) def test_t1(self): - collation = FieldCollation([_make_field(lbyr=2013), - _make_field(lbyr=2014)]) + collation = BasicFieldCollation([_make_field(lbyr=2013), + _make_field(lbyr=2014)]) result = collation.element_arrays_and_dims self.assertEqual(list(result.keys()), ['t1']) values, dims = result['t1'] @@ -121,9 +122,9 @@ def test_t1(self): self.assertEqual(dims, (0,)) def test_t1_and_t2(self): - collation = FieldCollation([_make_field(lbyr=2013, lbyrd=2000), - _make_field(lbyr=2014, lbyrd=2001), - _make_field(lbyr=2015, lbyrd=2002)]) + collation = BasicFieldCollation([_make_field(lbyr=2013, lbyrd=2000), + _make_field(lbyr=2014, lbyrd=2001), + _make_field(lbyr=2015, lbyrd=2002)]) result = collation.element_arrays_and_dims self.assertEqual(set(result.keys()), set(['t1', 't2'])) values, dims = result['t1'] @@ -138,10 +139,11 @@ def test_t1_and_t2(self): self.assertEqual(dims, (0,)) def test_t1_and_t2_and_lbft(self): - collation = FieldCollation([_make_field(lbyr=1, lbyrd=15, lbft=6), - _make_field(lbyr=1, lbyrd=16, lbft=9), - _make_field(lbyr=11, lbyrd=25, lbft=6), - _make_field(lbyr=11, lbyrd=26, lbft=9)]) + collation = 
BasicFieldCollation( + [_make_field(lbyr=1, lbyrd=15, lbft=6), + _make_field(lbyr=1, lbyrd=16, lbft=9), + _make_field(lbyr=11, lbyrd=25, lbft=6), + _make_field(lbyr=11, lbyrd=26, lbft=9)]) result = collation.element_arrays_and_dims self.assertEqual(set(result.keys()), set(['t1', 't2', 'lbft'])) values, dims = result['t1'] @@ -158,7 +160,8 @@ def test_t1_and_t2_and_lbft(self): self.assertEqual(dims, (1,)) def test_blev(self): - collation = FieldCollation([_make_field(blev=1), _make_field(blev=2)]) + collation = BasicFieldCollation([_make_field(blev=1), + _make_field(blev=2)]) result = collation.element_arrays_and_dims keys = set(['blev', 'brsvd1', 'brsvd2', 'brlev', 'bhrlev', 'lblev', 'bhlev']) @@ -168,8 +171,8 @@ def test_blev(self): self.assertEqual(dims, (0,)) def test_bhlev(self): - collation = FieldCollation([_make_field(blev=0, bhlev=1), - _make_field(blev=1, bhlev=2)]) + collation = BasicFieldCollation([_make_field(blev=0, bhlev=1), + _make_field(blev=1, bhlev=2)]) result = collation.element_arrays_and_dims keys = set(['blev', 'brsvd1', 'brsvd2', 'brlev', 'bhrlev', 'lblev', 'bhlev']) @@ -200,7 +203,7 @@ def test(self): (2004, 3, 1, 0, 0, 0), (2005, 1, 1, 0, 0, 0)] - collation = FieldCollation(['foo', 'bar']) + collation = BasicFieldCollation(['foo', 'bar']) test_date_ints = [collation._time_comparable_int(*test_tuple) for test_tuple in test_date_tuples] # Check all values are distinct. 
diff --git a/lib/iris/tests/unit/fileformats/um/fast_load_structured_fields/test_group_structured_fields.py b/lib/iris/tests/unit/fileformats/um/fast_load_structured_fields/test_group_structured_fields.py index b860cd8232..0d3413189c 100644 --- a/lib/iris/tests/unit/fileformats/um/fast_load_structured_fields/test_group_structured_fields.py +++ b/lib/iris/tests/unit/fileformats/um/fast_load_structured_fields/test_group_structured_fields.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2014 - 2015, Met Office +# (C) British Crown Copyright 2014 - 2018, Met Office # # This file is part of Iris. # @@ -71,9 +71,7 @@ def _dummy_fields_iter(self, stashes=None, models=None, lbprocs=None): def _group_result(self, fields): # Run the testee, but returning just the groups (not FieldCollations). - with mock.patch('iris.fileformats.um._fast_load_structured_fields.' - 'FieldCollation', new=lambda args: args): - result = list(group_structured_fields(fields)) + result = list(group_structured_fields(fields, collation_class=tuple)) return result def _test_fields(self, item):