diff --git a/docs/iris/src/whatsnew/contributions_2.1/newfeature_2018-Apr-12_fast_load_file_indices.txt b/docs/iris/src/whatsnew/contributions_2.1/newfeature_2018-Apr-12_fast_load_file_indices.txt
new file mode 100644
index 0000000000..c7d74fe88f
--- /dev/null
+++ b/docs/iris/src/whatsnew/contributions_2.1/newfeature_2018-Apr-12_fast_load_file_indices.txt
@@ -0,0 +1,8 @@
+* The :class:`iris.fileformats.um.FieldCollation` objects, which are passed
+ into load callbacks when using
+ :func:`iris.fileformats.um.structured_um_loading`, now
+ have the additional properties:
+ :data:`iris.fileformats.um.FieldCollation.data_filepath` and
+ :data:`iris.fileformats.um.FieldCollation.data_field_indices`.
+ These provide the file locations of the original data fields, which are
+ otherwise lost in the structured loading process.
diff --git a/lib/iris/fileformats/pp.py b/lib/iris/fileformats/pp.py
index f9b3a3a23a..04bd9e952d 100644
--- a/lib/iris/fileformats/pp.py
+++ b/lib/iris/fileformats/pp.py
@@ -831,7 +831,7 @@ def _pp_attribute_names(header_defn):
special_headers = list('_' + name for name in _SPECIAL_HEADERS)
extra_data = list(EXTRA_DATA.values())
special_attributes = ['_raw_header', 'raw_lbtim', 'raw_lbpack',
- 'boundary_packing']
+ 'boundary_packing', '_index_in_structured_load_file']
return normal_headers + special_headers + extra_data + special_attributes
@@ -864,6 +864,7 @@ def __init__(self, header=None):
self.raw_lbtim = None
self.raw_lbpack = None
self.boundary_packing = None
+ self._index_in_structured_load_file = None
if header is not None:
self.raw_lbtim = header[self.HEADER_DICT['lbtim'][0]]
self.raw_lbpack = header[self.HEADER_DICT['lbpack'][0]]
diff --git a/lib/iris/fileformats/um/__init__.py b/lib/iris/fileformats/um/__init__.py
index 493f022977..746087b2f6 100644
--- a/lib/iris/fileformats/um/__init__.py
+++ b/lib/iris/fileformats/um/__init__.py
@@ -1,4 +1,4 @@
-# (C) British Crown Copyright 2014 - 2017, Met Office
+# (C) British Crown Copyright 2014 - 2018, Met Office
#
# This file is part of Iris.
#
@@ -27,7 +27,6 @@
# Publish the FF-replacement features here, and include documentation.
from ._ff_replacement import um_to_pp, load_cubes, load_cubes_32bit_ieee
-from ._fast_load import structured_um_loading
-from ._fast_load_structured_fields import FieldCollation
+from ._fast_load import structured_um_loading, FieldCollation
__all__ = ['um_to_pp', 'load_cubes', 'load_cubes_32bit_ieee',
'structured_um_loading', 'FieldCollation']
diff --git a/lib/iris/fileformats/um/_fast_load.py b/lib/iris/fileformats/um/_fast_load.py
index 94255e8f18..bb0bb933d6 100644
--- a/lib/iris/fileformats/um/_fast_load.py
+++ b/lib/iris/fileformats/um/_fast_load.py
@@ -1,4 +1,4 @@
-# (C) British Crown Copyright 2016 - 2017, Met Office
+# (C) British Crown Copyright 2016 - 2018, Met Office
#
# This file is part of Iris.
#
@@ -40,19 +40,67 @@
import threading
import os.path
+import numpy as np
+
# Be minimal about what we import from iris, to avoid circular imports.
# Below, other parts of iris.fileformats are accessed via deferred imports.
import iris
from iris.coords import DimCoord
from iris.cube import CubeList
from iris.exceptions import TranslationError
-
+from iris.fileformats.um._fast_load_structured_fields import \
+ BasicFieldCollation, group_structured_fields
# Strings to identify the PP and FF file format handler specs.
_FF_SPEC_NAME = 'UM Fieldsfile'
_PP_SPEC_NAME = 'UM Post Processing file'
+class FieldCollation(BasicFieldCollation):
+ # This class specialises the BasicFieldCollation by adding the file-index
+ # and file-path concepts.
+ # This preserves the more abstract scope of the original 'FieldCollation'
+ # class, now renamed 'BasicFieldCollation'.
+
+ def __init__(self, fields, filepath):
+ """
+ Args:
+
+ * fields (iterable of :class:`iris.fileformats.pp.PPField`):
+ The fields in the collation.
+
+ * filepath (string):
+ The path of the file the collation is loaded from.
+
+ """
+ super(FieldCollation, self).__init__(fields)
+ self._load_filepath = filepath
+
+ @property
+ def data_filepath(self):
+ return self._load_filepath
+
+ @property
+ def data_field_indices(self):
+ """
+ Field indices of the contained PPFields in the input file.
+
+ This records the original location, within the input datafile, of each
+ individual data field contained in the collation.
+
+ Returns:
+ An integer array of shape `self.vector_dims_shape`.
+
+ """
+ # Get shape : N.B. this calculates (and caches) the structure.
+ vector_dims_shape = self.vector_dims_shape
+ # Get index-in-file of each contained field.
+ indices = np.array([field._index_in_structured_load_file
+ for field in self._fields],
+ dtype=np.int64)
+ return indices.reshape(vector_dims_shape)
+
+
def _basic_load_function(filename, pp_filter=None, **kwargs):
# The low-level 'fields from filename' loader.
#
@@ -71,8 +119,6 @@ def _basic_load_function(filename, pp_filter=None, **kwargs):
# Therefore, the actual loader will pass this as the 'pp_filter' keyword,
# when it is present.
# Additional load keywords are 'passed on' to the lower-level function.
- from iris.fileformats.um._fast_load_structured_fields import \
- group_structured_fields
# Helper function to select the correct fields loader call.
def _select_raw_fields_loader(fname):
@@ -98,10 +144,20 @@ def _select_raw_fields_loader(fname):
return loader
loader = _select_raw_fields_loader(filename)
- fields = iter(field
- for field in loader(filename, **kwargs)
- if pp_filter is None or pp_filter(field))
- return group_structured_fields(fields)
+
+ def iter_fields_decorated_with_load_indices(fields_iter):
+ for i_field, field in enumerate(fields_iter):
+ field._index_in_structured_load_file = i_field
+ yield field
+
+ fields = iter_fields_decorated_with_load_indices(
+ field
+ for field in loader(filename, **kwargs)
+ if pp_filter is None or pp_filter(field))
+
+ return group_structured_fields(fields,
+ collation_class=FieldCollation,
+ filepath=filename)
# Define the preferred order of candidate dimension coordinates, as used by
@@ -342,7 +398,9 @@ def structured_um_loading():
which is normally the whole of one phenomenon from a single input file.
In particular, the callback's "field" argument is a
:class:`~iris.fileformats.um.FieldCollation`, from which "field.fields"
- gives a *list* of PPFields from which that cube was built.
+ gives a *list* of PPFields from which that cube was built, and the
+ properties "field.data_filepath" and "field.data_field_indices"
+ reference the original file locations of the cube data.
The code required is therefore different from a 'normal' callback.
For an example of this, see `this example in the Iris test code
.
+"""
+Unit tests for the class
+:class:`iris.fileformats.um._fast_load.FieldCollation`.
+
+This only tests the additional functionality for recording file locations of
+the PPFields that make up loaded cubes.
+The original class is the baseclass of this, now renamed 'BasicFieldCollation'.
+
+"""
+
+from __future__ import (absolute_import, division, print_function)
+from six.moves import (filter, input, map, range, zip) # noqa
+
+# import iris tests first so that some things can be initialised
+# before importing anything else.
+import iris.tests as tests
+
+import numpy as np
+
+import iris
+
+from iris.tests.integration.fast_load.test_fast_load import Mixin_FieldTest
+
+
+class TestFastCallbackLocationInfo(Mixin_FieldTest, tests.IrisTest):
+ do_fast_loads = True
+
+ def setUp(self):
+ # Call parent setup.
+ super(TestFastCallbackLocationInfo, self).setUp()
+
+ # Create a basic load test case.
+ self.callback_collations = []
+ self.callback_filepaths = []
+
+ def fast_load_callback(cube, collation, filename):
+ self.callback_collations.append(collation)
+ self.callback_filepaths.append(filename)
+
+ flds = self.fields(c_t='11112222', c_h='11221122', phn='01010101')
+ self.test_filepath = self.save_fieldcubes(flds)
+ iris.load(self.test_filepath, callback=fast_load_callback)
+
+ def test_callback_collations_filepaths(self):
+ self.assertEqual(len(self.callback_collations), 2)
+ self.assertEqual(self.callback_collations[0].data_filepath,
+ self.test_filepath)
+ self.assertEqual(self.callback_collations[1].data_filepath,
+ self.test_filepath)
+
+ def test_callback_collations_field_indices(self):
+ self.assertEqual(
+ self.callback_collations[0].data_field_indices.dtype, np.int64)
+ self.assertArrayEqual(
+ self.callback_collations[0].data_field_indices,
+ [[1, 3], [5, 7]])
+
+ self.assertEqual(
+ self.callback_collations[1].data_field_indices.dtype, np.int64)
+ self.assertArrayEqual(
+ self.callback_collations[1].data_field_indices,
+ [[0, 2], [4, 6]])
+
+
+if __name__ == '__main__':
+ tests.main()
diff --git a/lib/iris/tests/unit/fileformats/um/fast_load_structured_fields/test_FieldCollation.py b/lib/iris/tests/unit/fileformats/um/fast_load_structured_fields/test_BasicFieldCollation.py
similarity index 84%
rename from lib/iris/tests/unit/fileformats/um/fast_load_structured_fields/test_FieldCollation.py
rename to lib/iris/tests/unit/fileformats/um/fast_load_structured_fields/test_BasicFieldCollation.py
index 12168a85d1..087ec53446 100644
--- a/lib/iris/tests/unit/fileformats/um/fast_load_structured_fields/test_FieldCollation.py
+++ b/lib/iris/tests/unit/fileformats/um/fast_load_structured_fields/test_BasicFieldCollation.py
@@ -16,7 +16,7 @@
# along with Iris. If not, see <http://www.gnu.org/licenses/>.
"""
Unit tests for the class
-:class:`iris.fileformats.um._fast_load_structured_fields.FieldCollation`.
+:class:`iris.fileformats.um._fast_load_structured_fields.BasicFieldCollation`.
"""
@@ -31,20 +31,21 @@
from cftime import datetime
import numpy as np
-from iris.fileformats.um._fast_load_structured_fields import FieldCollation
+from iris.fileformats.um._fast_load_structured_fields \
+ import BasicFieldCollation
import iris.fileformats.pp
class Test___init__(tests.IrisTest):
def test_no_fields(self):
with self.assertRaises(AssertionError):
- FieldCollation([])
+ BasicFieldCollation([])
class Test_fields(tests.IrisTest):
def test_preserve_members(self):
fields = ('foo', 'bar', 'wibble')
- collation = FieldCollation(fields)
+ collation = BasicFieldCollation(fields)
self.assertEqual(collation.fields, fields)
@@ -80,7 +81,7 @@ def _make_data(fill_value):
class Test_data(tests.IrisTest):
# Test order of the data attribute when fastest-varying element is changed.
def test_t1_varies_faster(self):
- collation = FieldCollation(
+ collation = BasicFieldCollation(
[_make_field(lbyr=2013, lbyrd=2000, data=0),
_make_field(lbyr=2014, lbyrd=2000, data=1),
_make_field(lbyr=2015, lbyrd=2000, data=2),
@@ -92,7 +93,7 @@ def test_t1_varies_faster(self):
self.assertArrayEqual(result, expected)
def test_t2_varies_faster(self):
- collation = FieldCollation(
+ collation = BasicFieldCollation(
[_make_field(lbyr=2013, lbyrd=2000, data=0),
_make_field(lbyr=2013, lbyrd=2001, data=1),
_make_field(lbyr=2013, lbyrd=2002, data=2),
@@ -107,12 +108,12 @@ def test_t2_varies_faster(self):
class Test_element_arrays_and_dims(tests.IrisTest):
def test_single_field(self):
field = _make_field(2013)
- collation = FieldCollation([field])
+ collation = BasicFieldCollation([field])
self.assertEqual(collation.element_arrays_and_dims, {})
def test_t1(self):
- collation = FieldCollation([_make_field(lbyr=2013),
- _make_field(lbyr=2014)])
+ collation = BasicFieldCollation([_make_field(lbyr=2013),
+ _make_field(lbyr=2014)])
result = collation.element_arrays_and_dims
self.assertEqual(list(result.keys()), ['t1'])
values, dims = result['t1']
@@ -121,9 +122,9 @@ def test_t1(self):
self.assertEqual(dims, (0,))
def test_t1_and_t2(self):
- collation = FieldCollation([_make_field(lbyr=2013, lbyrd=2000),
- _make_field(lbyr=2014, lbyrd=2001),
- _make_field(lbyr=2015, lbyrd=2002)])
+ collation = BasicFieldCollation([_make_field(lbyr=2013, lbyrd=2000),
+ _make_field(lbyr=2014, lbyrd=2001),
+ _make_field(lbyr=2015, lbyrd=2002)])
result = collation.element_arrays_and_dims
self.assertEqual(set(result.keys()), set(['t1', 't2']))
values, dims = result['t1']
@@ -138,10 +139,11 @@ def test_t1_and_t2(self):
self.assertEqual(dims, (0,))
def test_t1_and_t2_and_lbft(self):
- collation = FieldCollation([_make_field(lbyr=1, lbyrd=15, lbft=6),
- _make_field(lbyr=1, lbyrd=16, lbft=9),
- _make_field(lbyr=11, lbyrd=25, lbft=6),
- _make_field(lbyr=11, lbyrd=26, lbft=9)])
+ collation = BasicFieldCollation(
+ [_make_field(lbyr=1, lbyrd=15, lbft=6),
+ _make_field(lbyr=1, lbyrd=16, lbft=9),
+ _make_field(lbyr=11, lbyrd=25, lbft=6),
+ _make_field(lbyr=11, lbyrd=26, lbft=9)])
result = collation.element_arrays_and_dims
self.assertEqual(set(result.keys()), set(['t1', 't2', 'lbft']))
values, dims = result['t1']
@@ -158,7 +160,8 @@ def test_t1_and_t2_and_lbft(self):
self.assertEqual(dims, (1,))
def test_blev(self):
- collation = FieldCollation([_make_field(blev=1), _make_field(blev=2)])
+ collation = BasicFieldCollation([_make_field(blev=1),
+ _make_field(blev=2)])
result = collation.element_arrays_and_dims
keys = set(['blev', 'brsvd1', 'brsvd2', 'brlev',
'bhrlev', 'lblev', 'bhlev'])
@@ -168,8 +171,8 @@ def test_blev(self):
self.assertEqual(dims, (0,))
def test_bhlev(self):
- collation = FieldCollation([_make_field(blev=0, bhlev=1),
- _make_field(blev=1, bhlev=2)])
+ collation = BasicFieldCollation([_make_field(blev=0, bhlev=1),
+ _make_field(blev=1, bhlev=2)])
result = collation.element_arrays_and_dims
keys = set(['blev', 'brsvd1', 'brsvd2', 'brlev',
'bhrlev', 'lblev', 'bhlev'])
@@ -200,7 +203,7 @@ def test(self):
(2004, 3, 1, 0, 0, 0),
(2005, 1, 1, 0, 0, 0)]
- collation = FieldCollation(['foo', 'bar'])
+ collation = BasicFieldCollation(['foo', 'bar'])
test_date_ints = [collation._time_comparable_int(*test_tuple)
for test_tuple in test_date_tuples]
# Check all values are distinct.
diff --git a/lib/iris/tests/unit/fileformats/um/fast_load_structured_fields/test_group_structured_fields.py b/lib/iris/tests/unit/fileformats/um/fast_load_structured_fields/test_group_structured_fields.py
index b860cd8232..0d3413189c 100644
--- a/lib/iris/tests/unit/fileformats/um/fast_load_structured_fields/test_group_structured_fields.py
+++ b/lib/iris/tests/unit/fileformats/um/fast_load_structured_fields/test_group_structured_fields.py
@@ -1,4 +1,4 @@
-# (C) British Crown Copyright 2014 - 2015, Met Office
+# (C) British Crown Copyright 2014 - 2018, Met Office
#
# This file is part of Iris.
#
@@ -71,9 +71,7 @@ def _dummy_fields_iter(self, stashes=None, models=None, lbprocs=None):
def _group_result(self, fields):
# Run the testee, but returning just the groups (not FieldCollations).
- with mock.patch('iris.fileformats.um._fast_load_structured_fields.'
- 'FieldCollation', new=lambda args: args):
- result = list(group_structured_fields(fields))
+ result = list(group_structured_fields(fields, collation_class=tuple))
return result
def _test_fields(self, item):