From b8530659c4aa60b1ebe20d0dbcea0d94c660312b Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Tue, 15 Jan 2019 21:08:24 +0000
Subject: [PATCH 1/3] Fix landsea-mask data access for UM files.

---
 .travis.yml                                   |   2 +-
 lib/iris/fileformats/pp.py                    | 136 ++++++++++++++----
 lib/iris/tests/test_pp_module.py              |   4 +-
 .../unit/fileformats/pp/test_PPDataProxy.py   |   6 +-
 .../fileformats/pp/test__create_field_data.py |   8 +-
 .../pp/test__data_bytes_to_shaped_array.py    |  17 ++-
 .../fileformats/pp/test__interpret_field.py   |  54 +++++--
 7 files changed, 161 insertions(+), 66 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index ac5d341307..27c9cfe2c6 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -28,7 +28,7 @@ git:
 
 install:
   - >
-    export IRIS_TEST_DATA_REF="2f3a6bcf25f81bd152b3d66223394074c9069a96";
+    export IRIS_TEST_DATA_REF="dba47566a9147645fea586f94a138e0a8d45a48e";
     export IRIS_TEST_DATA_SUFFIX=$(echo "${IRIS_TEST_DATA_REF}" | sed "s/^v//");
 
   # Cut short doctest phase under Python 2 : now only supports Python 3
diff --git a/lib/iris/fileformats/pp.py b/lib/iris/fileformats/pp.py
index 1c1d281871..57b311b7a5 100644
--- a/lib/iris/fileformats/pp.py
+++ b/lib/iris/fileformats/pp.py
@@ -1,4 +1,4 @@
-# (C) British Crown Copyright 2010 - 2018, Met Office
+# (C) British Crown Copyright 2010 - 2019, Met Office
 #
 # This file is part of Iris.
 #
@@ -37,6 +37,9 @@
 import numpy.ma as ma
 import cftime
 
+import dask
+import dask.array as da
+
 from iris._deprecation import warn_deprecated
 from iris._lazy_data import as_concrete_data, as_lazy_data, is_lazy_data
 import iris.config
@@ -602,10 +605,10 @@ class PPDataProxy(object):
     """A reference to the data payload of a single PP field."""
 
     __slots__ = ('shape', 'src_dtype', 'path', 'offset', 'data_len',
-                 '_lbpack', 'boundary_packing', 'mdi', 'mask')
+                 '_lbpack', 'boundary_packing', 'mdi')
 
     def __init__(self, shape, src_dtype, path, offset, data_len,
-                 lbpack, boundary_packing, mdi, mask):
+                 lbpack, boundary_packing, mdi):
         self.shape = shape
         self.src_dtype = src_dtype
         self.path = path
@@ -614,7 +617,6 @@ def __init__(self, shape, src_dtype, path, offset, data_len,
         self.lbpack = lbpack
         self.boundary_packing = boundary_packing
         self.mdi = mdi
-        self.mask = mask
 
     # lbpack
     def _lbpack_setter(self, value):
@@ -649,14 +651,14 @@ def __getitem__(self, keys):
                                                self.lbpack,
                                                self.boundary_packing,
                                                self.shape, self.src_dtype,
-                                               self.mdi, self.mask)
+                                               self.mdi)
         data = data.__getitem__(keys)
         return np.asanyarray(data, dtype=self.dtype)
 
     def __repr__(self):
         fmt = '<{self.__class__.__name__} shape={self.shape}' \
               ' src_dtype={self.dtype!r} path={self.path!r}' \
-              ' offset={self.offset} mask={self.mask!r}>'
+              ' offset={self.offset}>'
         return fmt.format(self=self)
 
     def __getstate__(self):
@@ -772,24 +774,29 @@ def _data_bytes_to_shaped_array(data_bytes, lbpack, boundary_packing,
 
     elif lbpack.n2 == 2:
         if mask is None:
-            raise ValueError('No mask was found to unpack the data. '
-                             'Could not load.')
-        land_mask = mask.data.astype(np.bool)
-        sea_mask = ~land_mask
-        new_data = np.ma.masked_all(land_mask.shape)
-        new_data.fill_value = mdi
-        if lbpack.n3 == 1:
-            # Land mask packed data.
-            # Sometimes the data comes in longer than it should be (i.e. it
-            # looks like the compressed data is compressed, but the trailing
-            # data hasn't been clipped off!).
-            new_data[land_mask] = data[:land_mask.sum()]
-        elif lbpack.n3 == 2:
-            # Sea mask packed data.
-            new_data[sea_mask] = data[:sea_mask.sum()]
+            # If we are given no mask to apply, then just return raw data, even
+            # though it does not have the correct shape.
+            # For dask-delayed loading, this means that mask, data and the
+            # combination can be properly handled within a dask graph.
+            # However, we still mask any MDI values in the array (below).
+            pass
         else:
-            raise ValueError('Unsupported mask compression.')
-        data = new_data
+            land_mask = mask.data.astype(np.bool)
+            sea_mask = ~land_mask
+            new_data = np.ma.masked_all(land_mask.shape)
+            new_data.fill_value = mdi
+            if lbpack.n3 == 1:
+                # Land mask packed data.
+                # Sometimes the data comes in longer than it should be (i.e. it
+                # looks like the compressed data is compressed, but the
+                # trailing data hasn't been clipped off!).
+                new_data[land_mask] = data[:land_mask.sum()]
+            elif lbpack.n3 == 2:
+                # Sea mask packed data.
+                new_data[sea_mask] = data[:sea_mask.sum()]
+            else:
+                raise ValueError('Unsupported mask compression.')
+            data = new_data
 
     else:
         # Reform in row-column order
@@ -1591,20 +1598,31 @@ def _interpret_fields(fields):
                 (field.lbuser[3] % 1000) == 30:
             land_mask = field
 
+        apply_landmask = None
+
         # Handle land compressed data payloads,
         # when lbpack.n2 is 2.
         if (field.raw_lbpack // 10 % 10) == 2:
             if land_mask is None:
                 landmask_compressed_fields.append(field)
+                # Land-masked fields have their size+shape defined by the
+                # reference landmask field, so we can't yield them if they
+                # are encountered *before* the landmask.
+                # In that case, defer them, and process them all afterwards at
+                # the end of the file.
                 continue
 
-            # Land compressed fields don't have a lbrow and lbnpt.
+            # Landmask-compressed fields don't have an lbrow and lbnpt.
             field.lbrow, field.lbnpt = land_mask.lbrow, land_mask.lbnpt
+            # Construct a data array using the landmask field as a template.
+            apply_landmask = land_mask
 
-        data_shape = (field.lbrow, field.lbnpt)
-        _create_field_data(field, data_shape, land_mask)
+        _create_field_data(field, (field.lbrow, field.lbnpt),
+                           with_landmask_field=apply_landmask)
         yield field
 
+    # At file end, return any land-masked fields that were deferred because
+    # they were encountered before the landmask reference field.
     if landmask_compressed_fields:
         if land_mask is None:
             warnings.warn('Landmask compressed fields existed without a '
@@ -1616,16 +1634,21 @@ def _interpret_fields(fields):
 
         for field in landmask_compressed_fields:
             field.lbrow, field.lbnpt = mask_shape
-            _create_field_data(field, (field.lbrow, field.lbnpt), land_mask)
+            _create_field_data(field, mask_shape,
+                               with_landmask_field=land_mask)
             yield field
 
 
-def _create_field_data(field, data_shape, land_mask):
+def _create_field_data(field, data_shape, with_landmask_field=None):
     """
     Modifies a field's ``_data`` attribute either by:
      * converting DeferredArrayBytes into a lazy array,
      * converting LoadedArrayBytes into an actual numpy array.
 
+    If 'with_landmask_field' is passed, it is another field :  The landmask
+    field's data is used as a template for this field's data, determining its
+    size, shape and the locations of valid (non-missing) datapoints.
+
     """
     if isinstance(field.core_data(), LoadedArrayBytes):
         loaded_bytes = field.core_data()
@@ -1634,7 +1657,8 @@ def _create_field_data(field, data_shape, land_mask):
                                                  field.boundary_packing,
                                                  data_shape,
                                                  loaded_bytes.dtype,
-                                                 field.bmdi, land_mask)
+                                                 field.bmdi,
+                                                 with_landmask_field)
     else:
         # Wrap the reference to the data payload within a data proxy
         # in order to support deferred data loading.
@@ -1643,9 +1667,59 @@ def _create_field_data(field, data_shape, land_mask):
                             fname, position, n_bytes,
                             field.raw_lbpack,
                             field.boundary_packing,
-                            field.bmdi, land_mask)
+                            field.bmdi)
         block_shape = data_shape if 0 not in data_shape else (1, 1)
-        field.data = as_lazy_data(proxy, chunks=block_shape)
+        if with_landmask_field is None:
+            # For a "normal" (non-landsea-masked) field, the proxy can be
+            # wrapped directly as a deferred array.
+            field.data = as_lazy_data(proxy, chunks=block_shape)
+        else:
+            # This is a landsea-masked field, and its data must be handled in
+            # a different way :  Because data shape/size is not known in
+            # advance, the data+mask calculation can't be represented
+            # as a dask-array operation.  Instead, we make that calculation
+            # 'delayed', and then use 'from_delayed' to make the result back
+            # into a dask array -- because the final result shape *is* known.
+            @dask.delayed
+            def fetch_valid_values_array():
+                # Return the data values array (shape+size unknown).
+                return proxy[:]
+
+            delayed_valid_values = fetch_valid_values_array()
+
+            # Get the mask data-array from the landsea-mask field.
+            # This is *either* a lazy or a real array, we don't actually care.
+            # If this is a deferred dependency, the delayed calc can see that.
+            mask_field_array = with_landmask_field.core_data()
+
+            # Check whether this field uses a land or a sea mask.
+            if field.lbpack.n3 not in (1, 2):
+                raise ValueError('Unsupported mask compression : '
+                                 'lbpack.n3 = {}.'.format(field.lbpack.n3))
+            if field.lbpack.n3 == 2:
+                # Sea-mask packing : points are inverse of the land-mask.
+                mask_field_array = ~mask_field_array
+
+            # Define the mask+data calculation as a deferred operation.
+            # NOTE: duplicates the operation in _data_bytes_to_shaped_array.
+            @dask.delayed
+            def calc_array(mask, values):
+                # Note: "mask" is True at *valid* points, not missing ones.
+                # First ensure the mask array is boolean (not int).
+                mask = mask.astype(bool)
+                result = ma.masked_all(mask.shape, dtype=dtype)
+                n_values = np.sum(mask)
+                if n_values > 0:
+                    # Note: data field can have excess values, but not fewer.
+                    result[mask] = values[:n_values]
+                return result
+
+            delayed_result = calc_array(mask_field_array,
+                                        delayed_valid_values)
+            lazy_result_array = da.from_delayed(delayed_result,
+                                                shape=block_shape,
+                                                dtype=dtype)
+            field.data = lazy_result_array
 
 
 def _field_gen(filename, read_data_bytes, little_ended=False):
diff --git a/lib/iris/tests/test_pp_module.py b/lib/iris/tests/test_pp_module.py
index 2c7ac0f1cf..1537c13c29 100644
--- a/lib/iris/tests/test_pp_module.py
+++ b/lib/iris/tests/test_pp_module.py
@@ -1,4 +1,4 @@
-# (C) British Crown Copyright 2013 - 2018, Met Office
+# (C) British Crown Copyright 2013 - 2019, Met Office
 #
 # This file is part of Iris.
 #
@@ -443,7 +443,7 @@ class TestPPDataProxyEquality(tests.IrisTest):
     def test_not_implemented(self):
         class Terry(object): pass
         pox = pp.PPDataProxy("john", "michael", "eric", "graham", "brian",
-                             "spam", "beans", "eggs", "parrot")
+                             "spam", "beans", "eggs")
         self.assertIs(pox.__eq__(Terry()), NotImplemented)
         self.assertIs(pox.__ne__(Terry()), NotImplemented)
 
diff --git a/lib/iris/tests/unit/fileformats/pp/test_PPDataProxy.py b/lib/iris/tests/unit/fileformats/pp/test_PPDataProxy.py
index 0732f296bb..8dc21f288f 100644
--- a/lib/iris/tests/unit/fileformats/pp/test_PPDataProxy.py
+++ b/lib/iris/tests/unit/fileformats/pp/test_PPDataProxy.py
@@ -1,4 +1,4 @@
-# (C) British Crown Copyright 2014 - 2015, Met Office
+# (C) British Crown Copyright 2014 - 2019, Met Office
 #
 # This file is part of Iris.
 #
@@ -31,14 +31,14 @@ class Test_lbpack(tests.IrisTest):
     def test_lbpack_SplittableInt(self):
         lbpack = mock.Mock(spec_set=SplittableInt)
         proxy = PPDataProxy(None, None, None, None,
-                            None, lbpack, None, None, None)
+                            None, lbpack, None, None)
         self.assertEqual(proxy.lbpack, lbpack)
         self.assertIs(proxy.lbpack, lbpack)
 
     def test_lnpack_raw(self):
         lbpack = 4321
         proxy = PPDataProxy(None, None, None, None,
-                            None, lbpack, None, None, None)
+                            None, lbpack, None, None)
         self.assertEqual(proxy.lbpack, lbpack)
         self.assertIsNot(proxy.lbpack, lbpack)
         self.assertIsInstance(proxy.lbpack, SplittableInt)
diff --git a/lib/iris/tests/unit/fileformats/pp/test__create_field_data.py b/lib/iris/tests/unit/fileformats/pp/test__create_field_data.py
index 743d52ea0e..8821220689 100644
--- a/lib/iris/tests/unit/fileformats/pp/test__create_field_data.py
+++ b/lib/iris/tests/unit/fileformats/pp/test__create_field_data.py
@@ -1,4 +1,4 @@
-# (C) British Crown Copyright 2013 - 2017, Met Office
+# (C) British Crown Copyright 2013 - 2019, Met Office
 #
 # This file is part of Iris.
 #
@@ -63,7 +63,6 @@ def test_deferred_bytes(self):
         core_data = mock.MagicMock(return_value=deferred_bytes)
         field = mock.Mock(core_data=core_data)
         data_shape = (100, 120)
-        land_mask = mock.Mock()
         proxy = mock.Mock(dtype=np.dtype('f4'), shape=data_shape,
                           spec=pp.PPDataProxy)
         # We can't directly inspect the concrete data source underlying
@@ -71,7 +70,7 @@ def test_deferred_bytes(self):
         # being created and invoked correctly.
         with mock.patch('iris.fileformats.pp.PPDataProxy') as PPDataProxy:
             PPDataProxy.return_value = proxy
-            pp._create_field_data(field, data_shape, land_mask)
+            pp._create_field_data(field, data_shape, with_landmask_field=None)
         # The data should be assigned via field.data. As this is a mock object
         # we can check the attribute directly.
         self.assertEqual(field.data.shape, data_shape)
@@ -84,8 +83,7 @@ def test_deferred_bytes(self):
                                             n_bytes,
                                             field.raw_lbpack,
                                             field.boundary_packing,
-                                            field.bmdi,
-                                            land_mask)
+                                            field.bmdi)
 
 
 if __name__ == "__main__":
diff --git a/lib/iris/tests/unit/fileformats/pp/test__data_bytes_to_shaped_array.py b/lib/iris/tests/unit/fileformats/pp/test__data_bytes_to_shaped_array.py
index 4870624902..09ca8d99af 100644
--- a/lib/iris/tests/unit/fileformats/pp/test__data_bytes_to_shaped_array.py
+++ b/lib/iris/tests/unit/fileformats/pp/test__data_bytes_to_shaped_array.py
@@ -1,4 +1,4 @@
-# (C) British Crown Copyright 2013 - 2017, Met Office
+# (C) British Crown Copyright 2013 - 2019, Met Office
 #
 # This file is part of Iris.
 #
@@ -111,16 +111,15 @@ def create_lbpack(self, value):
         return pp.SplittableInt(value, name_mapping)
 
     def test_no_land_mask(self):
+        # Check that without a mask, it returns the raw (compressed) data.
         with mock.patch('numpy.frombuffer',
                         return_value=np.arange(3)):
-            with self.assertRaises(ValueError) as err:
-                pp._data_bytes_to_shaped_array(mock.Mock(),
-                                               self.create_lbpack(120), None,
-                                               (3, 4), np.dtype('>f4'),
-                                               -999, mask=None)
-            self.assertEqual(str(err.exception),
-                             ('No mask was found to unpack the data. '
-                              'Could not load.'))
+            result = pp._data_bytes_to_shaped_array(
+                mock.Mock(),
+                self.create_lbpack(120), None,
+                (3, 4), np.dtype('>f4'),
+                -999, mask=None)
+            self.assertArrayAllClose(result, np.arange(3))
 
     def test_land_mask(self):
         # Check basic land unpacking.
diff --git a/lib/iris/tests/unit/fileformats/pp/test__interpret_field.py b/lib/iris/tests/unit/fileformats/pp/test__interpret_field.py
index 1828f44a87..4273a361da 100644
--- a/lib/iris/tests/unit/fileformats/pp/test__interpret_field.py
+++ b/lib/iris/tests/unit/fileformats/pp/test__interpret_field.py
@@ -1,4 +1,4 @@
-# (C) British Crown Copyright 2013 - 2017, Met Office
+# (C) British Crown Copyright 2013 - 2019, Met Office
 #
 # This file is part of Iris.
 #
@@ -26,6 +26,7 @@
 from copy import deepcopy
 import numpy as np
 
+import iris
 import iris.fileformats.pp as pp
 from iris.tests import mock
 
@@ -37,7 +38,8 @@ def setUp(self):
         # A field packed using a land/sea mask.
         self.pp_field = mock.Mock(lblrec=1, lbext=0, lbuser=[0] * 7,
                                   lbrow=0, lbnpt=0,
-                                  raw_lbpack=20,
+                                  raw_lbpack=21,
+                                  lbpack=mock.Mock(n1=0, n2=2, n3=1),
                                   core_data=core_data)
         # The field specifying the land/seamask.
         lbuser = [None, None, None, 30, None, None, 1]  # m01s00i030
@@ -91,19 +93,41 @@ def test_deferred_fix_lbrow_lbnpt(self):
         self.assertEqual(f1.lbrow, 3)
         self.assertEqual(f1.lbnpt, 4)
 
-    def test_shared_land_mask_field(self):
-        # Check that multiple land masked fields share the
-        # land mask field instance.
-        f1 = deepcopy(self.pp_field)
-        f2 = deepcopy(self.pp_field)
-        self.assertIsNot(f1, f2)
-        with mock.patch('iris.fileformats.pp.PPDataProxy') as PPDataProxy:
-            PPDataProxy.return_value = mock.MagicMock(shape=(3, 4),
-                                                      dtype=np.float32)
-            list(pp._interpret_fields([f1, self.land_mask_field, f2]))
-        for call in PPDataProxy.call_args_list:
-            positional_args = call[0]
-            self.assertIs(positional_args[8], self.land_mask_field)
+    @tests.skip_data
+    def test_landsea_unpacking_uses_dask(self):
+        # Ensure that the graph of the (lazy) landsea-masked data contains an
+        # explicit reference to a (lazy) landsea-mask field.
+        # Otherwise its compute() will need to invoke another compute().
+        # See https://github.com/SciTools/iris/issues/3237
+
+        # This is too complex to explore in a mock-ist way, so let's load a
+        # tiny bit of real data ...
+        testfile_path = tests.get_data_path(
+            ['FF', 'landsea_masked', 'testdata_mini_lsm.ff'])
+        landsea_mask, soil_temp = iris.load_cubes(
+            testfile_path, ('land_binary_mask', 'soil_temperature'))
+
+        # Now check that the soil-temp dask graph correctly references the
+        # landsea mask, in its dask graph.
+        lazy_mask_array = landsea_mask.core_data()
+        lazy_soildata_array = soil_temp.core_data()
+
+        # Work out the main dask key for the mask data, as used by 'compute()'.
+        mask_toplev_key = (lazy_mask_array.name,) + (0,) * lazy_mask_array.ndim
+        # Get the 'main' calculation entry.
+        mask_toplev_item = lazy_mask_array.dask[mask_toplev_key]
+        # This should be a task (a simple fetch).
+        self.assertTrue(callable(mask_toplev_item[0]))
+        # Get the key (name) of the array that it fetches.
+        mask_data_name = mask_toplev_item[1]
+
+        # Check that the item this refers to is a PPDataProxy.
+        self.assertIsInstance(lazy_mask_array.dask[mask_data_name],
+                              pp.PPDataProxy)
+
+        # Check that the soil-temp graph references the *same* lazy element,
+        # showing that the mask+data calculation is handled by dask.
+        self.assertIn(mask_data_name, lazy_soildata_array.dask.keys())
 
 
 if __name__ == "__main__":

From 918adeddc5917a29810450c70c1708023562f2a0 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Fri, 17 May 2019 15:54:30 +0100
Subject: [PATCH 2/3] Review changes.

---
 lib/iris/fileformats/pp.py                    | 61 +++++++++++--------
 .../fileformats/pp/test__create_field_data.py |  2 +-
 2 files changed, 38 insertions(+), 25 deletions(-)

diff --git a/lib/iris/fileformats/pp.py b/lib/iris/fileformats/pp.py
index 57b311b7a5..3d248a21a2 100644
--- a/lib/iris/fileformats/pp.py
+++ b/lib/iris/fileformats/pp.py
@@ -1588,22 +1588,20 @@ def _interpret_fields(fields):
     numpy arrays (via the _create_field_data) function.
 
     """
-    land_mask = None
+    land_mask_field = None
     landmask_compressed_fields = []
     for field in fields:
         # Store the first reference to a land mask, and use this as the
         # definitive mask for future fields in this generator.
-        if land_mask is None and field.lbuser[6] == 1 and \
+        if land_mask_field is None and field.lbuser[6] == 1 and \
                 (field.lbuser[3] // 1000) == 0 and \
                 (field.lbuser[3] % 1000) == 30:
-            land_mask = field
+            land_mask_field = field
 
-        apply_landmask = None
-
-        # Handle land compressed data payloads,
-        # when lbpack.n2 is 2.
+        # Handle land compressed data payloads, when lbpack.n2 is 2.
         if (field.raw_lbpack // 10 % 10) == 2:
-            if land_mask is None:
+            # Field uses land-mask packing, so needs a related land-mask field.
+            if land_mask_field is None:
                 landmask_compressed_fields.append(field)
                 # Land-masked fields have their size+shape defined by the
                 # reference landmask field, so we can't yield them if they
@@ -1612,42 +1610,45 @@ def _interpret_fields(fields):
                 # the end of the file.
                 continue
 
-            # Landmask-compressed fields don't have an lbrow and lbnpt.
-            field.lbrow, field.lbnpt = land_mask.lbrow, land_mask.lbnpt
-            # Construct a data array using the landmask field as a template.
-            apply_landmask = land_mask
+            # Land-mask compressed fields don't have an lbrow and lbnpt.
+            field.lbrow, field.lbnpt = \
+                land_mask_field.lbrow, land_mask_field.lbnpt
+            _create_field_data(field, (field.lbrow, field.lbnpt),
+                               land_mask_field=land_mask_field)
+        else:
+            # Field does not use land-mask packing.
+            _create_field_data(field, (field.lbrow, field.lbnpt))
 
-        _create_field_data(field, (field.lbrow, field.lbnpt),
-                           with_landmask_field=apply_landmask)
         yield field
 
     # At file end, return any land-masked fields that were deferred because
     # they were encountered before the landmask reference field.
     if landmask_compressed_fields:
-        if land_mask is None:
+        if land_mask_field is None:
             warnings.warn('Landmask compressed fields existed without a '
                           'landmask to decompress with. The data will have '
                           'a shape of (0, 0) and will not read.')
             mask_shape = (0, 0)
         else:
-            mask_shape = (land_mask.lbrow, land_mask.lbnpt)
+            mask_shape = (land_mask_field.lbrow, land_mask_field.lbnpt)
 
         for field in landmask_compressed_fields:
             field.lbrow, field.lbnpt = mask_shape
             _create_field_data(field, mask_shape,
-                               with_landmask_field=land_mask)
+                               land_mask_field=land_mask_field)
             yield field
 
 
-def _create_field_data(field, data_shape, with_landmask_field=None):
+def _create_field_data(field, data_shape, land_mask_field=None):
     """
     Modifies a field's ``_data`` attribute either by:
      * converting DeferredArrayBytes into a lazy array,
      * converting LoadedArrayBytes into an actual numpy array.
 
-    If 'with_landmask_field' is passed, it is another field :  The landmask
-    field's data is used as a template for this field's data, determining its
-    size, shape and the locations of valid (non-missing) datapoints.
+    If 'land_mask_field' is passed (not None), then it contains the associated
+    landmask, which is also a field :  Its data array is used as a template for
+    `field`'s data, determining its size, shape and the locations of all the
+    valid (non-missing) datapoints.
 
     """
     if isinstance(field.core_data(), LoadedArrayBytes):
@@ -1658,7 +1659,7 @@ def _create_field_data(field, data_shape, with_landmask_field=None):
                                                  data_shape,
                                                  loaded_bytes.dtype,
                                                  field.bmdi,
-                                                 with_landmask_field)
+                                                 land_mask_field)
     else:
         # Wrap the reference to the data payload within a data proxy
         # in order to support deferred data loading.
@@ -1669,7 +1670,7 @@ def _create_field_data(field, data_shape, with_landmask_field=None):
                             field.boundary_packing,
                             field.bmdi)
         block_shape = data_shape if 0 not in data_shape else (1, 1)
-        if with_landmask_field is None:
+        if land_mask_field is None:
             # For a "normal" (non-landsea-masked) field, the proxy can be
             # wrapped directly as a deferred array.
             field.data = as_lazy_data(proxy, chunks=block_shape)
@@ -1690,7 +1691,7 @@ def fetch_valid_values_array():
             # Get the mask data-array from the landsea-mask field.
             # This is *either* a lazy or a real array, we don't actually care.
             # If this is a deferred dependency, the delayed calc can see that.
-            mask_field_array = with_landmask_field.core_data()
+            mask_field_array = land_mask_field.core_data()
 
             # Check whether this field uses a land or a sea mask.
             if field.lbpack.n3 not in (1, 2):
@@ -1708,6 +1709,11 @@ def calc_array(mask, values):
                 # First ensure the mask array is boolean (not int).
                 mask = mask.astype(bool)
                 result = ma.masked_all(mask.shape, dtype=dtype)
+                # Apply the fill-value from the proxy object.
+                # Note: 'values' is just 'proxy' in a dask wrapper.  This arg
+                # must be a dask type so that 'delayed' can recognise it, but
+                # that provides no access to the underlying fill value.
+                result.fill_value = proxy.mdi
                 n_values = np.sum(mask)
                 if n_values > 0:
                     # Note: data field can have excess values, but not fewer.
@@ -1735,6 +1741,13 @@ def _field_gen(filename, read_data_bytes, little_ended=False):
     sufficient information within the field to determine the final
     two-dimensional shape of the data.
 
+    The callers of this routine are the 'load' routines (both PP and FF).
+    They both filter the resulting stream of fields with `_interpret_fields`,
+    which replaces each field.data with an actual array (real or lazy).
+    This is done separately so that `_interpret_fields` can detect land-mask
+    fields and use them to construct data arrays for any fields which use
+    land/sea-mask packing.
+
     """
     dtype_endian_char = '<' if little_ended else '>'
     with open(filename, 'rb') as pp_file:
diff --git a/lib/iris/tests/unit/fileformats/pp/test__create_field_data.py b/lib/iris/tests/unit/fileformats/pp/test__create_field_data.py
index 8821220689..d9816c64b9 100644
--- a/lib/iris/tests/unit/fileformats/pp/test__create_field_data.py
+++ b/lib/iris/tests/unit/fileformats/pp/test__create_field_data.py
@@ -70,7 +70,7 @@ def test_deferred_bytes(self):
         # being created and invoked correctly.
         with mock.patch('iris.fileformats.pp.PPDataProxy') as PPDataProxy:
             PPDataProxy.return_value = proxy
-            pp._create_field_data(field, data_shape, with_landmask_field=None)
+            pp._create_field_data(field, data_shape, land_mask_field=None)
         # The data should be assigned via field.data. As this is a mock object
         # we can check the attribute directly.
         self.assertEqual(field.data.shape, data_shape)

From 5864690c84c712fd2019d8dcf9fce1afb627ae4c Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Fri, 17 May 2019 16:17:00 +0100
Subject: [PATCH 3/3] Fixed some stale comments  + unused variables.

---
 lib/iris/fileformats/pp.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/lib/iris/fileformats/pp.py b/lib/iris/fileformats/pp.py
index 3d248a21a2..2b26b901a2 100644
--- a/lib/iris/fileformats/pp.py
+++ b/lib/iris/fileformats/pp.py
@@ -1642,7 +1642,7 @@ def _interpret_fields(fields):
 def _create_field_data(field, data_shape, land_mask_field=None):
     """
     Modifies a field's ``_data`` attribute either by:
-     * converting DeferredArrayBytes into a lazy array,
+     * converting a 'deferred array bytes' tuple into a lazy array,
      * converting LoadedArrayBytes into an actual numpy array.
 
     If 'land_mask_field' is passed (not None), then it contains the associated
@@ -1735,8 +1735,8 @@ def _field_gen(filename, read_data_bytes, little_ended=False):
 
     A field returned by the generator is only "half-formed" because its
     `_data` attribute represents a simple one-dimensional stream of
-    bytes. (Encoded as an instance of either LoadedArrayBytes or
-    DeferredArrayBytes, depending on the value of `read_data_bytes`.)
+    bytes. (Encoded either as an instance of LoadedArrayBytes or as a
+    'deferred array bytes' tuple, depending on the value of `read_data_bytes`.)
     This is because fields encoded with a land/sea mask do not contain
     sufficient information within the field to determine the final
     two-dimensional shape of the data.
@@ -1825,7 +1825,10 @@ def _field_gen(filename, read_data_bytes, little_ended=False):
                 pp_field.data = LoadedArrayBytes(pp_file.read(data_len),
                                                  dtype)
             else:
-                # Provide enough context to read the data bytes later on.
+                # Provide enough context to read the data bytes later on,
+                # as a 'deferred array bytes' tuple.
+                # N.B. this used to be a namedtuple called DeferredArrayBytes,
+                # but it no longer is. Possibly for performance reasons?
                 pp_field.data = (filename, pp_file.tell(), data_len, dtype)
                 # Seek over the actual data payload.
                 pp_file_seek(data_len, os.SEEK_CUR)
@@ -2242,8 +2245,6 @@ def save_fields(fields, target, append=False):
         of the file.
         Only applicable when target is a filename, not a file handle.
         Default is False.
-    * callback:
-        A modifier/filter function.
 
     See also :func:`iris.io.save`.
 
@@ -2272,11 +2273,9 @@ def save_fields(fields, target, append=False):
 
     if isinstance(target, six.string_types):
         pp_file = open(target, "ab" if append else "wb")
-        filename = target
     elif hasattr(target, "write"):
         if hasattr(target, "mode") and "b" not in target.mode:
             raise ValueError("Target not binary")
-        filename = target.name if hasattr(target, 'name') else None
         pp_file = target
     else:
         raise ValueError("Can only save pp to filename or writable")