From b8530659c4aa60b1ebe20d0dbcea0d94c660312b Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Tue, 15 Jan 2019 21:08:24 +0000 Subject: [PATCH 1/3] Fix landsea-mask data access for UM files. --- .travis.yml | 2 +- lib/iris/fileformats/pp.py | 136 ++++++++++++++---- lib/iris/tests/test_pp_module.py | 4 +- .../unit/fileformats/pp/test_PPDataProxy.py | 6 +- .../fileformats/pp/test__create_field_data.py | 8 +- .../pp/test__data_bytes_to_shaped_array.py | 17 ++- .../fileformats/pp/test__interpret_field.py | 54 +++++-- 7 files changed, 161 insertions(+), 66 deletions(-) diff --git a/.travis.yml b/.travis.yml index ac5d341307..27c9cfe2c6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,7 +28,7 @@ git: install: - > - export IRIS_TEST_DATA_REF="2f3a6bcf25f81bd152b3d66223394074c9069a96"; + export IRIS_TEST_DATA_REF="dba47566a9147645fea586f94a138e0a8d45a48e"; export IRIS_TEST_DATA_SUFFIX=$(echo "${IRIS_TEST_DATA_REF}" | sed "s/^v//"); # Cut short doctest phase under Python 2 : now only supports Python 3 diff --git a/lib/iris/fileformats/pp.py b/lib/iris/fileformats/pp.py index 1c1d281871..57b311b7a5 100644 --- a/lib/iris/fileformats/pp.py +++ b/lib/iris/fileformats/pp.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2010 - 2018, Met Office +# (C) British Crown Copyright 2010 - 2019, Met Office # # This file is part of Iris. # @@ -37,6 +37,9 @@ import numpy.ma as ma import cftime +import dask +import dask.array as da + from iris._deprecation import warn_deprecated from iris._lazy_data import as_concrete_data, as_lazy_data, is_lazy_data import iris.config @@ -602,10 +605,10 @@ class PPDataProxy(object): """A reference to the data payload of a single PP field.""" __slots__ = ('shape', 'src_dtype', 'path', 'offset', 'data_len', - '_lbpack', 'boundary_packing', 'mdi', 'mask') + '_lbpack', 'boundary_packing', 'mdi') def __init__(self, shape, src_dtype, path, offset, data_len, - lbpack, boundary_packing, mdi, mask): + lbpack, boundary_packing, mdi): self.shape = shape self.src_dtype = src_dtype self.path = path @@ -614,7 +617,6 @@ def __init__(self, shape, src_dtype, path, offset, data_len, self.lbpack = lbpack self.boundary_packing = boundary_packing self.mdi = mdi - self.mask = mask # lbpack def _lbpack_setter(self, value): @@ -649,14 +651,14 @@ def __getitem__(self, keys): self.lbpack, self.boundary_packing, self.shape, self.src_dtype, - self.mdi, self.mask) + self.mdi) data = data.__getitem__(keys) return np.asanyarray(data, dtype=self.dtype) def __repr__(self): fmt = '<{self.__class__.__name__} shape={self.shape}' \ ' src_dtype={self.dtype!r} path={self.path!r}' \ - ' offset={self.offset} mask={self.mask!r}>' + ' offset={self.offset}>' return fmt.format(self=self) def __getstate__(self): @@ -772,24 +774,29 @@ def _data_bytes_to_shaped_array(data_bytes, lbpack, boundary_packing, elif lbpack.n2 == 2: if mask is None: - raise ValueError('No mask was found to unpack the data. ' - 'Could not load.') - land_mask = mask.data.astype(np.bool) - sea_mask = ~land_mask - new_data = np.ma.masked_all(land_mask.shape) - new_data.fill_value = mdi - if lbpack.n3 == 1: - # Land mask packed data. - # Sometimes the data comes in longer than it should be (i.e. it - # looks like the compressed data is compressed, but the trailing - # data hasn't been clipped off!). - new_data[land_mask] = data[:land_mask.sum()] - elif lbpack.n3 == 2: - # Sea mask packed data. - new_data[sea_mask] = data[:sea_mask.sum()] + # If we are given no mask to apply, then just return raw data, even + # though it does not have the correct shape. + # For dask-delayed loading, this means that mask, data and the + # combination can be properly handled within a dask graph. + # However, we still mask any MDI values in the array (below). + pass else: - raise ValueError('Unsupported mask compression.') - data = new_data + land_mask = mask.data.astype(np.bool) + sea_mask = ~land_mask + new_data = np.ma.masked_all(land_mask.shape) + new_data.fill_value = mdi + if lbpack.n3 == 1: + # Land mask packed data. + # Sometimes the data comes in longer than it should be (i.e. it + # looks like the compressed data is compressed, but the + # trailing data hasn't been clipped off!). + new_data[land_mask] = data[:land_mask.sum()] + elif lbpack.n3 == 2: + # Sea mask packed data. + new_data[sea_mask] = data[:sea_mask.sum()] + else: + raise ValueError('Unsupported mask compression.') + data = new_data else: # Reform in row-column order @@ -1591,20 +1598,31 @@ def _interpret_fields(fields): (field.lbuser[3] % 1000) == 30: land_mask = field + apply_landmask = None + # Handle land compressed data payloads, # when lbpack.n2 is 2. if (field.raw_lbpack // 10 % 10) == 2: if land_mask is None: landmask_compressed_fields.append(field) + # Land-masked fields have their size+shape defined by the + # reference landmask field, so we can't yield them if they + # are encountered *before* the landmask. + # In that case, defer them, and process them all afterwards at + # the end of the file. continue - # Land compressed fields don't have a lbrow and lbnpt. + # Landmask-compressed fields don't have an lbrow and lbnpt. field.lbrow, field.lbnpt = land_mask.lbrow, land_mask.lbnpt + # Construct a data array using the landmask field as a template. + apply_landmask = land_mask - data_shape = (field.lbrow, field.lbnpt) - _create_field_data(field, data_shape, land_mask) + _create_field_data(field, (field.lbrow, field.lbnpt), + with_landmask_field=apply_landmask) yield field + # At file end, return any land-masked fields that were deferred because + # they were encountered before the landmask reference field. if landmask_compressed_fields: if land_mask is None: warnings.warn('Landmask compressed fields existed without a ' @@ -1616,16 +1634,21 @@ def _interpret_fields(fields): for field in landmask_compressed_fields: field.lbrow, field.lbnpt = mask_shape - _create_field_data(field, (field.lbrow, field.lbnpt), land_mask) + _create_field_data(field, mask_shape, + with_landmask_field=land_mask) yield field -def _create_field_data(field, data_shape, land_mask): +def _create_field_data(field, data_shape, with_landmask_field=None): """ Modifies a field's ``_data`` attribute either by: * converting DeferredArrayBytes into a lazy array, * converting LoadedArrayBytes into an actual numpy array. + If 'with_landmask_field' is passed, it is another field : The landmask + field's data is used as a template for this field's data, determining its + size, shape and the locations of valid (non-missing) datapoints. + """ if isinstance(field.core_data(), LoadedArrayBytes): loaded_bytes = field.core_data() @@ -1634,7 +1657,8 @@ def _create_field_data(field, data_shape, land_mask): field.boundary_packing, data_shape, loaded_bytes.dtype, - field.bmdi, land_mask) + field.bmdi, + with_landmask_field) else: # Wrap the reference to the data payload within a data proxy # in order to support deferred data loading. @@ -1643,9 +1667,59 @@ def _create_field_data(field, data_shape, land_mask): fname, position, n_bytes, field.raw_lbpack, field.boundary_packing, - field.bmdi, land_mask) + field.bmdi) block_shape = data_shape if 0 not in data_shape else (1, 1) - field.data = as_lazy_data(proxy, chunks=block_shape) + if with_landmask_field is None: + # For a "normal" (non-landsea-masked) field, the proxy can be + # wrapped directly as a deferred array. + field.data = as_lazy_data(proxy, chunks=block_shape) + else: + # This is a landsea-masked field, and its data must be handled in + # a different way : Because data shape/size is not known in + # advance, the data+mask calculation can't be represented + # as a dask-array operation. Instead, we make that calculation + # 'delayed', and then use 'from_delayed' to make the result back + # into a dask array -- because the final result shape *is* known. + @dask.delayed + def fetch_valid_values_array(): + # Return the data values array (shape+size unknown). + return proxy[:] + + delayed_valid_values = fetch_valid_values_array() + + # Get the mask data-array from the landsea-mask field. + # This is *either* a lazy or a real array, we don't actually care. + # If this is a deferred dependency, the delayed calc can see that. + mask_field_array = with_landmask_field.core_data() + + # Check whether this field uses a land or a sea mask. + if field.lbpack.n3 not in (1, 2): + raise ValueError('Unsupported mask compression : ' + 'lbpack.n3 = {}.'.format(field.lbpack.n3)) + if field.lbpack.n3 == 2: + # Sea-mask packing : points are inverse of the land-mask. + mask_field_array = ~mask_field_array + + # Define the mask+data calculation as a deferred operation. + # NOTE: duplicates the operation in _data_bytes_to_shaped_array. + @dask.delayed + def calc_array(mask, values): + # Note: "mask" is True at *valid* points, not missing ones. + # First ensure the mask array is boolean (not int). + mask = mask.astype(bool) + result = ma.masked_all(mask.shape, dtype=dtype) + n_values = np.sum(mask) + if n_values > 0: + # Note: data field can have excess values, but not fewer. + result[mask] = values[:n_values] + return result + + delayed_result = calc_array(mask_field_array, + delayed_valid_values) + lazy_result_array = da.from_delayed(delayed_result, + shape=block_shape, + dtype=dtype) + field.data = lazy_result_array def _field_gen(filename, read_data_bytes, little_ended=False): diff --git a/lib/iris/tests/test_pp_module.py b/lib/iris/tests/test_pp_module.py index 2c7ac0f1cf..1537c13c29 100644 --- a/lib/iris/tests/test_pp_module.py +++ b/lib/iris/tests/test_pp_module.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2013 - 2018, Met Office +# (C) British Crown Copyright 2013 - 2019, Met Office # # This file is part of Iris. # @@ -443,7 +443,7 @@ class TestPPDataProxyEquality(tests.IrisTest): def test_not_implemented(self): class Terry(object): pass pox = pp.PPDataProxy("john", "michael", "eric", "graham", "brian", - "spam", "beans", "eggs", "parrot") + "spam", "beans", "eggs") self.assertIs(pox.__eq__(Terry()), NotImplemented) self.assertIs(pox.__ne__(Terry()), NotImplemented) diff --git a/lib/iris/tests/unit/fileformats/pp/test_PPDataProxy.py b/lib/iris/tests/unit/fileformats/pp/test_PPDataProxy.py index 0732f296bb..8dc21f288f 100644 --- a/lib/iris/tests/unit/fileformats/pp/test_PPDataProxy.py +++ b/lib/iris/tests/unit/fileformats/pp/test_PPDataProxy.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2014 - 2015, Met Office +# (C) British Crown Copyright 2014 - 2019, Met Office # # This file is part of Iris. # @@ -31,14 +31,14 @@ class Test_lbpack(tests.IrisTest): def test_lbpack_SplittableInt(self): lbpack = mock.Mock(spec_set=SplittableInt) proxy = PPDataProxy(None, None, None, None, - None, lbpack, None, None, None) + None, lbpack, None, None) self.assertEqual(proxy.lbpack, lbpack) self.assertIs(proxy.lbpack, lbpack) def test_lnpack_raw(self): lbpack = 4321 proxy = PPDataProxy(None, None, None, None, - None, lbpack, None, None, None) + None, lbpack, None, None) self.assertEqual(proxy.lbpack, lbpack) self.assertIsNot(proxy.lbpack, lbpack) self.assertIsInstance(proxy.lbpack, SplittableInt) diff --git a/lib/iris/tests/unit/fileformats/pp/test__create_field_data.py b/lib/iris/tests/unit/fileformats/pp/test__create_field_data.py index 743d52ea0e..8821220689 100644 --- a/lib/iris/tests/unit/fileformats/pp/test__create_field_data.py +++ b/lib/iris/tests/unit/fileformats/pp/test__create_field_data.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2013 - 2017, Met Office +# (C) British Crown Copyright 2013 - 2019, Met Office # # This file is part of Iris. # @@ -63,7 +63,6 @@ def test_deferred_bytes(self): core_data = mock.MagicMock(return_value=deferred_bytes) field = mock.Mock(core_data=core_data) data_shape = (100, 120) - land_mask = mock.Mock() proxy = mock.Mock(dtype=np.dtype('f4'), shape=data_shape, spec=pp.PPDataProxy) # We can't directly inspect the concrete data source underlying @@ -71,7 +70,7 @@ def test_deferred_bytes(self): # being created and invoked correctly. with mock.patch('iris.fileformats.pp.PPDataProxy') as PPDataProxy: PPDataProxy.return_value = proxy - pp._create_field_data(field, data_shape, land_mask) + pp._create_field_data(field, data_shape, with_landmask_field=None) # The data should be assigned via field.data. As this is a mock object # we can check the attribute directly. self.assertEqual(field.data.shape, data_shape) @@ -84,8 +83,7 @@ def test_deferred_bytes(self): n_bytes, field.raw_lbpack, field.boundary_packing, - field.bmdi, - land_mask) + field.bmdi) if __name__ == "__main__": diff --git a/lib/iris/tests/unit/fileformats/pp/test__data_bytes_to_shaped_array.py b/lib/iris/tests/unit/fileformats/pp/test__data_bytes_to_shaped_array.py index 4870624902..09ca8d99af 100644 --- a/lib/iris/tests/unit/fileformats/pp/test__data_bytes_to_shaped_array.py +++ b/lib/iris/tests/unit/fileformats/pp/test__data_bytes_to_shaped_array.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2013 - 2017, Met Office +# (C) British Crown Copyright 2013 - 2019, Met Office # # This file is part of Iris. # @@ -111,16 +111,15 @@ def create_lbpack(self, value): return pp.SplittableInt(value, name_mapping) def test_no_land_mask(self): + # Check that without a mask, it returns the raw (compressed) data. with mock.patch('numpy.frombuffer', return_value=np.arange(3)): - with self.assertRaises(ValueError) as err: - pp._data_bytes_to_shaped_array(mock.Mock(), - self.create_lbpack(120), None, - (3, 4), np.dtype('>f4'), - -999, mask=None) - self.assertEqual(str(err.exception), - ('No mask was found to unpack the data. ' - 'Could not load.')) + result = pp._data_bytes_to_shaped_array( + mock.Mock(), + self.create_lbpack(120), None, + (3, 4), np.dtype('>f4'), + -999, mask=None) + self.assertArrayAllClose(result, np.arange(3)) def test_land_mask(self): # Check basic land unpacking. diff --git a/lib/iris/tests/unit/fileformats/pp/test__interpret_field.py b/lib/iris/tests/unit/fileformats/pp/test__interpret_field.py index 1828f44a87..4273a361da 100644 --- a/lib/iris/tests/unit/fileformats/pp/test__interpret_field.py +++ b/lib/iris/tests/unit/fileformats/pp/test__interpret_field.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2013 - 2017, Met Office +# (C) British Crown Copyright 2013 - 2019, Met Office # # This file is part of Iris. # @@ -26,6 +26,7 @@ from copy import deepcopy import numpy as np +import iris import iris.fileformats.pp as pp from iris.tests import mock @@ -37,7 +38,8 @@ def setUp(self): # A field packed using a land/sea mask. self.pp_field = mock.Mock(lblrec=1, lbext=0, lbuser=[0] * 7, lbrow=0, lbnpt=0, - raw_lbpack=20, + raw_lbpack=21, + lbpack=mock.Mock(n1=0, n2=2, n3=1), core_data=core_data) # The field specifying the land/seamask. lbuser = [None, None, None, 30, None, None, 1] # m01s00i030 @@ -91,19 +93,41 @@ def test_deferred_fix_lbrow_lbnpt(self): self.assertEqual(f1.lbrow, 3) self.assertEqual(f1.lbnpt, 4) - def test_shared_land_mask_field(self): - # Check that multiple land masked fields share the - # land mask field instance. - f1 = deepcopy(self.pp_field) - f2 = deepcopy(self.pp_field) - self.assertIsNot(f1, f2) - with mock.patch('iris.fileformats.pp.PPDataProxy') as PPDataProxy: - PPDataProxy.return_value = mock.MagicMock(shape=(3, 4), - dtype=np.float32) - list(pp._interpret_fields([f1, self.land_mask_field, f2])) - for call in PPDataProxy.call_args_list: - positional_args = call[0] - self.assertIs(positional_args[8], self.land_mask_field) + @tests.skip_data + def test_landsea_unpacking_uses_dask(self): + # Ensure that the graph of the (lazy) landsea-masked data contains an + # explicit reference to a (lazy) landsea-mask field. + # Otherwise its compute() will need to invoke another compute(). + # See https://github.com/SciTools/iris/issues/3237 + + # This is too complex to explore in a mock-ist way, so let's load a + # tiny bit of real data ... + testfile_path = tests.get_data_path( + ['FF', 'landsea_masked', 'testdata_mini_lsm.ff']) + landsea_mask, soil_temp = iris.load_cubes( + testfile_path, ('land_binary_mask', 'soil_temperature')) + + # Now check that the soil-temp dask graph correctly references the + # landsea mask, in its dask graph. + lazy_mask_array = landsea_mask.core_data() + lazy_soildata_array = soil_temp.core_data() + + # Work out the main dask key for the mask data, as used by 'compute()'. + mask_toplev_key = (lazy_mask_array.name,) + (0,) * lazy_mask_array.ndim + # Get the 'main' calculation entry. + mask_toplev_item = lazy_mask_array.dask[mask_toplev_key] + # This should be a task (a simple fetch). + self.assertTrue(callable(mask_toplev_item[0])) + # Get the key (name) of the array that it fetches. + mask_data_name = mask_toplev_item[1] + + # Check that the item this refers to is a PPDataProxy. + self.assertIsInstance(lazy_mask_array.dask[mask_data_name], + pp.PPDataProxy) + + # Check that the soil-temp graph references the *same* lazy element, + # showing that the mask+data calculation is handled by dask. + self.assertIn(mask_data_name, lazy_soildata_array.dask.keys()) if __name__ == "__main__": From 918adeddc5917a29810450c70c1708023562f2a0 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Fri, 17 May 2019 15:54:30 +0100 Subject: [PATCH 2/3] Review changes. --- lib/iris/fileformats/pp.py | 61 +++++++++++-------- .../fileformats/pp/test__create_field_data.py | 2 +- 2 files changed, 38 insertions(+), 25 deletions(-) diff --git a/lib/iris/fileformats/pp.py b/lib/iris/fileformats/pp.py index 57b311b7a5..3d248a21a2 100644 --- a/lib/iris/fileformats/pp.py +++ b/lib/iris/fileformats/pp.py @@ -1588,22 +1588,20 @@ def _interpret_fields(fields): numpy arrays (via the _create_field_data) function. """ - land_mask = None + land_mask_field = None landmask_compressed_fields = [] for field in fields: # Store the first reference to a land mask, and use this as the # definitive mask for future fields in this generator. - if land_mask is None and field.lbuser[6] == 1 and \ + if land_mask_field is None and field.lbuser[6] == 1 and \ (field.lbuser[3] // 1000) == 0 and \ (field.lbuser[3] % 1000) == 30: - land_mask = field + land_mask_field = field - apply_landmask = None - - # Handle land compressed data payloads, - # when lbpack.n2 is 2. + # Handle land compressed data payloads, when lbpack.n2 is 2. if (field.raw_lbpack // 10 % 10) == 2: - if land_mask is None: + # Field uses land-mask packing, so needs a related land-mask field. + if land_mask_field is None: landmask_compressed_fields.append(field) # Land-masked fields have their size+shape defined by the # reference landmask field, so we can't yield them if they @@ -1612,42 +1610,45 @@ def _interpret_fields(fields): # the end of the file. continue - # Landmask-compressed fields don't have an lbrow and lbnpt. - field.lbrow, field.lbnpt = land_mask.lbrow, land_mask.lbnpt - # Construct a data array using the landmask field as a template. - apply_landmask = land_mask + # Land-mask compressed fields don't have an lbrow and lbnpt. + field.lbrow, field.lbnpt = \ + land_mask_field.lbrow, land_mask_field.lbnpt + _create_field_data(field, (field.lbrow, field.lbnpt), + land_mask_field=land_mask_field) + else: + # Field does not use land-mask packing. + _create_field_data(field, (field.lbrow, field.lbnpt)) - _create_field_data(field, (field.lbrow, field.lbnpt), - with_landmask_field=apply_landmask) yield field # At file end, return any land-masked fields that were deferred because # they were encountered before the landmask reference field. if landmask_compressed_fields: - if land_mask is None: + if land_mask_field is None: warnings.warn('Landmask compressed fields existed without a ' 'landmask to decompress with. The data will have ' 'a shape of (0, 0) and will not read.') mask_shape = (0, 0) else: - mask_shape = (land_mask.lbrow, land_mask.lbnpt) + mask_shape = (land_mask_field.lbrow, land_mask_field.lbnpt) for field in landmask_compressed_fields: field.lbrow, field.lbnpt = mask_shape _create_field_data(field, mask_shape, - with_landmask_field=land_mask) + land_mask_field=land_mask_field) yield field -def _create_field_data(field, data_shape, with_landmask_field=None): +def _create_field_data(field, data_shape, land_mask_field=None): """ Modifies a field's ``_data`` attribute either by: * converting DeferredArrayBytes into a lazy array, * converting LoadedArrayBytes into an actual numpy array. - If 'with_landmask_field' is passed, it is another field : The landmask - field's data is used as a template for this field's data, determining its - size, shape and the locations of valid (non-missing) datapoints. + If 'land_mask_field' is passed (not None), then it contains the associated + landmask, which is also a field : Its data array is used as a template for + `field`'s data, determining its size, shape and the locations of all the + valid (non-missing) datapoints. """ if isinstance(field.core_data(), LoadedArrayBytes): @@ -1658,7 +1659,7 @@ def _create_field_data(field, data_shape, with_landmask_field=None): data_shape, loaded_bytes.dtype, field.bmdi, - with_landmask_field) + land_mask_field) else: # Wrap the reference to the data payload within a data proxy # in order to support deferred data loading. @@ -1669,7 +1670,7 @@ def _create_field_data(field, data_shape, with_landmask_field=None): field.boundary_packing, field.bmdi) block_shape = data_shape if 0 not in data_shape else (1, 1) - if with_landmask_field is None: + if land_mask_field is None: # For a "normal" (non-landsea-masked) field, the proxy can be # wrapped directly as a deferred array. field.data = as_lazy_data(proxy, chunks=block_shape) @@ -1690,7 +1691,7 @@ def fetch_valid_values_array(): # Get the mask data-array from the landsea-mask field. # This is *either* a lazy or a real array, we don't actually care. # If this is a deferred dependency, the delayed calc can see that. - mask_field_array = with_landmask_field.core_data() + mask_field_array = land_mask_field.core_data() # Check whether this field uses a land or a sea mask. if field.lbpack.n3 not in (1, 2): @@ -1708,6 +1709,11 @@ def calc_array(mask, values): # First ensure the mask array is boolean (not int). mask = mask.astype(bool) result = ma.masked_all(mask.shape, dtype=dtype) + # Apply the fill-value from the proxy object. + # Note: 'values' is just 'proxy' in a dask wrapper. This arg + # must be a dask type so that 'delayed' can recognise it, but + # that provides no access to the underlying fill value. + result.fill_value = proxy.mdi n_values = np.sum(mask) if n_values > 0: # Note: data field can have excess values, but not fewer. @@ -1735,6 +1741,13 @@ def _field_gen(filename, read_data_bytes, little_ended=False): sufficient information within the field to determine the final two-dimensional shape of the data. + The callers of this routine are the 'load' routines (both PP and FF). + They both filter the resulting stream of fields with `_interpret_fields`, + which replaces each field.data with an actual array (real or lazy). + This is done separately so that `_interpret_fields` can detect land-mask + fields and use them to construct data arrays for any fields which use + land/sea-mask packing. + """ dtype_endian_char = '<' if little_ended else '>' with open(filename, 'rb') as pp_file: diff --git a/lib/iris/tests/unit/fileformats/pp/test__create_field_data.py b/lib/iris/tests/unit/fileformats/pp/test__create_field_data.py index 8821220689..d9816c64b9 100644 --- a/lib/iris/tests/unit/fileformats/pp/test__create_field_data.py +++ b/lib/iris/tests/unit/fileformats/pp/test__create_field_data.py @@ -70,7 +70,7 @@ def test_deferred_bytes(self): # being created and invoked correctly. with mock.patch('iris.fileformats.pp.PPDataProxy') as PPDataProxy: PPDataProxy.return_value = proxy - pp._create_field_data(field, data_shape, with_landmask_field=None) + pp._create_field_data(field, data_shape, land_mask_field=None) # The data should be assigned via field.data. As this is a mock object # we can check the attribute directly. self.assertEqual(field.data.shape, data_shape) From 5864690c84c712fd2019d8dcf9fce1afb627ae4c Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Fri, 17 May 2019 16:17:00 +0100 Subject: [PATCH 3/3] Fixed some stale comments + unused variables. --- lib/iris/fileformats/pp.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/lib/iris/fileformats/pp.py b/lib/iris/fileformats/pp.py index 3d248a21a2..2b26b901a2 100644 --- a/lib/iris/fileformats/pp.py +++ b/lib/iris/fileformats/pp.py @@ -1642,7 +1642,7 @@ def _interpret_fields(fields): def _create_field_data(field, data_shape, land_mask_field=None): """ Modifies a field's ``_data`` attribute either by: - * converting DeferredArrayBytes into a lazy array, + * converting a 'deferred array bytes' tuple into a lazy array, * converting LoadedArrayBytes into an actual numpy array. If 'land_mask_field' is passed (not None), then it contains the associated @@ -1735,8 +1735,8 @@ def _field_gen(filename, read_data_bytes, little_ended=False): A field returned by the generator is only "half-formed" because its `_data` attribute represents a simple one-dimensional stream of - bytes. (Encoded as an instance of either LoadedArrayBytes or - DeferredArrayBytes, depending on the value of `read_data_bytes`.) + bytes. (Encoded either as an instance of LoadedArrayBytes or as a + 'deferred array bytes' tuple, depending on the value of `read_data_bytes`.) This is because fields encoded with a land/sea mask do not contain sufficient information within the field to determine the final two-dimensional shape of the data. @@ -1825,7 +1825,10 @@ def _field_gen(filename, read_data_bytes, little_ended=False): pp_field.data = LoadedArrayBytes(pp_file.read(data_len), dtype) else: - # Provide enough context to read the data bytes later on. + # Provide enough context to read the data bytes later on, + # as a 'deferred array bytes' tuple. + # N.B. this used to be a namedtuple called DeferredArrayBytes, + # but it no longer is. Possibly for performance reasons? pp_field.data = (filename, pp_file.tell(), data_len, dtype) # Seek over the actual data payload. pp_file_seek(data_len, os.SEEK_CUR) @@ -2242,8 +2245,6 @@ def save_fields(fields, target, append=False): of the file. Only applicable when target is a filename, not a file handle. Default is False. - * callback: - A modifier/filter function. See also :func:`iris.io.save`. @@ -2272,11 +2273,9 @@ def save_fields(fields, target, append=False): if isinstance(target, six.string_types): pp_file = open(target, "ab" if append else "wb") - filename = target elif hasattr(target, "write"): if hasattr(target, "mode") and "b" not in target.mode: raise ValueError("Target not binary") - filename = target.name if hasattr(target, 'name') else None pp_file = target else: raise ValueError("Can only save pp to filename or writable")